1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-09-27 05:44:18 +03:00

Compare commits

...

2750 Commits

Author SHA1 Message Date
Alasdair Kergon
37e5ee6034 Change new lvm2 git URLs from upper-case to lowercase. 2012-09-17 15:58:30 +00:00
Alasdair Kergon
4907609c27 Indicate new location of source code. 2012-06-07 14:28:32 +00:00
Peter Rajnoha
007b9d4239 Remove unsupported udev_get_dev_path libudev call used for checking udev dir.
With latest changes in the udev, some deprecated functions were removed
from libudev amongst which there was the "udev_get_dev_path" function
we used to compare a device directory used in udev and directore set in
libdevmapper. The "/dev" is hardcoded in udev now (udev version >= 183).

Amongst other changes and from packager's point of view, it's also
important to note that the libudev development library ("libudev-devel")
could now be a part of the systemd development library ("systemd-devel")
because of the udev + systemd merge.
2012-05-29 08:09:10 +00:00
Zdenek Kabelac
2439aa4bf1 Fix error path
Do not increase nr_filt in case of NULL ret value, since the error path
doesn't handle NULL pointers.
2012-05-23 13:02:36 +00:00
Zdenek Kabelac
77ce00ddaa Move thin_check_executable to proper section
It's read from global section.
In-release change, so no what's new
2012-05-23 12:59:35 +00:00
Peter Rajnoha
a40c717162 Partial activation working, expect success in tests. 2012-05-17 12:03:30 +00:00
Peter Rajnoha
7b3b05c1f5 Re-enable tests for snapshots of mirrors. 2012-05-17 08:04:11 +00:00
Zdenek Kabelac
4a7f4703ed snaps of mirrors are enabled 2012-05-16 15:01:06 +00:00
Zdenek Kabelac
24972a9fcc ok - that was nice mid-air collision 2012-05-16 13:09:09 +00:00
Zdenek Kabelac
bd4ef6a050 update 2012-05-16 13:06:18 +00:00
Alasdair Kergon
1404dc2fbf Re-enable partial activation of non-thin LVs until it can be fixed. (2.02.90)
- The test should be checking the LV as a whole, not just individual segments.
2012-05-16 12:50:14 +00:00
Petr Rockai
4d45791346 Add a (currently failing) test for RHBZ 816672. 2012-05-16 10:44:36 +00:00
Petr Rockai
876fba9f2a Add provisions to flag tests that we know will fail, without flagging the build
(i.e. an expected failure).
2012-05-16 10:43:41 +00:00
Alasdair Kergon
8b5db5d9b2 Set delay_resume_if_new on deptree snapshot origin.
(Must avoid activating snapshot origin more than once concurrently.)
2012-05-15 21:27:24 +00:00
Alasdair Kergon
8ddca409b4 add major:minor to table size changed debug message 2012-05-15 20:03:12 +00:00
Alasdair Kergon
9cc266a5d2 indicate when deptree detects but ignores size change in debug msg 2012-05-15 14:10:54 +00:00
Alasdair Kergon
bee3b45081 Refer to details of snapshot of raid problem. 2012-05-14 16:29:50 +00:00
Alasdair Kergon
0c9d2a2246 Warn of deadlock risk when using snapshots of mirror segment type. 2012-05-14 16:18:57 +00:00
Alasdair Kergon
bfdfc9e68e Fix cling policy not to behave like normal policy if no previous LV seg.
Fix alloc cling to cling to PVs already found with contiguous policy.
2012-05-11 22:53:13 +00:00
Alasdair Kergon
3a042496fd Fix allocation policy loop so it doesn't continue beyond cling using later
policies it shouldn't be using when --alloc cling is specified but no tags
are defined.
2012-05-11 22:19:12 +00:00
Alasdair Kergon
d2a3be546f Append _TO_LVSEG to names of internal A_CONTIGUOUS and A_CLING flags.
Remove some unnecesary prev_lvseg checks.
2012-05-11 18:59:01 +00:00
Alasdair Kergon
70cacce52e Always include debug mesg when cling to allocated is set. 2012-05-11 15:32:19 +00:00
Alasdair Kergon
b63eb871fd Refactor _has_matching_pv_tag to provide a fn that takes PV structs. 2012-05-11 15:26:30 +00:00
Peter Rajnoha
dcc18799ec More comments on metadata area types. 2012-05-10 11:03:07 +00:00
Peter Rajnoha
588513328c Comment on auxiliary metadata areas. 2012-05-10 10:37:49 +00:00
Zdenek Kabelac
cfc90d7af2 Add missing pkg init 2012-05-10 08:54:33 +00:00
Alasdair Kergon
1b0ca5fc45 Uncomment allocation section to match style of rest of file. 2012-05-10 00:18:49 +00:00
Peter Rajnoha
bab3366f5f Remove VG/PV used in the test. 2012-05-09 14:44:59 +00:00
Peter Rajnoha
501a2c04a4 Fix division by zero if PV with zero PE count is used during vgcfgrestore. 2012-05-09 12:30:56 +00:00
Zdenek Kabelac
9f60083aff Initial support for lvconvert for thin pool volumes.
Support has many limitations and lots of FIXMEs inside,
however it makes initial task when user creates a separate LV for
thin pool data and thin metadata already usable, so let's enable
it for testing.

Easiest API:

lvconvert --chunksize XX --thinpool data_lv  metadata_lv

More functionality extensions will follow up.

TODO: Code needs some rework since a lot of same code is getting copied.
2012-05-09 12:17:06 +00:00
Zdenek Kabelac
b54dc45c90 Fix regression in for_each_sub_lv
pool_lv is not a sub lv in terms for this function.
It has caused problem with renaming thin_volume, where it has tried to
rename pool LV as well.
2012-05-09 12:12:21 +00:00
Alasdair Kergon
a93ad77e02 Log value chosen in _find_config_bool like other variable types do. 2012-05-08 14:31:44 +00:00
Jonathan Earl Brassow
679f946dd9 Fix up-convert when mirror activation is controled by volume_list and tags.
When mirrors are up-converted, a transient mirror layer is put in so that
only the new devices are sync'ed.  That transient layer must carry the tags
of the original mirror LV, otherwise it will fail to activate when activation
is regulated by lvm.conf:activation/volume_list.  The conversion would then
fail.

The fix is to do exactly the same thing that is being done for linear ->
mirror converting (lib/metadata/mirror.c:_init_mirror_log()).  We copy the
tags temporarily for the new LV and remove them after the activation.
2012-05-05 02:08:46 +00:00
Jonathan Earl Brassow
8322deefee Disallow snapshots of mirror segment types.
Snapshots of RAID logical volumes are allowed (including "raid1").  However,
snapshots of "mirror" logical volumes has been disallowed due to unsolvable
issues inherent to the design.  The fact that mirroring (dm-raid1.c) must
stop all I/O as the result of a failure and wait for userspace intervention
can lead to a circular dependency if userspace is simultaneously waiting for
snapshots (on mirrors) to make an I/O update before proceeding.

Various snapshot on mirror tests have been removed as a result.
2012-05-01 19:21:24 +00:00
Jonathan Earl Brassow
8c5517e01d Make a note that "raid1" is not cluster-aware in the example lvm.conf.
In the description of the segment types for the 'mirror_segtype_default'
field, we mention that "raid1" is not cluster-aware.
2012-04-27 18:37:42 +00:00
Peter Rajnoha
fdd34489dd Use dm_strncpy instead of plain strncpy while setting the socket path.
Also check the return value as it meaningless to work with truncated strings.
2012-04-27 09:52:33 +00:00
Jonathan Earl Brassow
172a9457bf Fix bug in cmirror that caused incorrect status info to print on some nodes.
Looking at the code in cmirrord/local.c, we can see the various different
request types handled in different ways.  Some information that is non-changing
does not need to go around the cluster and can be short-circuited.  For
example, once the cluster mirror is in-sync, it is pointless to continue
sending that query around the cluster.  We can save network bandwidth and reply
directly back to the kernel.  When it comes to status information, there are
two types 'TABLE' and 'INFO'.  The 'TABLE' information never changes and
belongs to the group of requests that can be safely short-circuited.  The
'STATUS' information can change - and will change if a device fails.  Thus it
cannot be short-circuited, but this is exactly what was found.  The 'STATUS'
information request was being short-circuited and therefore never reporting the
failure condition to anyone other than the "server" that experienced it
directly.
2012-04-26 17:30:49 +00:00
Alasdair Kergon
7637b40159 Remove statement that snapshots cannot be tagged from lvm man page. 2012-04-26 15:24:46 +00:00
Jonathan Earl Brassow
7072ed86f9 Disallow changing cluster attribute of VG while RAID LVs are active.
Mirror and snapshot LVs are already checked for when switching the cluster
attribute of a VG.  This patch adds RAID.
2012-04-25 13:38:41 +00:00
Peter Rajnoha
ff8ed61f76 WHATS_NEW 2012-04-25 09:32:36 +00:00
Jonathan Earl Brassow
50b2d511ec Allow a subset of failed devices to be replaced in RAID LVs.
If two devices in an array failed, it was previously impossible to replace
just one of them.  This patch allows for the replacement of some, but perhaps
not all, failed devices.
2012-04-24 20:05:31 +00:00
Jonathan Earl Brassow
9ac67656ae Prevent resume from creating error devices that already exist from suspend.
Thanks to agk for providing the patch that prevents resume from attempting
(and then failing) to create error devices which already exist; having been
created by a corresponding suspend operation.
2012-04-24 20:00:03 +00:00
Zdenek Kabelac
c4da860fd5 Synchronize with self-destruction of dmeventd
In some occasional case dmevent restart was experiencing problems
with obtaining pid lockfile. So this patch tries to send several more kill
message until daemon kills itself so there is would reponse.
With this small loop the restart seems to work reliable,
although the loopsize and usleep are just randomly picked for now.
2012-04-24 12:25:12 +00:00
Zdenek Kabelac
7ff84d5269 Expect failure if the reduction doesn't really happen 2012-04-24 12:17:49 +00:00
Zdenek Kabelac
d057cc445d Add some lock comments 2012-04-24 12:17:12 +00:00
Zdenek Kabelac
d614e1f447 Update singlenode locking
Support lock conversion
Work also with LCK_READ
TODO: do more validation.
2012-04-24 12:16:40 +00:00
Zdenek Kabelac
22d0b18687 Sort lvs options by alphabet 2012-04-24 12:14:17 +00:00
Zdenek Kabelac
cfa6500574 Update some lvs column names
Fix thin_pool -> pool_lv
Add more fields supported by thin provisioning.
Keep fields alphabetically sorted for easier lookup.
2012-04-24 12:13:29 +00:00
Peter Rajnoha
cc75389238 Also rename DevNos header to DevNosUsed in dmsetup info -c output. 2012-04-24 08:24:36 +00:00
Peter Rajnoha
e0071a8634 Rename (Blk)DevNames header to (Blk)DevNamesUsed in dmsetup info -c output.
Just to make it clearer since there is the "dmsetup info -c -o blkdevname"
as well that shows the "block device name for this mapping", having a
"BlkDevName" header on output.

It's a bit confusing then if the "dmsetup info -c -o devs_used,blkdevs_used"
is named with a plural "DevNames"/"BlkDevNames" but at the same time having
a totally different meaning than the singular form "BlkDevName".

  DevNames --> DevNamesUsed
  BlkDevNames --> BlkDevNamesUsed

...makes it much more comprehensible.
2012-04-24 08:00:55 +00:00
Alasdair Kergon
4f9c54bdac Handle replacement of an active device that goes missing with an error device.
(E.g. lvchange --refresh --partial on striped LV if a PV disappeared.)
2012-04-24 00:51:26 +00:00
Jonathan Earl Brassow
901d70b09c Unlike 'mirror' segtype, 'raid1' should perform flush on suspend.
The 'mirror' segtype and 'raid1' segtype both set the 'MIRRORED' flag.
However, due to differences in the way these device-mapper targets behave
'mirror' must be suspended with the 'noflush' option and 'raid1' does not
have to be.

This patch ensures that when the 'MIRRORED' flag is checked to see if
'noflush' is needed that it does not also set it for 'raid1' by mistake.
2012-04-20 14:17:44 +00:00
Jonathan Earl Brassow
2620663d7b Fix for bug 807776: invalid lvconvert --merge output should mention raid1 now
s/snapshot/mergeable volume/
2012-04-19 21:50:37 +00:00
Alasdair Kergon
d74afb244b . 2012-04-19 13:47:38 +00:00
Alasdair Kergon
5d048a5e26 . 2012-04-19 13:47:11 +00:00
Peter Rajnoha
0afc1897b2 Add udev info and context to lvmdump.
--------------------------------------------------------------------
2012-04-18 15:26:02 +00:00
Petr Rockai
95b3332658 Add a rudimentary lvmetad manpage. 2012-04-15 15:40:48 +00:00
Alasdair Kergon
b8e51eae00 Remove 'up' from rounding message that sometimes rounds down.
Detect reduction of 0 after rounding for stripes and avoid warning of potential data loss.
2012-04-12 15:11:21 +00:00
Jonathan Earl Brassow
b4eaf64b10 Fix code that performs RAID device replacement while under snapshot.
The code should have been calling [suspend|resume]_lv_origin() rather than
[suspend|resume]_lv.

This addresses bug 807069.
2012-04-12 03:16:37 +00:00
Jonathan Earl Brassow
ba568151a5 Fix inability to split RAID1 image while specifying a particular PV.
The logic for resuming the original and newly split LVs was not properly
done to handle situations where anything but the last device in the array
was split.  It did not take into account the possible name collisions that
might occur when the original LV undergoes the shifting and renaming of its
sub-LVs.
2012-04-11 14:20:19 +00:00
Zdenek Kabelac
0644a64c94 Update test 2012-04-11 12:53:46 +00:00
Alasdair Kergon
28bc212eb0 spacing 2012-04-11 12:49:10 +00:00
Zdenek Kabelac
c63ddca435 Update man pages
Use one style for man pages.
2012-04-11 12:42:10 +00:00
Zdenek Kabelac
4c4094c33c Fix lvresize for thin pool
When resizing thin pool - we need to use strip info from  _tdata volume.
In future more generic solution will be necessary once we start to support
lvconvert (resize of stacked devices and stay properly aligned).
For now we just allow striped or linear LV so this code will work.
2012-04-11 12:40:03 +00:00
Zdenek Kabelac
b51d131c59 Lvresize rounds upward
When given lvresize new size - round upward for stripes - unless we use % and
we are at the border of free extents.

This patch is not a complete fix and few more cases will need special care.
2012-04-11 12:36:37 +00:00
Zdenek Kabelac
87a723122d Support rounding downward for lvcreate and %
If specifying size with % and we are reaching number
of free extents - round downward with stripes.
2012-04-11 12:33:34 +00:00
Zdenek Kabelac
8d92388231 Move check for min strip size
and remove few asigns that are not needed.
2012-04-11 12:30:48 +00:00
Peter Rajnoha
a6d66a030d Change message severity to log_very_verbose for missing dev info in udev db.
Libudev does not provide transactions when querying udev database - once we
get the list of block devices (devices/obtain_device_list_from_udev=1) and
we iterate over the list to get more detailed information about device node
and symlink names used etc., the device could be removed just in between we
get the list and put a query for more info. In this case, libudev returns
NULL value as the device does not exist anymore.

Recently, we've added a warning message to reveal such situations. However,
this could be misleading if the device is not related to the LVM action
we're just processing - the non-related block device could be removed in
parallel and this is not an error but a possible and normal operation.

(N.B. This "missing info" should not happen when devices are related to
the LVM action we're just processing since all such processing should be
synchronized with udev and the udev db must always be in consistent state
after the sync point. But we can't filter this situation out from others,
non-related devices, so we have to lower the message verbosity here for a
general solution.)
2012-04-11 09:12:02 +00:00
Jonathan Earl Brassow
f9961e6f19 RAID LVs could not handle a down-convert if a device other than the last one
in the array was specified for removal.  This change addresses that (bz806111).
2012-04-11 01:23:29 +00:00
Jonathan Earl Brassow
6a69a4472d Commit ID 46a75dedb4 consolidated code from the
various dmeventd plug-ins into a new function called 'dmeventd_lvm2_command',
but the new function did not strip off the "_mlog" extentions that the
mirror plug-in had been doing.  This created bug 794904 - failure to replace
devices in a redundant log.

The test suite did catch this scenario because it performs repair tests (mainly)
through the CLI and not dmeventd.  It's also not easy to test because the test
itself will hang if the bug is encountered.
2012-04-10 23:34:41 +00:00
Jonathan Earl Brassow
35e728ec8d Add a couple new functions to gdbinit file and decode a couple lv->status flags
New functions:
 - seg_pvs: Print a list of PVs in a seg_pvs list
 - pv_dev_name: print name of a PV
2012-04-10 23:24:00 +00:00
Peter Rajnoha
48511e9bd6 strcpy -> strncpy in common daemon code. 2012-04-10 12:55:37 +00:00
Peter Rajnoha
fb4f76a29c Check if info struct returned is not NULL.
Just some missing checks revealed by Coverity in recent code.
2012-04-10 12:26:27 +00:00
Zdenek Kabelac
2d6f57be7e Fix unlocking in error path of vgreduce
When vg_read fails, it internally unlocks VG if it's been locked,
so in error path we should skip unlock_vg for this case.
(user would see ugly internal warning)
2012-03-30 14:59:35 +00:00
Peter Rajnoha
c44897abfc WHATS_NEW 2012-03-30 11:39:52 +00:00
Peter Rajnoha
667d681ce0 Detect VG name being part of the LV name in lvconvert --splitmirrors -n.
Before:
devel/~ # lvconvert --splitmirrors 1 -n vg/splitted_one vg/mirrored_one
  Internal error: LV name vg/splitted_one has invalid form.
  Intermediate VG metadata write failed.

After:
devel/~ # lvconvert --splitmirrors 1 -n vg/splitted_one vg/mirrored_one
  Logical volume mirrored_one converted.

devel/~ # lvconvert --splitmirrors 1 -n abc/splitted_one vg/mirrored_one
  Please use a single volume group name ("vg" or "abc")
  Run `lvconvert --help' for more information.
2012-03-30 08:58:02 +00:00
Zdenek Kabelac
e1b9db7140 Minor fixes
Just small updates and remove <backtrace> after log_error.
2012-03-28 11:11:25 +00:00
Zdenek Kabelac
a5cffcdcf9 Improve test suite
Add make help target.
Add LVM_TEST_PARALLEL to support parallel runs of tests
Work around the problem the dmsetup table/info may return error
by using dmtable and dminfo function that will use 'should'.
(Error happens when some concurently running process removes table
entry while dmsetup command resolves table entries inside the loop.)
2012-03-28 11:10:08 +00:00
Peter Rajnoha
b798ea409d Fix tests to work with vgscan --cache fixing inconsistent metadata. 2012-03-28 07:46:35 +00:00
Milan Broz
7387343eab Avoid closing clvmd socket twice. 2012-03-27 16:59:28 +00:00
Milan Broz
53064420ff Fix exclusive lvmchange -aey to fail if volume is active on different node.
Activation on remote node should be tried only if it is masked by tags
locally (like when hosttags enabled, IOW activate_lv_excl_local()
doesn't return error.)

Introduced change caused that lvchange -aey succeeded even if volume was
activated exclusively remotely.
2012-03-27 15:53:45 +00:00
Milan Broz
c8d671b6ad There is no hotsname_tags but only hosttags... sigh, even docs is wrong :-) 2012-03-27 13:39:08 +00:00
Milan Broz
59a26019e0 Fail early if cmirror is not detected and pvmove requires it. 2012-03-27 12:01:22 +00:00
Milan Broz
195aa7699e Also skip pvmove for remotely active LVs. 2012-03-27 11:43:32 +00:00
Peter Rajnoha
c950126620 Add 'vgscan --cache' functionality for consistency with 'pvscan --cache'.
Calling vgscan alone should reuse information from the lvmetad (if running).
The --cache option should initiate direct device scan and update lvmetad
appropriately (if running).

This is mainly for vgscan to behave consistently compared to pvscan.
2012-03-27 11:04:46 +00:00
Alasdair Kergon
600a3810ea add make install_verity 2012-03-26 23:23:31 +00:00
Alasdair Kergon
749f4ae7cb Rename verity dir 2012-03-26 23:09:37 +00:00
Milan Broz
e674eb6a79 Keep exclusive activation in pvmove if LV is already active.
Pvmove should never try to downgrade exclusive lock
for LVs.

This allows pvmove to work again for exclusive activated LVs.
2012-03-26 20:33:40 +00:00
Milan Broz
ad7d4b6c56 Do not allow pvmove if some affected LVs are activated
locally or on more nodes while others are activated exclusively.

Current pvmove code can either use local mirror (for exclusive
activation) or cmirror (for clustered LVs).

Because the whole intenal pvmove LV is just segmented LV containing
segments of several top-level LVs, code cannot properly handle
situation if some segment need to be activated exclusively.

Previously, it wrongly activated exclusive LV on all nodes
(locing code allowed it) but now this is no lnger possible.

If there is exclusively activated LV, pvmove is only
possible if all affected LVs are aslo activated exclusively.

(Note that in non-exclusive mode pvmove still activates LVs
on other nodes during move.)

# lvchange -aly vg_test/lv1
# lvchange -aey vg_test/lv2
# pvmove -i 1 /dev/sdc
   Error locking on node bar-01: Device or resource busy
   Error locking on node bar-03: Volume is busy on another node
...
   Failed to activate lv2
2012-03-26 20:32:58 +00:00
Milan Broz
ef49063b27 Use new flag PVMOVE_EXCLUSIVE in update_metatada call.
There is no real functional change in this patch except it
avoids checking cluster cmirror module twice.

(Flag used in following patch.)
2012-03-26 20:31:01 +00:00
Milan Broz
0a3a88eb84 Remove unused and wrongly set cluster VG flag from clvmd lock query command. 2012-03-26 20:29:45 +00:00
Alasdair Kergon
23d0fddca5 fixes/improvements 2012-03-24 02:58:56 +00:00
Alasdair Kergon
7e27f33433 First veritysetup version using configure --with-veritysetup. 2012-03-24 01:59:59 +00:00
Milan Broz
9359753aab Fix pvmove if LV is activated exclusively but cmirror is not running.
In this case we should allow to use local mirror, check for cmirror
should apply only for lvconvert/lvcreate.

Introduced in 2.02.86 by removing !(lv->status & ACTIVATE_EXCL).

(Partially workaround, it is minimalistic patch for now.)
2012-03-23 16:28:40 +00:00
Zdenek Kabelac
4dd8a94a20 Add fixmes
There is missing some proper reaction when update fails ?
2012-03-23 10:34:51 +00:00
Zdenek Kabelac
963ccfd981 Always free hash table
also in error path
2012-03-23 10:33:26 +00:00
Zdenek Kabelac
8df1e265f8 Update and fix monitoring of thin pool devices
Code adds better support for monitoring of thin pool devices.
update_pool_lv uses DMEVENTD_MONITOR_IGNORE to not manipulate with monitoring.
vgchange & lvchange are checking real thin pool device for existance
as we are using   _tpool  real device and visible LV pool device might not
be even active (_tpool is activated implicitely for any thin volume).
monitor_dev_for_events is another _lv_postorder like code it might be worth
to think about reusing it here - for now update the code to properly
monitory thin volume deps.
For unmonitoring add extra code to check the usage of thin pool - in case it's in use
unmonitoring of thin volume is skipped.
2012-03-23 09:58:04 +00:00
Zdenek Kabelac
8b2fdb26e5 Return mem fail if hash insert fails 2012-03-23 09:48:17 +00:00
Zdenek Kabelac
4687bd1015 Make sure namelen fits into buffer allocated on stack 2012-03-23 09:43:44 +00:00
Zdenek Kabelac
0237724e17 Fix typo in config option check 2012-03-23 09:42:36 +00:00
Zdenek Kabelac
bc013bc21f Improve teardown_devs
Try to avoid start of dmeventd during vgremove if the code has any bug
inside.
2012-03-23 09:41:20 +00:00
Zdenek Kabelac
3ac8509b99 Update debug message
(compiled only with special debug define)
2012-03-23 09:39:59 +00:00
Zdenek Kabelac
f02892842b Update lcov target 2012-03-23 09:39:03 +00:00
Zdenek Kabelac
31a1a47ad5 Fix regression in thin monitoring
Patch https://www.redhat.com/archives/lvm-devel/2012-February/msg00118.html
removed initilization of thin volume monitoring, leaving it only for
thin pool - but missed the code move part for monitoring of thin pools.
Effectively making thin pools not monitorable.
2012-03-20 17:42:19 +00:00
Zdenek Kabelac
6e7d2724ba Fix check for passed in path for dmeventd startup
Check passed in executable path for dmeventd instead of predefined
compiled in path which is not the thing to be executed.
2012-03-20 17:38:47 +00:00
Zdenek Kabelac
adbf9a803a Sleep longer in the test so it really dies properly
and check for right exit code from pgrep
2012-03-20 13:35:46 +00:00
Zdenek Kabelac
7e7c9bb2f3 Update testing scripts
Make the teardown really usable - it will try down to remove all the left
devices even from previous test runs
(the only missing piece is probably proper mdadm teardown)
Add few more local vars
Try to setup PATH and LD_LIBRARY_PATH just once.
Try shorter sleeps.
2012-03-20 10:51:57 +00:00
Zdenek Kabelac
70d7a8ea41 Update test for dmevent restart
Actually restart was failing for different reason - so pass in proper
location of dmeventd for restart from lvm command and avoid using
the one from /sbin location.

Update pv create test with "" around path.
2012-03-20 10:48:59 +00:00
Zdenek Kabelac
c987c20bb3 Support improperly formated device numbers
There are kernel drivers (smblk) which set '-1' as their device major number.
This number is listed in /proc/devices then - but the kernel itself is using
just 12 bits - thus device is accessible via 4095 - there is posted patch
for 3.4 to fix this behavior (0 for auto allocation was mean to be used).

However to still allow using such devices with older kernels add some code
to use same behavior - so cut 12 bits from the major number from /proc/devices.

For now use log_warn() - maybe the severity of the message could be lowered
to just verbose level.
2012-03-20 10:47:02 +00:00
Peter Rajnoha
45d7997514 Do exit if LISTEN_PID environment variable not correct during systemd handover. 2012-03-16 21:30:53 +00:00
Zdenek Kabelac
d13c952ee4 Add comment about slow startup of dmeventd
Separat lvm dumpconfig on extra line, so the shell trace output
is not mixed with dump.
2012-03-16 19:13:09 +00:00
Zdenek Kabelac
4fd6aae7bf Skip dmeventd restart test
If dmevendt is not in /sbin, do not try to run restarting test.
FIXME: add some workaround for testing.
2012-03-16 19:11:29 +00:00
Zdenek Kabelac
2b6dee707a Regression fix kernel version check
got changed improperly with large update patch.
2012-03-16 19:09:49 +00:00
Zdenek Kabelac
124dec2880 Longer sleep after clvmd start 2012-03-16 19:08:09 +00:00
Peter Rajnoha
f4dd47242e Use pvscan --cache instead of vgscan in lvmetad init script and systemd unit. 2012-03-16 15:12:04 +00:00
Zdenek Kabelac
268ec5553e Switch condition for dmeventd check 2012-03-16 13:44:51 +00:00
Zdenek Kabelac
1238935a2d Update tests
Indent

Shell improvements - use internal function for checks

Use PVs in ""   (LV and VG cannot have spaces)

Several test very starting 'dmeventd' without annoucing
it via prepade_dmeventd.

Fix some of test actually.
2012-03-16 13:00:05 +00:00
Zdenek Kabelac
6ef0f403c7 Update test.sh check.sh utils.sh get.sh
Indent
Better shell usage
Function simplification
More usage of 'get' functions
Don't use valgrind tracing for check and get function (faster)
Update shell debugging (PS4, better stacktrace)
Support paths with spaces
Export SCRIPTNAME for external usage
Watch for dmeventd unexpectedly started during test
2012-03-16 12:59:43 +00:00
Zdenek Kabelac
523910032b Update aux.sh lvmwrapper
Indent

Add valgrind support:
 env LVM_TEST_VALGRIND={0123} (the higher level, more commands tested)
 env LVM_TEST_CLVMD=1   runs clvmd within valgrind.
 env VALGRIND  script name executed for each lvm command (def. is valg).

Smarted teardown - should minimize occurence of left dev entries
 (using dmsetup remove -f only as last resort)
 sort removed devices by open count before actual removal

Use "" around string that may contain spaces.

Set log/verbose and activation/retry_deactivation to defined value.

Remove  debug.log after successful lvm command (easier to check output).
2012-03-16 12:59:02 +00:00
Zdenek Kabelac
6960f5402a Test suite skip md_detection
Not normally needed - lets' enable it only for test that
really needs it.
2012-03-16 12:57:28 +00:00
Zdenek Kabelac
b71339435d Allow to use also special prefixed names for test
Currently we could not test special prefixes in our test suite.
As teardown will not find such device and basicaly busyloops here,
as at cannot remove such names.

This patch adds possibity to use:

vgcreate  V_$vg1 $dev

Note: you still need to use $PREFIX somewhere in the name.
(And of course, it's really bad idea to use $PREFIX (=LVMTEST)
 for normally used LVs)

The only purpose of this patch is to allow testing cluster with
special vg names that begins with V_ , P_....
2012-03-16 12:56:29 +00:00
Zdenek Kabelac
de0e4e755a Fix string parsing
Fix propagation of -e option - pass it via internal shell variable.
Fix parsing of /proc/mounts files (don't check for substrings).
 as reported by O.Mangold with suggested patch:
 https://www.redhat.com/archives/linux-lvm/2012-February/msg00030.html
Properly pass arguments with spaces ("$@")
Add validation for YES and EXTOFF variable content.
2012-03-16 12:53:05 +00:00
Petr Rockai
3f5b1f651a Relax the active-minor test a bit. 2012-03-16 11:36:51 +00:00
Petr Rockai
2d94d29def It's new. 2012-03-16 10:46:25 +00:00
Petr Rockai
c3b58ec989 Fix a regression in handling --major/--minor arguments to lvcreate & lvchange,
by allowing arg_int_value to be used with groupable options.
2012-03-16 10:43:52 +00:00
Jonathan Earl Brassow
f19be6776b Fix name conflicts that prevent down-converting RAID1 when specifying a device
When down-converting a RAID1 device, it is the last device that is extracted
and removed when the user does not specify a particular device.  However,
when a device is specified (and it is not the last), the device is removed and
the remaining sub-LVs are "shifted down" to fill the hole.  This cause problems
when resuming the LV because if the shifted devices were resumed (and thus
renamed) before the sub-LV being extracted, there would be a name conflict.
The solution is to resume the extracted sub-LVs first so that they can be
properly renamed preventing a possible conflict.

This addresses bug 801967.
2012-03-15 20:00:54 +00:00
Peter Rajnoha
d7bcb6e117 Remove dmeventd fifos on exit if they are not managed by systemd. 2012-03-15 08:45:55 +00:00
Zdenek Kabelac
6584221843 Update configure change
Always define THIN_CHECK_CMD define - since for now we compile thin code
in more places.
2012-03-14 19:25:04 +00:00
Zdenek Kabelac
8d46025ff8 Just move declaration 2012-03-14 17:15:22 +00:00
Zdenek Kabelac
552f340ff5 Use proper "" for absolute paths 2012-03-14 17:14:33 +00:00
Zdenek Kabelac
def8f31a2e Cleanup for conf file 2012-03-14 17:13:45 +00:00
Zdenek Kabelac
bac0394dc6 Improve thin_check option passing
Update a way we handle option passing - so we now support path and options
with space inside.
Fix dm name usage for thin pools with '-' in name.
Use new lvm.conf option thin_check_options to pass in options as string array.
2012-03-14 17:12:05 +00:00
Zdenek Kabelac
303b44c8fa Add --with-thin-check configure option
If specified - use given path without test (Path could be empty)
If autodetection is in use - check for command in available PATH.
2012-03-14 17:09:00 +00:00
Peter Rajnoha
23a88f868d Use SD_ACTIVATION env. var. in systemd units to better detect systemd in use.
LISTEN_PID and LISTEN_FDS environment variables are defined only during systemd
"start" action. But we still need to know whether we're activated during
"reload" action as well - we use the reload action to call "dmeventd -R"/"lvmetad -R"
for statefull daemon restart. We can't use normal "restart" as that is simply
composed of "stop" and "start" and we would lose any state the daemon has.
2012-03-14 15:51:51 +00:00
Peter Rajnoha
682be508c6 WHATS_NEW 2012-03-14 12:12:21 +00:00
Peter Rajnoha
4a01bb64fb Do not run a new dmeventd instance on restart if there's no existing one. 2012-03-14 11:16:00 +00:00
Alasdair Kergon
1b80b1086f Fix error message when pvmove LV activation fails with name already in use. 2012-03-13 20:21:26 +00:00
Petr Rockai
cbf86d952b Add a toplevel check_lvmetad target. 2012-03-12 15:05:55 +00:00
Zdenek Kabelac
91c0dc527b We need longer window for checking read from fifo.
Seems like some dmeventd startups are taking more time then expected,
so make the time window longer here.
2012-03-12 14:46:53 +00:00
Zdenek Kabelac
2412ede4c6 Removing call of release_vg(NULL)
Since we are in error path were vg must be always NULL,
skip call of release_vg() like we do in other places.
2012-03-12 14:43:12 +00:00
Zdenek Kabelac
b6476cba88 Using %u modifier to print unsigned values. 2012-03-12 14:41:42 +00:00
Zdenek Kabelac
4a7bc37bfa Better structure layout for device_info
Save some relocation entries and use directly char[].
Since we do not need yes more then 127 partitions per device, use just int8_t.
Move lvm_type_filter_destroy into local static function.
2012-03-12 14:40:41 +00:00
Zdenek Kabelac
57e379d5ee Simplify error path code for filter initialization
Use 'int' counter.
Use 'bad' with goto_bad macro.
2012-03-12 14:35:57 +00:00
Zdenek Kabelac
80a1c93421 Improve harness code
Support timestamping with harness -  using VERBOSE=2
Fix also logging in several situation
(i.e. continue logging multiple test in VERBOSE mode,
do not coredump with empty output).
2012-03-12 14:24:15 +00:00
Zdenek Kabelac
1ce7924bbf Switch to normal log_verbose message
Here it's not an error case - so do not push this message to stderr.
2012-03-12 14:18:28 +00:00
Zdenek Kabelac
8a9c1b37ad Fix error path for create_toolcontext
Never return unfinished toolcontext - since error path is hit on
various stages of initialization we cannot leave it partially uninitialized,
since we would need to spread many more test across the code for config_valid.
Instead return NULL and properly release udev library resources as well.
2012-03-12 14:15:04 +00:00
Peter Rajnoha
c1ce99cd81 Detect the lvm binary path in lvmetad udev rules.
We can't use 'DM_SBIN_PATH'. This one is set only for DM devices but not
for all block devices - the pvscan is run on all relevant block devices!

This LVM_SBIN_PATH (as well as DM_SBIN_PATH) detection should be removed
eventually but for upstream solution, we still have to do that as there are
known cases where the binaries are put either in /sbin or /usr/sbin
(some installation systems, initrd systems etc.).
2012-03-12 08:59:55 +00:00
Milan Broz
166393a6dd Remove some whitespaces.
(Test commit.)
2012-03-10 10:39:28 +00:00
Milan Broz
06a9a4e867 Remove some whitespaces.
(test commit)
2012-03-10 09:32:46 +00:00
Peter Rajnoha
6b85d4df44 Do not unlink the socket on daemon shutdown if activation is done via systemd. 2012-03-08 14:54:05 +00:00
Peter Rajnoha
c2d7d97301 Add 'Restart: on-abort' for lvmetad systemd service.
Restart lvmetad automatically if it exits due to reception of uncaught signal,
leaving a track in systemd log/syslog.
2012-03-08 09:49:09 +00:00
Zdenek Kabelac
dd121cf5ff Fix warn message and update man page
Fix regression in man page. The chunk size is in kilobyte units on command line
input though in the source code we work with sector size unit
so make it clear in the man page.

Update chunksize for thin pool in man page - it's max value is 1024M == 1G.

Fix warning range message to show proper max value.
2012-03-06 09:22:02 +00:00
Alasdair Kergon
418a0b6080 post-release 2012-03-06 04:47:37 +00:00
Alasdair Kergon
35d8953142 update reworded string 2012-03-06 03:16:05 +00:00
Alasdair Kergon
393cfa1656 close ] 2012-03-06 03:11:13 +00:00
Alasdair Kergon
bb0234a63a Change pvscan --cache syntax 2012-03-06 02:54:31 +00:00
Alasdair Kergon
9d36f3e765 pre-release 2012-03-06 02:50:40 +00:00
Alasdair Kergon
76161a80bf Adjust wording 2012-03-06 02:42:31 +00:00
Alasdair Kergon
5e506c84f6 re-word warnings to be clearer to user 2012-03-06 02:39:25 +00:00
Alasdair Kergon
9f354a1cae Switch pvscan --cache major:minor to --major --minor. 2012-03-06 02:30:49 +00:00
Zdenek Kabelac
61fe203e7d Some more missing supposedly 64bit operations.
Avoid use 32bit math for extent_size.
2012-03-05 15:05:24 +00:00
Peter Rajnoha
33213a3e8e Use already acquired variable... 2012-03-05 14:45:00 +00:00
Zdenek Kabelac
3cf63e35bf Fit thin pool metadata into 128MB
If the lvcreate may decide some automagical values for a user,
try to keep the pool metadata size into 128MB range for optimal
perfomance (as suggested by Joe).

So if the pool metadata size and chunk_size were not specified,
try to select such values they would fit into 128MB size.
2012-03-05 14:19:13 +00:00
Zdenek Kabelac
ac2ac815f1 Improve warning
Use thin_dump --repair suggestion in log error message
and use just warning on  deactivation path without repair info
(since node has been deactivated).

Also check whether there is not 16 args for thin_check configured.
2012-03-05 14:15:50 +00:00
Zdenek Kabelac
a31a947d49 Use 64bit math
Prevent 32bit overflow and resulting weird error reports when working
with TB sizes..
2012-03-05 14:12:57 +00:00
Peter Rajnoha
9a988ab420 Add test for name mangling functionality. 2012-03-05 12:49:06 +00:00
Peter Rajnoha
b608869a26 Check for multiple mangled names in auto mangling mode.
Auto mode can't deal with multiple mangled names. We can do that while working
in hex mode, but in auto mode, this would lead to device name ambiguity.
2012-03-05 12:48:12 +00:00
Peter Rajnoha
28d128f1b6 Fix dm_task_get_name_unmangled to not unmangle already unmangled name.
In 'auto' and 'hex' mode, these names are already unmangled on ioctl return.
There's no point on trying to do that once again (actually it's a bug!).
2012-03-05 12:45:43 +00:00
Peter Rajnoha
14a6a1e17e Check whether device names are properly mangled on ioctl return.
Be more strict when unmangling names on ioctl return - require the name to be
properly mangled in 'auto' and 'hex' mode. There really should not be any
blacklisted character since the names should be renamed already (by means of
renaming it directly or running 'dmsetup mangle' for automatic rename).
2012-03-05 12:43:03 +00:00
Peter Rajnoha
0711a50005 Clean up internal mangling interface. 2012-03-05 12:40:34 +00:00
Zdenek Kabelac
913a0dbb3a Validate udev structures
Avoid using NULL pointers from udev. It seems like some older versions of udev
were improperly returning NULL in some case, so do not silently break here,
and give at least a warning to the user.
2012-03-04 17:40:59 +00:00
Zdenek Kabelac
4600444786 Return success for deactivation of thin pool
if the thin_check fail on thin pool - still return successful deactivation,
since lvremove would currently fail.

TODO: find some way to not run check with lvremove.
2012-03-04 17:36:23 +00:00
Zdenek Kabelac
24cbb72291 Remove part of FIXME
(and reindent a code below)
2012-03-04 16:05:42 +00:00
Zdenek Kabelac
bebbb5aff9 Update thin test for thin_check
Test if thin_check is present in system and disable its use, when its missing.

Add testing for poolmetadatasize.

FIXME: Allocation policy for metadata pool might need some relaxing.
(For now it needs to put all block on one PV.)
2012-03-04 16:04:52 +00:00
Zdenek Kabelac
e04c6e4972 Speedup test run by a few minutes
Reduce disc excercise for some test and focus on LVM testing by
using smaller extent size.

Reduce number of teardown_devs calls and use vg/lvremove instead.

Don't sleep for seconds on pvmove.

FIXME: shell/lvconvert-mirror-basic.sh seems to need more checking.
Test fails for smalled extent size then 512k.
2012-03-04 16:02:19 +00:00
Zdenek Kabelac
3fb6ddf3ce Add FIXMEs
Processing loop needs some thinking
2012-03-04 15:58:31 +00:00
Zdenek Kabelac
301c011a92 Just make error message more clear
Make more obvious, the origin LV for snapshot must be active.
2012-03-04 15:57:27 +00:00
Alasdair Kergon
41b738c1e4 Scan all devices for lvmetad if 'pvscan --cache' used without device list. 2012-03-03 18:32:53 +00:00
Alasdair Kergon
e66e524898 Obtain VG list from lvmetad before relying on get_vgs, otherwise PVs in VGs
don't appear.
2012-03-03 17:12:21 +00:00
Alasdair Kergon
f723e0396e Reinstate accidentally-deleted line. 2012-03-03 17:03:20 +00:00
Alasdair Kergon
eed642e0b3 post-release 2012-03-03 02:08:37 +00:00
Alasdair Kergon
eb706b3106 . 2012-03-03 01:28:15 +00:00
Alasdair Kergon
9a26228d28 pre-release 2012-03-03 01:00:49 +00:00
Zdenek Kabelac
e18ea55fef Code refactoring
Properly test for dm_asprintf result.
Keep unlocking of mutex in the same function and do not spread lock and
unlock over functions.
2012-03-02 23:01:10 +00:00
Zdenek Kabelac
059ff7d5ff Just space moving
Don't leave space on EOL.
2012-03-02 22:58:23 +00:00
Zdenek Kabelac
00422596c1 List _thread_registry missed mutex
Operation on _thread_registry needs to be covered by mutex.
Cosmetic move a die code after free for valgind short leak list.
2012-03-02 22:57:25 +00:00
Alasdair Kergon
d177da6b98 a fixme 2012-03-02 22:44:31 +00:00
Zdenek Kabelac
299b4751c2 Support 16GB for thin pool metadata
Add some hack math to allow 16GB devices to be passed as thinpool metadata.
Since kernel has put in limit to not allow which are just bigger then
some predefined constant in kernel but not matching 16GB so any device bigger
is rejected.

FIXME: Current code still might need more tweaks to be more generic.
2012-03-02 21:53:17 +00:00
Alasdair Kergon
7c47bdc5ec fix non-lvmetad pvscan macro 2012-03-02 21:50:02 +00:00
Zdenek Kabelac
6859106e2a Add support for thin check
Use libdm callback to execute thin_check before activation
thin pool and after deactivation as well.

Supporting thin_check_executable which may pass in extra options for
the tool.
2012-03-02 21:49:43 +00:00
Zdenek Kabelac
bfb8a95111 Purge remaining trim bits from code 2012-03-02 21:43:26 +00:00
Alasdair Kergon
1a002bbe59 missing reply frees 2012-03-02 21:24:37 +00:00
Alasdair Kergon
598134b8e7 Pass struct device around internally rather than dev_t.
Add 3rd daemon return state "unknown" for lookups that are carried out
successfully but don't find the item requested.
Avoid issuing error messages when it's expected that a device that's
being looked up in lvmetad might not be there.
2012-03-02 20:46:36 +00:00
Zdenek Kabelac
cbc8dedafe Merge metadata size checking
Move the code for poolmetadatasize operation into one place.
Report override for minimum and maximum size.
Drop _read_thin_params function its error reporting is handled elsewhere.
2012-03-02 20:18:25 +00:00
Alasdair Kergon
fdfa12d956 Change pvscan --lvmetad to pvscan --cache. 2012-03-02 18:09:46 +00:00
Zdenek Kabelac
34b6b9d0d7 Added dm_tree_node_set_callback() for preload and deactivation hooks
Run users hook after preload for the node is finished,
or after the node has been deactivated.
2012-03-02 17:31:21 +00:00
Zdenek Kabelac
197d0d4af6 Fix estimation of pool metadata device size
If no size was give the later added minimal size check efectively
disable this code. Also the argument for size now must be kept
in sector_size, so adding division by SECTOR_SIZE (moved into
a const expression)
2012-03-02 17:25:21 +00:00
Alasdair Kergon
e3e0811b67 Allow multiple device names with pvscan --lvmetad.
Hold global lock in pvscan --lvmetad.  (This might need refinement.)
Add PV name to "PV gone" messages.
Adjust some log message severities.  (More changes needed.)
2012-03-02 16:58:41 +00:00
Zdenek Kabelac
7659a953a7 Remove support for TRIM message
It's been unsupporte for now - and it's not going to be
implemented for thin pool kernel driver - so dropping
appearence of TRIM from libdm and lvm.
2012-03-02 13:26:08 +00:00
Alasdair Kergon
7e42a312ae Reinstate error if e.g. daemon comms fails. 2012-03-02 02:55:45 +00:00
Zdenek Kabelac
6aff7af424 Wait for mirror in sync
1s could be too short
2012-03-02 00:08:48 +00:00
Zdenek Kabelac
d61c67b52d Check for error from dup2
Do not take correct fd as error result.
2012-03-01 23:20:45 +00:00
Zdenek Kabelac
a18693e705 Add traceback for failpath 2012-03-01 22:55:21 +00:00
Zdenek Kabelac
59c71b93ad Skip zero length messages
In case of zero length message, there would be a memory leak on
return path from _do_process_request.
2012-03-01 22:54:17 +00:00
Zdenek Kabelac
526655301e Check allocated pointers
Test pointers from allocation against NULL.
Error paths should be checked, some of them probably need
some extesions.
2012-03-01 22:52:59 +00:00
Zdenek Kabelac
1eb1626f08 Improve logging
Use %m for strerror.
Switch to use 0 for fail return code.
2012-03-01 22:06:18 +00:00
Zdenek Kabelac
17c3bf7b22 Check pointer before deref in debug message 2012-03-01 21:58:55 +00:00
Zdenek Kabelac
d27834ff0a Explicitely ignore fail from this label_read 2012-03-01 21:57:43 +00:00
Zdenek Kabelac
d102667b6d Add some more pointer validation
Ensure _display_name() and _add_dep() would not deref NULL names.
Switch to use internal dm_basename().
2012-03-01 21:56:44 +00:00
Zdenek Kabelac
e922a85c51 Test alloc fail 2012-03-01 21:49:32 +00:00
Zdenek Kabelac
2236c69c42 Replace goto with just log_debug
FIXME: temporal quick hack to make things green
There might be a better reaction.
2012-03-01 21:46:31 +00:00
Zdenek Kabelac
a75b0d4ee6 Check for alloc error
Simplify segtype_str usage and check for NULL segtype.
2012-03-01 21:21:54 +00:00
Zdenek Kabelac
f996955304 Log sys error for lseek 2012-03-01 21:19:20 +00:00
Zdenek Kabelac
27088d51a2 Debug log for hold_lock failure 2012-03-01 21:18:38 +00:00
Zdenek Kabelac
73ef9a7eaa Log sys errors 2012-03-01 21:17:29 +00:00
Zdenek Kabelac
48073da2b8 Backtrace for failing drop_cached_metadata 2012-03-01 21:16:44 +00:00
Zdenek Kabelac
657826a255 Check for udev_get_dev_dir result
Don't use NULL return value.
2012-03-01 21:16:05 +00:00
Zdenek Kabelac
465fe71f6c Correct enum type
Using debug_t and some forgetten alloc_policy_t, force_t from past commit.
2012-03-01 21:14:43 +00:00
Zdenek Kabelac
0b60d3a05e Few more close and dev_close trace
Adding (void) where we cannot really report an error.
2012-03-01 21:12:37 +00:00
Petr Rockai
d84d5b113e Use 64 bit integers for device size & label sector in _pv_populate_lvmcache in
lvmetad client code. Fixes RHBZ 798267.
2012-03-01 20:04:44 +00:00
Petr Rockai
10dab1c621 Add a regression test for the PSize bug in pvs/lvmetad. 2012-03-01 20:03:45 +00:00
Petr Rockai
31128d8747 Use 64 bit integers whenever extracting numbers from daemon replies. 2012-03-01 19:54:53 +00:00
Jonathan Earl Brassow
db424996be s/CPG_/CS_: Various CPG constants are going away, even though CPG itself stays
F17 is getting rid of OpenAIS libraries (and checkpointing).  While the
CPG stuff is staying, some if its constants are being removed.  So, we
must adjust and use the remaining constants which the CPG constants were based on.

[~]# egrep 'CPG_DISPATCH_ALL|CPG_OK' /usr/include/*/*
corosync/corotypes.h:#define CPG_DISPATCH_ALL     CS_DISPATCH_ALL
corosync/corotypes.h:#define CPG_OK               CS_OK
2012-03-01 17:41:39 +00:00
Zdenek Kabelac
86d71759d0 Missed ; 2012-03-01 10:46:39 +00:00
Zdenek Kabelac
fa750fd0c5 More useful debug message 2012-03-01 10:41:48 +00:00
Zdenek Kabelac
6ebad37e9b Add _rimage as reserved suffix 2012-03-01 10:39:21 +00:00
Zdenek Kabelac
05a2d7bcfd Add cast for ptrdiff_t - cleanup gcc warning 2012-03-01 10:31:35 +00:00
Zdenek Kabelac
6732bedecb Improve error logging
Log errors instead of plain return 0.
Check for f->private strdup result.
2012-03-01 10:30:48 +00:00
Zdenek Kabelac
041c69362d Add stack trace for lv_reduce
Use common code call with stack trace.

TODO: maybe the release_lv_segment_area()
should be actually able to return error code to upper level.
2012-03-01 10:09:36 +00:00
Zdenek Kabelac
c53667ac45 Check for version string buffer
Since lvm seems to call driver_version(NULL, 0)  this would lead
to crash. Though the combination of the code is probably very hard to hit.
If the user doesn't supply version buffer, just skip printing to buffer.
2012-03-01 10:07:38 +00:00
Zdenek Kabelac
c9ab8ce0a3 Check for allocation error
return ENOMEM when malloc fails.
2012-03-01 09:54:23 +00:00
Zdenek Kabelac
f5de369e04 Log fail of pthread_join
Syslog error outside of mutex section.
2012-03-01 09:50:04 +00:00
Zdenek Kabelac
53bdfe124b Fix leak of FID structure 2012-03-01 09:46:38 +00:00
Zdenek Kabelac
052269aa2c Wipe initial 4KiB for non-zeroed thin volumes
If the thin pool has disabled zeroing  (created with -Zn), we at least
clear initial 4KiB of such thin volume (provisions 1st block).

If lvcreate is executed with '-an' command will abort (same way like we for
normal LV - however for normal LV option -Zn may skip clearing completely,
for thin volumes this option is not supported (applies only for pools).
2012-02-29 22:08:57 +00:00
Jonathan Earl Brassow
50bdd70043 Allow cluster mirrors to handle the absence of the checkpoint lib (libSaCkpt).
The OpenAIS checkpoint library is going away; therefore, cmirrord must
operate without it.  The algorithms the handle the timing of when to send
a checkpoint, the determination of what to send, and which ongoing cluster
requests are relevent with respect to the checkpoints are unaffected.  We
need only replace the functions that actually perform the storing/transmitting
and retrieving/receiving of the checkpoint data.  Rather than store the
checkpoint data in an OpenAIS checkpoint file, we simply transmit it along
with the message that notifies the incoming node that the checkpoint is
ready.
2012-02-29 21:15:34 +00:00
Alasdair Kergon
6508cc6805 Pass 'single_device' parameter down to suppress 'Can't find uuid' messages
when reading VG text metadate and called from pvscan --lvmetad.

(Longer-term, that check needs moving outside of that code.)
2012-02-29 02:35:35 +00:00
Petr Rockai
942144f4a3 Fix a whitespace bug in the last checkin. 2012-02-29 00:19:14 +00:00
Petr Rockai
ce80d6e401 Attempt a fix for lvm shell accumulating copies of orphan PVs with each "pvs"
invocation.
2012-02-29 00:18:27 +00:00
Alasdair Kergon
193d697ba4 expose new include files to build 2012-02-28 18:35:32 +00:00
Alasdair Kergon
ba23f9e71f Reflect new file locations, include file updates etc. 2012-02-28 18:35:04 +00:00
Alasdair Kergon
3af3a9a68b remove old makefile 2012-02-28 18:31:28 +00:00
Alasdair Kergon
09977e0364 move daemon files 2012-02-28 18:30:39 +00:00
Alasdair Kergon
33af17af76 new makefiles 2012-02-28 18:26:40 +00:00
Alasdair Kergon
2f9161a1e2 Series of commits to move daemon code into separate top-level dir and
fix build deps etc.
2012-02-28 18:25:48 +00:00
Alasdair Kergon
b10489f568 Invert some return codes (which used to get ignored anyway) in cases when
lvmetad is not being used.
2012-02-28 18:23:56 +00:00
Alasdair Kergon
0c598c8270 Bypass lvmetad fns when not configured to use lvmetad. 2012-02-28 18:22:52 +00:00
Alasdair Kergon
d7d948eacc Add LVMETAD_SUPPORT 2012-02-28 18:20:58 +00:00
Alasdair Kergon
dea586a587 If lvmetad fails, don't set _has_scanned 2012-02-28 18:20:17 +00:00
Alasdair Kergon
d7ca49059f Define LVMETAD_SUPPORT in source code 2012-02-28 18:18:48 +00:00
Alasdair Kergon
830057e338 spacing 2012-02-28 18:17:37 +00:00
Alasdair Kergon
98711b45c7 I don't think -u is meant to be used with lvmetad 2012-02-28 18:16:23 +00:00
Alasdair Kergon
b009e2235e Check return values after calling new lvmetad fns
(Haven't checked error path handling though)
2012-02-28 18:08:08 +00:00
Alasdair Kergon
7a3de26248 Fix empty string warning logic in _find_config_str. (1.02.68)
pvcreate gives
WARNING: Ignoring unsupported value for metadata/pvmetadataignore.

It was warning if there is no config file entry instead of only if the node
exists but is empty.
2012-02-28 17:46:47 +00:00
Zdenek Kabelac
505c0cbb2e Check for failing dm_asprintf 2012-02-28 14:25:37 +00:00
Zdenek Kabelac
f2cbcb027e Using enum types for enums
alloc_policy_t, dm_string_mangling_t, percent_range_t, sign_t
2012-02-28 14:24:57 +00:00
Zdenek Kabelac
7ed8790301 Revert free of allocated segtype
lvm_register_segtype takes ownership of segtype and call destructor
for it in error path.
2012-02-28 14:23:41 +00:00
Peter Rajnoha
3db3d4d895 LVMetaD socket operates in non-blocking mode. 2012-02-28 13:07:03 +00:00
Peter Rajnoha
41d8247b51 Add support for systemd socket handover for common daemon-server code and also add support for new OOM killer adjustment interface.
This code is already a part of dmeventd, but it's modified slightly to check
sockets instead of FIFOs.
2012-02-28 13:05:21 +00:00
Zdenek Kabelac
a22be5e2e9 Test dm_hash_insert() failures mem failures 2012-02-28 11:12:58 +00:00
Zdenek Kabelac
0098904026 Check error from _lv_each_dependency
_lv_mark_if_partial_collect cannot fail, however it's good to keep
checking here as we do in all other cases.
2012-02-28 11:10:45 +00:00
Zdenek Kabelac
c0d7975bb2 Check result of export_vg_to_buffer
and avoid usage of failed buffer.
2012-02-28 11:09:06 +00:00
Zdenek Kabelac
22266ea325 Add stacktrace for init_lvmcache_orphans fail path 2012-02-28 11:08:04 +00:00
Zdenek Kabelac
77e9346a06 Ensure clvmd message is always \0 terminated
Drop whole buffer clearing (most messages at <100 bytes).
Just make sure we have always \0 terminated string for strlen() operations.
(before for PIPE_BUF sized messages this was not set).
2012-02-28 11:06:56 +00:00
Zdenek Kabelac
c998f7d27a Better detection of missing dmeventd fifo connection 2012-02-28 11:03:24 +00:00
Zdenek Kabelac
c9d0627af6 Missed to properly merge patch for definition of MAX_MISSING_LEN
(fix previous commit)
2012-02-28 10:42:20 +00:00
Zdenek Kabelac
49d8753caf Add log_sys_error for close 2012-02-28 10:14:06 +00:00
Zdenek Kabelac
fdc31c7a8a Add some stack traces for dev_close error paths 2012-02-28 10:11:35 +00:00
Zdenek Kabelac
535fbc2256 Explicitely ignore errors in label_read iteration.
We don't want stack traces here, so mark as void.
2012-02-28 10:10:07 +00:00
Zdenek Kabelac
9cdfa9e854 Explicitely check list size of segments
instead of checking for NULL from last_seg and first_seg.
2012-02-28 10:08:20 +00:00
Zdenek Kabelac
c3cb4bb872 Duplicate standard in/out descriptors for daemon
Addressing somewhat tricky bug here.
Since stdin,stdout,stderr were closed it's been occasionally possible to
see some unexpected messages to be flowing into a clvmd and generating some
randomly sized allocation of many megabytes. Since the message was not
being generated by standard send_message() construction, after some more
testing it apperead to be a debug log message - thus something has flown
to local socket opened on strandard out descriptor.

To fix the issue - use standard file descriptor duplication code for daemons.

For making easier debugging of polling daemon - developer might want to recompile
without modifition of standard file descriptors.
2012-02-28 10:06:53 +00:00
Zdenek Kabelac
da0f745cb6 Limit max size of clvmd message
This could be seen as some sort of simple validation - it's not easy to
recognize a valid message for now - but we definitely do not want to
allocate a lot of megabytes in  clvmd memory locked daemon when broken
message gets in.

Size of 8000 is just selected for now - possibly there could be much
lower value put in.
2012-02-28 09:58:19 +00:00
Zdenek Kabelac
9ef8d6617e Do not send uninitilised bytes
Use struct initalizers to fill struct members and at the same time have
all unspecified members set to 0.
2012-02-28 09:53:55 +00:00
Peter Rajnoha
0fd3ed18f1 Fix dm_task_set_name to properly resolve path to dm name.
Wrong variable was checked in _dm_task_set_name_from_path.
2012-02-28 08:36:51 +00:00
Peter Rajnoha
60d0b63380 lvmetad.socket 2012-02-27 15:45:49 +00:00
Zdenek Kabelac
889007ee2c Do not hide deallocation of buffer
As API is passing structures by value, do not leave
the function which created buffer and keeps valid pointer
look like it would be some memory leak and move
free of buffer from inner function - makes more obvious,
how is the memory management handled.
2012-02-27 11:49:16 +00:00
Zdenek Kabelac
95ad3abc76 Add FIXME for misuse in case of dm_event_mask enum.
Using enum for bitmask is not proper solution here.
2012-02-27 11:46:25 +00:00
Zdenek Kabelac
cd1f056f20 Use unsigned type for bitmask
Using report_type_t for bitmask is not correct, since we have not defined types
for all  bit combinations - so switching to unsigned type, since values of
report_type_t enum are unsigned.
2012-02-27 11:45:05 +00:00
Zdenek Kabelac
faf8ad30f1 Just code move of hash initialization in front of function
Make sure both hash tables are initialized before _read_sections() call.
Presents no functional change (since PV scan phase was not adding LV hashes),
but makes the code easier to handle mem failing case, and static analyzer is
hapier as well.
2012-02-27 11:40:58 +00:00
Zdenek Kabelac
4960a3c7f3 Add stack traces for lock_vol failures
Adding at least stack traces with some FIXMEs for cases,
where we might want to do something cleaver - maybe fail command
or give user hints something is not going well ?

For remote_backup is stack probably 'good' enough for now.
2012-02-27 11:35:59 +00:00
Zdenek Kabelac
3b45ebe86b Check id_write_format result
Currently we never fail with 64byte uuid buffer,
but just stay consitent with rest of the code and check for result.
2012-02-27 11:32:48 +00:00
Zdenek Kabelac
d7bb0d77aa Explicitely ignore fail error on hash_insert
We cannot do anything better here anyway - we are already in logging function,
so just ignore this issue here - it will most likely stop application later.
2012-02-27 11:31:30 +00:00
Zdenek Kabelac
125456bb3e Add some log_sys_errors to close() call 2012-02-27 11:28:47 +00:00
Zdenek Kabelac
e51cc452d2 Nicer cleanup of excl_uuid hash
Since it on exit path, it's not a big difference,
but makes less noise in analyzer and valgrind.
2012-02-27 11:26:25 +00:00
Zdenek Kabelac
4f443a0d0c Check for vg_name existance
Since vg_read() mda ops could be called with NULL vg_name,
check it before derefence also for pool and format1.
2012-02-27 11:23:15 +00:00
Peter Rajnoha
7ed9336202 Systemd is linux-specific - move the supporting code under the 'ifdef linux'.
Some 'defines' used in this specific code were already under an ifdef so this
patch just completes it.
2012-02-27 11:13:51 +00:00
Zdenek Kabelac
0027de97c9 Fix missing break
Bug introduced with addition of internal error default case.
Seem like this code is not used.
TODO: add coverage test.
2012-02-27 11:13:48 +00:00
Zdenek Kabelac
6ddd3f5d4c Add assert for oldname
Code cannot proceed if oldname would be NULL.
Since lvmetad currently doesn't use logging mechanism of lvm to report
internal errors - stay with current code style of lvmetad which uses
plain asserts for cases like this.
2012-02-27 10:19:00 +00:00
Zdenek Kabelac
90f31fc31c A bit more safe version of sprintf
Use just buffer size limit (it's used for debug only)
2012-02-27 10:17:06 +00:00
Zdenek Kabelac
061b3169f9 Test seg pointer for non-null
As the function accepts NULL for 'seg' parameter,
check for it before dereference.
2012-02-27 10:15:08 +00:00
Zdenek Kabelac
e2b77ed33c Move allocation after check for vgid
so there is no mem leak on this error path.
Also actually check if the hash exists.
2012-02-27 10:10:43 +00:00
Zdenek Kabelac
b85b9ce4b7 Make force_t enum usage obvious 2012-02-27 10:06:58 +00:00
Zdenek Kabelac
edb0de6539 Test result of _init_tags. 2012-02-27 10:05:35 +00:00
Zdenek Kabelac
1b197085b3 Explicitely ignore result from sync_unlock()
Make it obvious to analyzer - we can't do anything better here anyway.
2012-02-27 10:02:17 +00:00
Zdenek Kabelac
61a6481305 Always check result of _set_vg_name() 2012-02-27 10:00:23 +00:00
Zdenek Kabelac
dc92528f10 Drop uname call, it's not used from gulm era. 2012-02-27 09:58:18 +00:00
Zdenek Kabelac
fe84afbbab Check allocation result 2012-02-27 09:56:27 +00:00
Zdenek Kabelac
9b0c352a57 Remove test for pvid
Since pvid is char buffer[] and not pointer,
there is no point to check it for NULL.
2012-02-27 09:54:25 +00:00
Zdenek Kabelac
c1a04f0fe3 Ignore result of unlock explicitely 2012-02-27 09:52:37 +00:00
Zdenek Kabelac
42c3f42474 Just reindent with tabs 2012-02-27 09:51:31 +00:00
Peter Rajnoha
460605cf71 Use 'killproc' fn in lvmetad init script (fn provided by 'functions' init script library). 2012-02-27 08:56:46 +00:00
Petr Rockai
c401303ca2 Fix a possible SEGV in lvmetad client code, and a minor leak in pvscan
--lvmetad.
2012-02-26 13:42:50 +00:00
Petr Rockai
9656be3deb A bit of documentation for lvmetad_vg_lookup. 2012-02-26 08:50:13 +00:00
Petr Rockai
e91a7700eb - Improve error reporting on lvmetad connection failure.
- Fix a couple of memory leaks in the lvmetad client code.
- Avoid an error in lvmetad_pv_gone when we aren't using lvmetad.
2012-02-26 08:49:40 +00:00
Petr Rockai
50730ddd96 Improve error handling & reporting in common daemon code. 2012-02-26 08:46:28 +00:00
Petr Rockai
7ce18df26b Add tentative documentation of pvscan --lvmetad to the pvscan manpage. 2012-02-26 08:45:07 +00:00
Peter Rajnoha
be5d2c75f3 Also remove intermediate lvmetad init script file. 2012-02-24 15:07:00 +00:00
Peter Rajnoha
dc7d369ad8 Use '-p PID_FILE' for the status call since the pidfile is in its own lvm subdir and return proper status code. 2012-02-24 13:45:37 +00:00
Peter Rajnoha
9a3497dcfa The daemon as well as the first scan must be both executed properly, otherwise it's an error! 2012-02-24 13:11:57 +00:00
Peter Rajnoha
88a898d6cf Add LVMetaD init script. 2012-02-24 13:03:50 +00:00
Peter Rajnoha
a80e4e0d1f Add skeleton for lvmetad udev rules - 69-dm-lvm-metad.rules.
Why using the order 69:

  - Storage processing in general happens in 60-persistent-storage.rules,
    including the blkid call that adds some usable information we can use
    for filtering and speedup (these rules are part of upstream udev and
    the order is preserved on most distros)

  - There's still some other storage-related processing done after
    60-persistent-storage.rules in general. These might add some detailed
    storage-related information we might use to filter devices effectively
    (e.g. MD udev rules, ...).

  - We need lvmetad rules to be processed before any consumers can use the
    output - so the metadata cache is ready soon enough (e.g. udisks rules).

  - There's no official (upstream udev) document about assigning the order,
    so this number is chosen in best belief it will suit all scenarios.
2012-02-24 09:53:12 +00:00
Petr Rockai
7bc76e9c20 Fix server-side leaks in lvmetad. 2012-02-24 00:24:37 +00:00
Petr Rockai
1336a1cbf1 Clean up the lvmetad state more thoroughly upon shutdown. 2012-02-24 00:11:59 +00:00
Petr Rockai
6295e3ef1e Fix an error path in daemon_open. 2012-02-24 00:02:54 +00:00
Petr Rockai
e8681b8d42 Make the calls to pvscan --lvmetad in the testsuite slightly more civilised. 2012-02-23 23:58:42 +00:00
Petr Rockai
ed5d30f075 Couple of improvements in the daemon (common + lvmetad) code:
- some client-side memory leak fixes
- announce and check protocols and protocol versions
2012-02-23 23:52:11 +00:00
Zdenek Kabelac
75ac0b638c Limit sscanf params with size
Make sure parsed string fits given char buffer.
2012-02-23 22:50:50 +00:00
Zdenek Kabelac
e3a47a9370 Unit testing for some string libdm functions
TODO: more functions need to be unit tested.
2012-02-23 22:47:17 +00:00
Zdenek Kabelac
c2915dd1ed Introduce dm_strncpy
Should be faster then strncpy - since we could avoid clearing 4KB pages
with each strncpy(...,PATH_MAX).
Also it's easy to check whether string fit - and eventually avoid
to continue working we incomplete string.
2012-02-23 22:45:43 +00:00
Zdenek Kabelac
aa558eaeb8 Use const for lv
lv_is_active doesn't needs modifiable LV struct so keep it const.

Remove lv_send_message() left bits from code -
they were never released in 2.02.89.
2012-02-23 22:41:57 +00:00
Zdenek Kabelac
8411f22f43 Keep same sign for 's' counter 2012-02-23 22:36:56 +00:00
Zdenek Kabelac
33f24144bd Drop const modifier from fb
It's not useful here to use const - since we need non-const value for unmap.
Also remove now unneeded cast.
2012-02-23 22:36:21 +00:00
Zdenek Kabelac
751e17f17b Add explicit cast for time() ret value
To keep all numbers with same sign
2012-02-23 22:31:23 +00:00
Zdenek Kabelac
2237e93169 Use same signed numbers
Keep unsigned aritmetic.

TODO: we should probably switch dm_split_words() to return unsigned numbers.
(minor API libdm change mostly compatible)
2012-02-23 22:30:20 +00:00
Zdenek Kabelac
19fde08c6a Drop backtrace after log_error
Just a minor change to not give backtrace when log_error has been just
reported.
2012-02-23 22:24:47 +00:00
Zdenek Kabelac
b2edbee763 Remaing code suffling
Move declaration to the front of function to follow coding rules.
2012-02-23 22:23:12 +00:00
Zdenek Kabelac
1e59596393 Fix regression from cleanup commit
baton is being modified, so needs to be cleared before each use.
2012-02-23 19:03:48 +00:00
Alasdair Kergon
7a502785b0 post-release 2012-02-23 18:26:28 +00:00
Alasdair Kergon
2a0264eb7d pre-release 2012-02-23 18:22:09 +00:00
Zdenek Kabelac
17829d245c Missed dm_free in last commit 2012-02-23 18:19:32 +00:00
Zdenek Kabelac
de61ffd1b5 Limit number of mem allocs and copies
If we have good enough glibc to return number of needed chars, do not
loop try to reach good size, but use this size directly for allocation,
saving also last strdup.

Since now we start with 16 bytes - skip buffer realloc for shorter string.
2012-02-23 18:05:12 +00:00
Zdenek Kabelac
0dd5bf2730 Some reformating for lvmetad uddates
cleanup gcc warning,
use PRIu64
header cleanups
const pointer fixes.
2012-02-23 17:59:32 +00:00
Jonathan Earl Brassow
74d0a06513 Require number of stripes to be greater than parity devices in higher RAID.
Also, add some comments to code that I recently added that may be unclear
otherwise.
2012-02-23 17:36:35 +00:00
Petr Rockai
30cc37b645 Add a vgscan to lvcreate-repair.sh. The old test applied device filter hacks to
make devices invisible to lvm, but the behaviour of those is slightly different
than of actual missing devices. Running vgscan after re-enabling the device
triggers a metadata repair which is not done by vgremove -ff. This is not a
regression, merely an odd behaviour that has been around even before lvmetad.
2012-02-23 14:55:29 +00:00
Peter Rajnoha
9d206272a5 Use 'd' instead of 'D' in tmpfiles configuration to not overwrite any existing directory.
...that happens when systemd creates it first (e.g. based on a .socket unit)
before the tmpfiles.d content is executed.
2012-02-23 14:24:30 +00:00
Petr Rockai
af8b3ace37 Kick out a debugging call to valgrind in lvmcache-exercise.sh. 2012-02-23 14:21:18 +00:00
Petr Rockai
4c10334843 Add a missed dependency that is required to pull in dameons/common in an early
part of the build.
2012-02-23 13:58:56 +00:00
Petr Rockai
165a28a8a5 We need daemons/common now even if the lvmetad server side is not built. 2012-02-23 13:41:11 +00:00
Peter Rajnoha
0c7a75df32 Remove the old systemd socket file, we're using the .in template now. 2012-02-23 13:39:08 +00:00
Peter Rajnoha
38c2ccf7b8 Use DEFAULT_RUN_DIR instead of hardcoded value in lvmetad systemd units
and add ExecStartPost=vgscan to actually run the first scan that will
fill the metadata daemon with metadata information.
2012-02-23 13:31:49 +00:00
Petr Rockai
becb604089 Add -ldaemon to make.tmpl.in. 2012-02-23 13:31:01 +00:00
Petr Rockai
c6101ffc0c Missed update for include/.symlinks.in to include lvmetad.h. 2012-02-23 13:19:26 +00:00
Petr Rockai
002f7a0ce2 The lvmetad client-side integration. Only active when use_lvmetad = 1 is set in
lvm.conf *and* lvmetad is running.
2012-02-23 13:11:07 +00:00
Petr Rockai
fa73b2ee3d Also use DEFAULT_RUN_DIR for the lvmetad socket on the client side. 2012-02-23 11:40:24 +00:00
Peter Rajnoha
8acbf570b5 Add LVMetaD systemd units. 2012-02-23 11:24:07 +00:00
Peter Rajnoha
40f111e9b9 Install lvm2 monitoring init script and systemd units only if dmeventd is built. 2012-02-23 11:13:18 +00:00
Peter Rajnoha
7e61f1ef44 Clean intermediate files. 2012-02-23 10:01:12 +00:00
Jonathan Earl Brassow
e10e4a5134 Fix allocation code to allow replacement of single RAID 4/5/6 device.
The code fail to account for the case where we just need a single device
in a RAID 4/5/6 array.  There is no good way to tell the allocation functions
that we don't need parity devices when we are allocating just a single device.
So, I've used a bit of a hack.  If we are allocating an area_count that is <=
the parity count, then we can assume we are simply allocating a replacement
device (i.e. no need to include parity devices in the calculations).  This
should make sense in most cases.  If we need to allocate replacement devices
due to failure (or moving), we will never allocate more than the parity count;
or we would cause the array to become unusable.  If we are creating a new device,
we should always create more stripes than parity devices.
2012-02-23 03:57:23 +00:00
Alasdair Kergon
aa357b8f5d Check all tags and LV names are in a valid form in vg_validate. 2012-02-23 00:11:01 +00:00
Peter Rajnoha
9a94507884 Add lvm2 tmpfiles.d configuration file. 2012-02-22 18:02:31 +00:00
Peter Rajnoha
73c46fa222 Add configure --with-tmpfilesdir and lvm2 tmpfiles.d configuration file itself.
/etc/tmpfiles.d directory holds configuration files for temporary/volatile
files and directories that should be automatically managed. For example,
if we have some parts of the fs hierarchy on tmpfs, we'd like to recreate
some files or directories on every boot so they're always prepared for use.

Systemd can read such configuration files. For now, the lock and run directory
are the ones that are most probably placed on tmpfs. If this is the case, we
can install the configuration by 'make install_tmpfiles_configuration'.
2012-02-22 17:55:10 +00:00
Jonathan Earl Brassow
3fc37a89e2 Allow 'lvconvert --repair' to operate on RAID 4/5/6.
The higher level RAIDs should be allowed for repair along with 'mirror' and
'raid1' segment types.
2012-02-22 17:18:49 +00:00
Jonathan Earl Brassow
fe81ba59e8 *** empty log message *** 2012-02-22 17:14:38 +00:00
Jonathan Earl Brassow
16b40da2e7 Add some messages that indicate completion of RAID device replacement.
There were no messages printed upon completiion of RAID device replacement.
This could cause confusion/concern during automated recovery, because the
user sees the failure messages but no other messages indicating correction.
2012-02-22 16:03:55 +00:00
Jonathan Earl Brassow
65f96d99e5 Fix a bad return code in 'lvconvert_raid'
Functions at this level do return 0 or 1, not ECMD_* values.
2012-02-22 15:20:50 +00:00
Petr Rockai
b1e320e5c4 Tweak lvmetad a bit more:
- allow at most one PV on any given device
- allow PV lookup by device
- merge the pvmeta info into VG metadata when responding to vg_lookup
2012-02-21 09:19:08 +00:00
Alasdair Kergon
6d4df258e4 post-release 2012-02-20 21:11:06 +00:00
Peter Rajnoha
c615046eec Call built-in blkid conditionaly (udev version >= 176), call standard blkid
with full path otherwise.
2012-02-20 19:38:40 +00:00
Alasdair Kergon
a742ee8b37 pre-release 2012-02-20 19:38:19 +00:00
Peter Rajnoha
bd746876ee Check whether udev supports built-in blkid.
Built-in blkid is supported since udev v176 - set the UDEV_HAS_BUILTIN_BLKID
variable appropriately so we can use it in the rules to call the built-in
blkid conditionaly.
2012-02-20 19:36:27 +00:00
Peter Rajnoha
a2e5d2cb9c Switch to using built-in blkid in 13-dm-disk.rules.
Available in udev since version 176.
2012-02-16 14:39:02 +00:00
Petr Rockai
b0fe3642c8 Drop the now-redundant pvid_to_status hash. 2012-02-15 17:37:09 +00:00
Petr Rockai
a0289c969f Update lvmetad: use device major/minor pair to track devices. Keep a pvmeta
config tree per PV which is mostly provided by the client, so it can be used to
keep track of things like label_sector, PV format, mda count / offsets and so
on.
2012-02-15 17:30:07 +00:00
Zdenek Kabelac
d6463f4890 Initialize dmeventd monitoring for every command
Read lvm.conf setting for monitoring for each command. So we should not
activate monitoring if the default compilation is set to monitor during
lvconvert commnads.

Patch also removes check for  clustered VG and allows to disable monitoring
for clustered VG with the assumption, the problem with monitoring and dmeventd
flag passing for INGNORE is already fixed.
2012-02-15 15:18:43 +00:00
Peter Rajnoha
8da4309f07 typo 2012-02-15 15:10:31 +00:00
Peter Rajnoha
f71e484598 Add watch rule to 13-dm-disk.rules.
We don't have anything better yet...

The problems the watch rule caused when removing devices should be covered
now with the "retry remove" logic. It's also better to have this maintained
by us, rather than having this rule anywhere else without proper control.
2012-02-15 14:50:33 +00:00
Zdenek Kabelac
d11718e479 Simplify with dm_strdup 2012-02-15 14:27:53 +00:00
Petr Rockai
580b0e871d Run tests with lvmetad if we have it. 2012-02-15 14:24:31 +00:00
Peter Rajnoha
d7a47a354f Fix segfault in dmsetup when using table specification with --table.
Segfault introduced with the patch that added dm_free(_table) at the
end of dmsetup (in this release).
2012-02-15 14:20:59 +00:00
Petr Rockai
351c6b96b8 (lvmetad) Remove unused variable. 2012-02-15 14:15:50 +00:00
Petr Rockai
cc3552f4a8 In lvmetad, also nuke VGs when all their PVs are stolen by another VG (vgmerge
& vgsplit do this).
2012-02-15 14:06:48 +00:00
Zdenek Kabelac
735a00a152 Detect failing fifo
If the fifo died because of dmeventd restart - do not wait for 20s
in select  - it will not get better and return error immediately.
2012-02-15 13:56:47 +00:00
Zdenek Kabelac
aaf717039e Deal with slower test processing
If the merge was faster then query - deal with it and pass the test.
2012-02-15 13:51:17 +00:00
Zdenek Kabelac
397d2bfe89 Initialize monitoring support only for thin pools 2012-02-15 13:49:51 +00:00
Zdenek Kabelac
eb9e8f8e3d Update cut&paste error message 2012-02-15 13:46:54 +00:00
Petr Rockai
a872331404 Turn a warning to an error in vgreduce --removemissing, since we exit with a
failure status there.
2012-02-15 12:30:46 +00:00
Peter Rajnoha
9fc640ffd9 Add dmsetup manpage entries for mangle command and --manglename option. 2012-02-15 12:25:23 +00:00
Peter Rajnoha
c0b2521a82 Add DM_DEFAULT_NAME_MANGLING_MODE env. variable to override configured value.
Just in case of emergency when name mangling code causes any problems so
we can override the default value and switch off the mangling globally.
2012-02-15 12:23:15 +00:00
Peter Rajnoha
17696422b6 Replace any '\' char with '\\' in table specification on input.
Device-mapper in kernel uses '\' as escape character so it's better
to double it to avoid any confusion when using existing device names
with '\' in the table specification.

For example:

dmsetup create x --table "0 8 linear /dev/mapper/a\x20b 0"

should pass just fine now without a need to explicitly escape the '\' char
like this:

dmsetup create x --table "0 8 linear /dev/mapper/a\\x20b 0"
2012-02-15 12:17:34 +00:00
Peter Rajnoha
7f024cf822 Add mangle command to dmsetup to provide renaming to correct mangled form. 2012-02-15 12:08:57 +00:00
Peter Rajnoha
f2e7993792 Add 'mangled_name' and 'unmangled_name' fields to dmsetup info -c -o. 2012-02-15 12:06:17 +00:00
Peter Rajnoha
5a923ff8ad Add --manglename option to dmsetup to select the name mangling mode. 2012-02-15 12:02:58 +00:00
Peter Rajnoha
9ec1097099 Unamngle dm device name list automatically on ioctl return.
If dm_task_get_name or dm_task_get_names gets called, these will return
unmangled form of the names so the name mangling stays totally transparent
to any libdevmapper user (unless DM_STRING_MANGLING_NONE is used in which
case the name is not touched and it is is returned as it is in kernel).

For example:

dmsetup create "a b" - will create a\x20b device in kernel and so udev will
create /dev/mapper/a\x20b

dm_task_get_name/names will still return "a b"

In AUTO mode, the libdevmapper user can still query the device by using
the mangled ("a\x20b") or unmangled form of the name when calling dm_task_set_name.
If mangled name is provided, it's detected and the name is kept as it is.
If unmangled name is provided, it will be mangled. IOW in AUTO mode it's
totally transparent and it should not require any changes in the code
using libdevmapper.

However, any libdevmapper user must be aware of the fact that the mangled form
of the name appears in /dev/mapper (udev just can't deal with those blacklisted
characters).
2012-02-15 12:01:28 +00:00
Petr Rockai
4ec2657ac3 lvmetad server-side update:
- rename the hashes to be explicit about the mapping
- add VG/PV listing calls to the protocol
- cache slightly more of the per-PV state
- filter cached metadata
- compare the metadata upon metadata_update
2012-02-15 11:43:06 +00:00
Peter Rajnoha
9d5d312d82 Add dm_task_get_name_mangled/unmangled to libdevmapper.
dm_task_get_name_mangled will always return mangled form of the name while
the dm_task_get_name_unmangled will always return unmangled form of the name
irrespective of the global setting (dm_set/get_name_mangling_mode).

This is handy in situations where we need to detect whether the name is already
mangled or not. Also display functions make use of it.
2012-02-15 11:39:38 +00:00
Peter Rajnoha
111b6717b6 Add DEV_NAME macro.
Use the DEV_NAME macro to use the mangled form of the name if present,
use normal name otherwise (we store both forms - mangled and unmangled in
struct dm_task). Mangled form should be always preferred over unmangled
with the exception of the situations where we divide one task into several
others (like "create and load") - we need to avoid mangling the name twice
(because of multiple dm_task_set_name calls)!
2012-02-15 11:33:53 +00:00
Peter Rajnoha
3ffe9aa27d Mangle device name on dm_task_set_name/newname call if necessary.
If dm_task_set_name/newname is called, the name provided will be
automatically translated to correct encoded form with the hex enconding
so any character not on udev whitelist will be mangled with \xNN
format where NN is hex value of the character used.

By default, the name mangling mode used is the one set during
configure with the '--with-default-name-mangling' option.
2012-02-15 11:27:01 +00:00
Peter Rajnoha
8a12e5a359 autoreconf 2012-02-15 11:20:12 +00:00
Peter Rajnoha
cd4fce855e Add configure --with-default-name-mangling.
This option configures the default name mangling mode used, one of:
AUTO, NONE and HEX.

The name mangling is primarily used to support udev character whitelist
(0-9, A-Z, a-z, #*-.:=@_) so any character that is not on udev whitelist
will get translated into an encoded form \xNN where NN is the hex value
of the character.
2012-02-15 11:17:57 +00:00
Petr Rockai
017c0c9e8c Do not forget to initialise the error value in daemon_reply. 2012-02-15 09:14:54 +00:00
Petr Rockai
d6afd3e0fc Fix pool names of the format1/format_pool orphan VGs. 2012-02-15 01:44:58 +00:00
Petr Rockai
d05bfd3265 In the test harness, do the back-substitution (@TESTDIR@, @PREFIX@) also on the
live VERBOSE output, not just the post-mortem dumps.
2012-02-15 01:31:10 +00:00
Jonathan Earl Brassow
4847399243 Make conversion from a synced 'mirror' to 'raid1' not cause a full resync.
It was not possible to pass down the DM_[FORCE|NO]SYNC flags to
'dm_tree_node_add_raid_target'.  This meant that converting to 'raid1' from
'mirror' would cause a full resync.  (It also meant that '--nosync' was
ineffective when creating a 'raid1' LV.)

I've taken the 'reserved' parameter in 'dm_tree_node_add_raid_target' and
used it for the "flags" parameter.  Now it is possible to pass the sync
flags and any other flags that may come up.
2012-02-13 20:13:39 +00:00
Jonathan Earl Brassow
d6f91a013b Change confusing message that is printed when a RAID device fails.
s/Issue/Use/, otherwise it is easy to misread "Issue" as "Issuing" - causing
the user confusion as to whether the action was performed automatically or
whether they need to issue the command.
2012-02-13 18:38:36 +00:00
Jonathan Earl Brassow
7daa233e48 Fix possible NULL pointer dereferences when updating mirror log.
'_lv_update_log_type' takes a lvconvert_params argument so that it can pass
down the user's preference of 'region_size' and allocation_policy.  When
'mirror_remove_missing' was introduced (commit ID
95986e42a1) it didn't make sense to pass down
user preferences - so NULL was given instead.  While it may never happen in
practice, static analysis reveals that this argument could be dereferenced.
So, if the user preferences were not passed in, glean the necessary fields
from what is already set in the LV.

Reported-by: Zdenek Kabelac <zkabelac@redhat.com>
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>

(Not updating WHATSNEW for this simple clean-up.)
2012-02-13 18:36:55 +00:00
Jonathan Earl Brassow
9159aec6f1 Fix bug that caused RAID devices to be unable to activate if sub-LV was missing.
Commit 02f6f4902f introduced a bug that caused
RAID devices to fail to activate if the device for a single sub-LV failed.
The special case of LVM mirror was handled, but not LVM RAID.
EXAMPLE:
[root@bp-01 ~]# devices vg
  LV            Copy%  Devices
  lv            100.00 lv_rimage_0(0),lv_rimage_1(0)
  [lv_rimage_0]        /dev/sde1(1)
  [lv_rimage_1]        /dev/sdh1(1)
  [lv_rmeta_0]         /dev/sde1(0)
  [lv_rmeta_1]         /dev/sdh1(0)
[root@bp-01 ~]# vgchange -an vg
  0 logical volume(s) in volume group "vg" now active
[root@bp-01 ~]# off.sh sdh
Turning off sdh
[root@bp-01 ~]# vgchange -ay vg --partial
  Partial mode. Incomplete logical volumes will be processed.
  Couldn't find device with uuid fbI0YO-GX7x-firU-Vy5o-vzwx-vAKZ-feRxfF.
  Cannot activate vg/lv_rimage_1: all segments missing.
  0 logical volume(s) in volume group "vg" now active

AFTER this patch:
[root@bp-01 ~]# vgchange -ay vg --partial
  Partial mode. Incomplete logical volumes will be processed.
  Couldn't find device with uuid fbI0YO-GX7x-firU-Vy5o-vzwx-vAKZ-feRxfF.
  1 logical volume(s) in volume group "vg" now active
[root@bp-01 ~]# devices vg
  Couldn't find device with uuid fbI0YO-GX7x-firU-Vy5o-vzwx-vAKZ-feRxfF.
  LV            Copy%  Devices
  lv            100.00 lv_rimage_0(0),lv_rimage_1(0)
  [lv_rimage_0]        /dev/sde1(1)
  [lv_rimage_1]        unknown device(1)
  [lv_rmeta_0]         /dev/sde1(0)
  [lv_rmeta_1]         unknown device(0)
[root@bp-01 ~]# dmsetup table vg-lv; dmsetup status vg-lv
0 1024000 raid raid1 3 0 region_size 1024 2 253:2 253:3 - -
0 1024000 raid raid1 2 AD 1024000/1024000

No WHATSNEW update necessary because this is an intrarelease fix.

 brassow
2012-02-13 17:59:21 +00:00
Peter Rajnoha
c5d7b90b2a Superfluous '/' in systemd_dir path. 2012-02-13 17:31:14 +00:00
Zdenek Kabelac
35d3d400ec Fix missing temp_buf init for error path
In previous commit this was missing, also deallocate in reversed order.
2012-02-13 14:39:24 +00:00
Zdenek Kabelac
0d38ef9273 Add internal error to guard vgname2 NULL dereference 2012-02-13 14:26:15 +00:00
Zdenek Kabelac
2b62c3ed69 Add some FIXME around allocation code
Remove also unreachable break..
2012-02-13 14:25:14 +00:00
Zdenek Kabelac
cc05bde071 Never try to test character past given buffer
In case units[0] would be already '\0', do not check units[1].
2012-02-13 14:23:40 +00:00
Zdenek Kabelac
27d46fc686 Make sure dereferenced words[0] and words[1] are defined 2012-02-13 14:17:04 +00:00
Peter Rajnoha
07d54dd4a7 Add configure --with-systemdsystemunitdir. 2012-02-13 13:02:47 +00:00
Zdenek Kabelac
e975f85035 Handle all succefull values from open() 2012-02-13 12:07:51 +00:00
Zdenek Kabelac
c868f0ebf0 Ensure allocated device does not leak on error path
For unimplementd canonicalize_file_name set to NULL
2012-02-13 12:06:39 +00:00
Zdenek Kabelac
5a4638e22b Add internal error for unsupported code paths
Patch mainly helps static analyzers to better work with code paths
lvm code should never trigger.
2012-02-13 11:25:56 +00:00
Zdenek Kabelac
afed769d42 Check for missing reply_uuid 2012-02-13 11:24:09 +00:00
Zdenek Kabelac
a140caa666 Check for allocation failure 2012-02-13 11:18:45 +00:00
Zdenek Kabelac
dab4bb3bdf Add check for allocation failure 2012-02-13 11:16:42 +00:00
Zdenek Kabelac
2833f81fb3 Add few missing allocation failures tests 2012-02-13 11:13:44 +00:00
Zdenek Kabelac
bbf912b644 Add check for rimage name allocation failure 2012-02-13 11:10:37 +00:00
Zdenek Kabelac
05e8ec79e0 Add check for mda_copy failure 2012-02-13 11:09:25 +00:00
Zdenek Kabelac
78188982d6 Add check for failure 2012-02-13 11:07:55 +00:00
Zdenek Kabelac
6890aea4ca Add free_orphan_vg
Move commod code to destroy orphan VG into free_orphan_vg() function.
Use orphan vgmem for creation of PV lists.
Remove some free_pv_fid() calls (FIXME: check all of them)
FIXME: Check whether we could merge release_vg back again for all VGs.
2012-02-13 11:03:59 +00:00
Zdenek Kabelac
b2b5a2268a If the same fid is already same avoid ref_counting 2012-02-13 11:01:34 +00:00
Zdenek Kabelac
5ca11ba420 Add missing test for failure of lvmcache_foreach_pv 2012-02-13 10:58:20 +00:00
Zdenek Kabelac
c227ad23cc Clean error paths for format instance
With updated orphan VG code this code needed some updates.
Add missing log_error for allocation failures.
2012-02-13 10:56:31 +00:00
Zdenek Kabelac
5dc08c01d3 Release_vg instead of plain free in error path 2012-02-13 10:53:31 +00:00
Zdenek Kabelac
ec6a14f40a Log error reporting for failing _alloc_pv
Drop unneeded zeroing of zalloced memory region.
2012-02-13 10:51:52 +00:00
Zdenek Kabelac
7cf73cbc1f Do not write to -1 buffer address
In case of zero bytes would be read from sysfs, it would store '\0' on
temp_buf[-1] address.

Simplify some buffer length calculation and use strcpy if we've just
checked string fits in give buffer.

Replace jump label error: with bad: commonly used in libdm.
2012-02-13 10:49:28 +00:00
Zdenek Kabelac
193c832d12 Remove duplicit test
When it's space it's also not a '\0'.
2012-02-13 10:45:26 +00:00
Milan Broz
82e18a35a9 Update autoconf. 2012-02-13 07:30:24 +00:00
Fabio M. Di Nitto
69b9a614c7 In the new corosync world, dlm is a standalone service.
Fix clvmd init script to Require dlm service when building for the
new corosync or clvmd will fail to start.
2012-02-13 05:24:57 +00:00
Alasdair Kergon
52fdced782 post-release 2012-02-13 00:23:21 +00:00
Alasdair Kergon
2a7e2fe2d3 pre-release 2012-02-12 23:02:52 +00:00
Alasdair Kergon
3b6c6f1319 FMT_INSTANCE_VG is redundant now 2012-02-12 23:01:19 +00:00
Alasdair Kergon
11b3b12336 FMT_INSTANCE_PV is no longer used 2012-02-12 22:37:24 +00:00
Alasdair Kergon
b8a4a2b40b use stack consistently if 0 is considered an error 2012-02-12 21:42:43 +00:00
Alasdair Kergon
3e15add713 missing error mesg 2012-02-12 21:37:03 +00:00
Alasdair Kergon
06eaf8974a remove unused bits after fid changes 2012-02-12 20:19:39 +00:00
Alasdair Kergon
4ceec2023f give standard error message if lstat fails unexpectedly 2012-02-12 20:17:12 +00:00
Zdenek Kabelac
777a797ec3 Fix message check
Check pointer from strchr for NULL instead of crash later.
Badly formated message would have crash dmeventd otherwise.
2012-02-10 15:17:52 +00:00
Zdenek Kabelac
c559180cca Check for deps pointer before dererence
As _deps() call may return NULL - check for it.
2012-02-10 14:48:28 +00:00
Zdenek Kabelac
9fde529762 Add validation of name and uuid
Do not accept NULL pointers.
2012-02-10 14:42:28 +00:00
Zdenek Kabelac
50504ab73e Do not crash for NULL sort_key
Guard against NULL pointer for sort_key and let it behave like an empty
string would have been passed in (i.e. no key).
2012-02-10 14:00:07 +00:00
Zdenek Kabelac
514976816d Return error for failing allocation
Fix case, where final strdup would have failed and it would miss to return
failure for this case and return success and NULL pointer.
2012-02-10 13:56:19 +00:00
Zdenek Kabelac
00067fb64a Add test for failing allocation
Avoid memcpy to NULL if realloc fails.
2012-02-10 13:52:05 +00:00
Zdenek Kabelac
f6aab411f8 Add test for memory allocation failures
Replace asserts with test for failing memory allocation.
Add at least stack traces.
Index counter starts from 1 (0 reserved for error), so replacing fingerprint.
2012-02-10 13:49:29 +00:00
Zdenek Kabelac
c966f07bbd Remove unreachable code 2012-02-10 13:46:23 +00:00
Petr Rockai
e8ada4a717 What's new. 2012-02-10 02:56:54 +00:00
Petr Rockai
14d7b7b4e9 Keep a global (per-format) orphan_vg and keep any and all orphan PVs linked to
it. Avoids the need for FMT_INSTANCE_PV and enables further simplifications. No
functional change, internal refactor only.
2012-02-10 02:53:03 +00:00
Petr Rockai
395ee036d7 What's new: lvmcache. 2012-02-10 01:29:46 +00:00
Petr Rockai
1a28df6bb4 Move lvmcache data structures behind an API (making the structures private to
lvmcache.c). No functional change.
2012-02-10 01:28:27 +00:00
Peter Rajnoha
2936951d94 Stop processing lvextend if trying to extend a mirror that is being recovered.
Missing correct return value in lv_extend fn.
2012-02-09 15:13:42 +00:00
Zdenek Kabelac
e98b640d5d Move label_init up in code stack
label_exit() is called destroy_toolcontext() and we are now
using standard dm_list function for destroy, we have to make sure
dm_list gets initialized properly.
2012-02-08 13:44:49 +00:00
Zdenek Kabelac
6d29fe0f33 Thin add pool_below_threshold
Test both data and metadata percent usage.
2012-02-08 13:05:38 +00:00
Zdenek Kabelac
5365e4a53f A bit more readable code
Just a minor readability conversion.
2012-02-08 13:03:40 +00:00
Zdenek Kabelac
70e3728446 Fix test for lv_snapshot_percent
Do not check for PERCENT_MERGE_FAILED if the lv_snapshot_percent() failed.
(test for snap_percent would be testing uninitialized value).
2012-02-08 13:02:07 +00:00
Zdenek Kabelac
0cb3bdcfce Drop unreachable code 2012-02-08 12:59:45 +00:00
Zdenek Kabelac
d735798674 Use dm_asprintf to simplify code 2012-02-08 12:59:19 +00:00
Zdenek Kabelac
74c7c69d34 Some fixmes
'len' calculation is unused ?
Unreachable code could be removed or moved upward ?
2012-02-08 12:57:15 +00:00
Zdenek Kabelac
7ce534f911 Switch to return void
List delete cannot fail, so there is no reason to test for error.
2012-02-08 12:52:58 +00:00
Zdenek Kabelac
f03f322f13 Use dm_snprintf and improve error handling
Add standard error reporting with error logging.
Use plain alloc instead of zalloc for string buffer.
Use dm_snprintf with valid test for <0.
2012-02-08 12:50:10 +00:00
Zdenek Kabelac
c929a0b3c8 Add range test for device number
Check the output of atoi is in valid range.
2012-02-08 12:48:14 +00:00
Zdenek Kabelac
6bea32ccf2 Remove duplicate test
Tested condition has been already evaluated before
For strlen() code has already excluded <ID_LEN.
For repairing, already tested (!argc && !repairing) before.
2012-02-08 11:41:18 +00:00
Zdenek Kabelac
84facc4fa5 Replace snprintf with dm_snprintf
snprintf testing for negative is replaced with dm_snprintf where this
test really works.
Add missing test for result of dm_snprintf().
2012-02-08 11:40:02 +00:00
Zdenek Kabelac
7940545204 Remove unneeded assignments
Variables have (or will have) those values set.
2012-02-08 11:36:18 +00:00
Zdenek Kabelac
cc525412f1 Keep page_size as signed number
Since it's return value from sysconf and is checked for <0.
2012-02-08 11:34:46 +00:00
Zdenek Kabelac
962fd76f26 Move done jump lower
Since before 'goto done' is bufused zeroed, it would otherwise write 1
byte in front of buffer.
2012-02-08 11:31:29 +00:00
Zdenek Kabelac
6e40adde76 Add boundary test for number of mirror devs and logs
As atoi may return negative value - test for both limits.
Test log_args for limits before calling alloca().
Code from dmeventd mirror plugin should probably share same code as
we have in mirrored.c.
2012-02-08 11:29:13 +00:00
Zdenek Kabelac
fb6a209080 Set all parameters to 0
Since the function dm_get_next_target() returns NULL as 'next' pointer
so it's not a 'real' error - set 0 to all parameters when NULL is
returned because of missing head.

i.e. one of use case::
do {
	next = dm_get_next_target(dmt, next, &start, &length,
				  &target_type, &params);
	size += length;
} while (next);
2012-02-08 11:25:09 +00:00
Zdenek Kabelac
e6b610e8ed Check that whole locking_dir fits _lock_dir buffer 2012-02-08 11:17:34 +00:00
Zdenek Kabelac
bb0862fbb4 Move close few lines
Since the function dev_close() has code path, which really could close
file (for unlocked vg) and destroy dev handler, stay on safe side and move
the close few lines later, even our current use case shouldn't trigger
such scenario.
2012-02-08 11:15:38 +00:00
Zdenek Kabelac
e2d2f6c7b7 Use dm_list_iterate_items_safe
And avoid direct access to list member variables.
Inline _free_li().
2012-02-08 11:12:18 +00:00
Zdenek Kabelac
e48d39cba0 Fix fd resource leak in error path
Use 'goto bad' to cleanup fd on error path.
2012-02-08 11:07:17 +00:00
Zdenek Kabelac
4443aa41cc Ensure strncpy() function always ends with '\0'
Since last character needs to be \0 for string,
pass buffer size smaller by 1 byte.
2012-02-08 11:05:04 +00:00
Zdenek Kabelac
24da0046d5 Set status for error path
Do not leave status unitialized, since in some cases, it's tested,
when the function returns error.
2012-02-08 10:56:17 +00:00
Zdenek Kabelac
8d9b4b0133 Add missing deps for lvm2api
Hmm, wasted some time because of this missing deps....
2012-02-08 10:52:45 +00:00
Zdenek Kabelac
328ed80160 Fix resource leaks for failing allocation
In case, something would fail during format initialization,
return allocated memory.
2012-02-08 10:49:36 +00:00
Zdenek Kabelac
582c94aa21 Release allocated resources in error path
If composite_filter_create() fails, release filters.
2012-02-08 10:46:24 +00:00
Zdenek Kabelac
38188ff8aa Check result of lstat
If lstat returns errno different from ENOENT, do not use the content of
struct stat 'buf'.
2012-02-08 10:43:42 +00:00
Petr Rockai
c84c7ee034 What's new. 2012-02-01 20:13:44 +00:00
Petr Rockai
a3ac5a54d0 Remove a dubious log message ("another thread is handling an event") from LVM
dmeventd plugins. Fixes RHBZ 771419.
2012-02-01 20:11:58 +00:00
Alasdair Kergon
b93aa9eac4 post-release 2012-02-01 18:46:57 +00:00
Alasdair Kergon
d8e7d0e2bd pre-release 2012-02-01 15:17:04 +00:00
Alasdair Kergon
759f5b3349 Adjust comments 2012-02-01 15:05:53 +00:00
Zdenek Kabelac
433c583cd7 Add synchornization point in mirror log init.
Put extra sync point when mirror log is deactivated and before
it's activated for the second time.
2012-02-01 13:50:36 +00:00
Zdenek Kabelac
02f6f4902f Disable partial activation for thin LVs and LVs with all missing segments
Count number of error and existing areas and if there is no existing area
for the LV avoid its activation.

Always disable partial activatio for thin volumes.

For mirrors currently put in hack to let it pass with a special name
since current mirror code needs to activate such LV during some operations.
2012-02-01 13:47:27 +00:00
Zdenek Kabelac
90fd0f5b42 Avoid warning for small pv_min_size
Do not print warning for pv_min_size set in range between 512KB and 2MB.
2012-02-01 13:42:18 +00:00
Peter Rajnoha
56f6e9310d Clean up systemd unit ordering and requirements. 2012-02-01 13:08:39 +00:00
Zdenek Kabelac
8bec4f1a79 Minor consistency update for debugging messages
Use mlock/munlock for debugging lines.
2012-02-01 10:48:22 +00:00
Zdenek Kabelac
f00a162500 User correct base dir for lcov reports
Fix problem when srcdir != builddir.
2012-02-01 10:46:45 +00:00
Alasdair Kergon
1228bcaa0d lost line 2012-02-01 02:11:43 +00:00
Alasdair Kergon
b94b757e1e Track unreserved space for all alloc policies and then permit NORMAL to place
log and data on same single PV.
2012-02-01 02:10:45 +00:00
Alasdair Kergon
49fd0fef6b lp.pool may be (null) too 2012-02-01 01:54:20 +00:00
Alasdair Kergon
8416129244 Automatically detect whether corosync clvmd needs to use confdb or cmap. (fabio) 2012-01-31 21:21:53 +00:00
Zdenek Kabelac
b329581e8e Fix data% reporting
For reading % of mapped size of thin volume use as origin for
old style snapshot '-real' device needs to be queried.
Fix log_error report given for lvs -a in this case.
2012-01-28 20:12:26 +00:00
Zdenek Kabelac
0117473049 TARGETS macro has to be defined before inclusion of make.tmpl
Use @TESTING@ check in this case.
2012-01-27 10:55:02 +00:00
Alasdair Kergon
09e8d81a53 post-release 2012-01-27 01:23:40 +00:00
Zdenek Kabelac
317325ed54 Build test run target only for $(TESTING)
Add libraries properly in LDLIBS.
2012-01-26 21:40:36 +00:00
Zdenek Kabelac
6f9deb5b47 Thin for_each_sub_lv
Adapt to scan thin dependency LVs
2012-01-26 21:39:32 +00:00
Zdenek Kabelac
21fc2af47e If clvmd is not started, die test instead of skipping it 2012-01-26 18:25:46 +00:00
Zdenek Kabelac
3d0906496b Oops missed braces in previous commit
This has disabled clvmd for being executed.
(FIXME improve testing part to catch this fault)
2012-01-26 17:55:55 +00:00
Alasdair Kergon
f47f37485e pre-release 2012-01-26 14:02:42 +00:00
Alasdair Kergon
3876196da4 pre-release 2012-01-26 10:11:30 +00:00
Zdenek Kabelac
808f1706c2 Cosmetic change use return macro with implicit stack 2012-01-25 22:38:40 +00:00
Zdenek Kabelac
0f5dc427f6 Set missing header define 2012-01-25 22:37:48 +00:00
Zdenek Kabelac
28de76ed58 Fix leak of hash table
Minor leak on command initialization.
2012-01-25 22:36:33 +00:00
Zdenek Kabelac
4e9ef2eea3 Instrument code that pointer are already released
Set pointers to NULL since on the function exit they are no longer valid.
2012-01-25 22:35:36 +00:00
Zdenek Kabelac
a1f42a8e88 Set to a defined value vars used after error path
Static analyzer noticed this vars are used even when error is reported
back thus their state is undefined - set to 0 for this case.
2012-01-25 22:20:11 +00:00
Zdenek Kabelac
baffb459d8 Test for uname result
in fail path initialize to 0.
2012-01-25 22:17:57 +00:00
Zdenek Kabelac
381deeed8f Fix missing dmt destructor
Also always initialize maj,min,patchlevel when success is returned.
2012-01-25 22:16:04 +00:00
Zdenek Kabelac
6f37aa5e65 Free allocated segment type in error path 2012-01-25 21:54:00 +00:00
Zdenek Kabelac
4343bc5e86 Limit alignment to 32bit values
to get the same behavior on 32/64 machines.
2012-01-25 21:52:53 +00:00
Zdenek Kabelac
255e8cb60b Ensure whole info is initialised
Since _create_dm_tree_node is copying whole structure,
make sure all members are initialized.
2012-01-25 21:50:50 +00:00
Zdenek Kabelac
6bd7f256ae Fix resource leak of file handle
Introduces when added dm_device_get_name.
Close file handle in all error paths.
2012-01-25 21:47:18 +00:00
Zdenek Kabelac
5d1d269f45 Check for correctness of uint64 value if exists 2012-01-25 21:43:51 +00:00
Zdenek Kabelac
e152c0b9e6 Add breaks for cases 2012-01-25 21:42:09 +00:00
Zdenek Kabelac
cd55e7238a Ensure reply struct has all fields defined
Reply is returned by value.
2012-01-25 21:31:59 +00:00
Zdenek Kabelac
dfac9aef46 Check and print perror for syscalls 2012-01-25 21:30:27 +00:00
Alasdair Kergon
1835cb731d & -> && 2012-01-25 17:14:56 +00:00
Zdenek Kabelac
ab418f0317 Fix compilation with disabled devmapper
During release preparation things has changed, so making sure
we are compilable with --disable-devmapper.
2012-01-25 13:12:59 +00:00
Zdenek Kabelac
4fb4548a57 Thin read percent info from layered device
Since we only need layered device - check for tpool device presence,
so user doesn't need to activate thin pool explicitely for percent info.
2012-01-25 13:11:52 +00:00
Zdenek Kabelac
ec2012f736 Rename origin_only to more generic use_layer flag
Since now we have more layered devices i.e. thin volumes - support
selection of layer via flag.
2012-01-25 13:10:26 +00:00
Zdenek Kabelac
93903e1edf Clean var declarations to the front of the function 2012-01-25 13:06:57 +00:00
Zdenek Kabelac
500806dda9 Update verbose lvs to print metadata_percent info
Update lvs  -o fields in WHATS_NEW.
2012-01-25 11:32:41 +00:00
Zdenek Kabelac
7e134dfb8e Thin clear stacked message for thin pool
Before removing thin pool LV always make sure, stacked message
for previous run are cleared - but allow to remove any
device that should have been created
(i.e. creation of snapshot failed - so the message for snapshot creation
may be replaced with delete message within unfinished transaction).

Also commit messages after lv remove - so free space is released in pool.
2012-01-25 11:27:42 +00:00
Alasdair Kergon
dee7712483 Make commented out code more obvious 2012-01-25 11:10:06 +00:00
Zdenek Kabelac
cfd3d9413b Thin skip activation when there are no thin message
If the list with thin messages is empty, do not touch thin pool device.
2012-01-25 09:17:15 +00:00
Zdenek Kabelac
5feef4f92b Thin correct activation order
When the message is passed only in resume path the order needs
to be corrected.
2012-01-25 09:15:44 +00:00
Zdenek Kabelac
9644179f39 Thin use suspend/resume_lv_origin
Use origin_only support for thin volume when thin snapshot is created.
2012-01-25 09:14:25 +00:00
Zdenek Kabelac
9142bc74bb Thin use origin_only for thin pools as well
Extend the usage of origin_only flag to allow resume of thin pool LV
(when it's active) to pass only the messages.

origin_only flag will skip detection of already resumed tree for thin_pool,
so we do not need to suspend the tree and we just send messages.
2012-01-25 09:13:10 +00:00
Zdenek Kabelac
9518d362ab Thin add support for origin_only suspend of thin volumes
Pass in the origin_only flag also for thin volumes - but curently the flag
is not used to its best.

FIXME: achieve the state where only  thin volume snapshot origin is
suspended without its childrens -  let's explore whether this may
happen automatically inside libdm (might be generic for other targets).
So the code would not need to annotate the node for this.
2012-01-25 09:10:13 +00:00
Zdenek Kabelac
529267fe5d Thin add messages only for activation tree
Extend lv_activate_opts with bool flag to know for which purpose
dtree is created - and add message only for activation tree
(since that's the only place that may send them).

Extend validation check for thin snapshot creation and test whether
active snapshot origin is suspended before its snapshot is created
(useful in recover scenarios) -  in this case also detect, whether
transaction has been already completed and avoid such suspend check
failure in that case.
2012-01-25 09:06:43 +00:00
Zdenek Kabelac
c3606d4127 Thin check for lv_thin_pool_percent error status
Check has been missing.
2012-01-25 09:02:35 +00:00
Zdenek Kabelac
d7a3cf4744 Update lv_info whats_new 2012-01-25 09:00:57 +00:00
Zdenek Kabelac
fd3e9e102d lv_info using -real layer only for origin_only LV
If the origin_only flag is passed for non lv_is_origin LVs,
the extension is not added.

Thin volumes may also use origin_only flag.
2012-01-25 09:00:18 +00:00
Zdenek Kabelac
7264da9af1 Thin prevent removal of its data and metadata LVs
LVs cannot be removed while there are linked to thin pool.
(Gives better error message, than validation).
2012-01-25 08:57:25 +00:00
Zdenek Kabelac
c0f8f6270e Thin fix transaction_id incrementation and code refactoring
Add pool_has_message and use it in attach_pool_message.
Also update header to make more obvious which segment type is
expected as parameter.
Rename  'read_only' to  'no_update' (no auto update transaction_id)
to better fit how it's used.
Fix problem when there was only one stacked message replaced with delete
message that caused unwanted transaction_id increase.
2012-01-25 08:55:19 +00:00
Zdenek Kabelac
13c67c55af Comment cleanups
Move comment where it applies and remove unused attribe when the var
is actually used.
2012-01-25 08:51:29 +00:00
Zdenek Kabelac
538e7737d2 Thin dependency scan support
Go through pool_lv and metadata_lv LVs when doing recursive scan.
2012-01-25 08:50:10 +00:00
Zdenek Kabelac
f72b203392 Thin add lv_thin_pool_transaction_id
Easy function to get transaction_id status value.
2012-01-25 08:48:42 +00:00
Zdenek Kabelac
ea617ea71c Thin send messages on activation resume code path
Using PRELOAD part would lead to problems when the problem
would happen before vg_write and vg_commit.
Also this change is necessary for snapshot creation sequence.
2012-01-25 08:46:21 +00:00
Alasdair Kergon
b05ae8b950 Caller is still entitled to reference an LV that's unlinked, so don't
tamper with struct contents.
2012-01-24 14:53:59 +00:00
Jonathan Earl Brassow
cb09dc1274 Use suspend|resume_origin_only when up-converting RAID LVs, as mirrors do.
Failure to do so results in "Performing unsafe table load while X device(s) are
known to be suspended" errors.  While fixing the problem in this way works and
is consistent with the way the mirror segment type does it, it would be nice
to find a solution that uses the generic suspend/resume calls.

Also included in this check-in are additions to the test suite that perform
conversions on RAID LVs under a snapshot.  These tests are disabled for the
time being due to a kernel bug that is yet to be tracked down.
2012-01-24 14:33:38 +00:00
Milan Broz
6979dccd79 Properly show LV removal message.
(Fix regression in commit 6e181ba96d)
2012-01-24 14:15:52 +00:00
Alasdair Kergon
0942afa35d Use chunk_size consistently for thin_pool within LVM. 2012-01-24 00:55:03 +00:00
Jonathan Earl Brassow
fe5199c506 Fix the way RAID meta LVs are added to the dependency tree.
Similar to the "mirror" segment type's log device, _add_dev_to_dtree should
be called and not _add_lv_to_dtree when adding metadata sub-LVs to the deptree.
Since _add_lv_to_dtree was being called, 'origin_only' could be set if a
snapshot sits on top of the RAID device.  This would cause the actual device
that needed to be added to be skipped in favor of the non-existant device,
"<foo>-real".
2012-01-23 20:56:42 +00:00
Alasdair Kergon
ea9d0a19ca Fix metad help. 2012-01-23 17:48:18 +00:00
Alasdair Kergon
f41e4fba91 add const 2012-01-23 17:47:36 +00:00
Alasdair Kergon
7c979db7a9 Reorder fns in libdm-deptree.
Tweak dm_config interface and remove FIXMEs.
2012-01-23 17:46:31 +00:00
Alasdair Kergon
efa33f0b49 Add CLVMD_FLAG_REMOTE to skip processing on local node. 2012-01-21 05:31:54 +00:00
Alasdair Kergon
5aa7a0ec1c Attempt to improve clustered 'lvchange -aey' behaviour to try local node before
remote nodes and address some existing anomalies.
2012-01-21 05:29:51 +00:00
Mike Snitzer
7281c3ab01 Prompt if request is made to remove a snapshot whose "Merge failed". 2012-01-20 22:04:16 +00:00
Mike Snitzer
27393d999c Allow removal of an invalid snapshot that was to be merged on next activation.
Don't allow a user to merge an invalid snapshot.
2012-01-20 22:03:48 +00:00
Mike Snitzer
2ff68cece1 Use m and M lv_attr to indicate that a snapshot merge failed in lvs.
snapshot (m)erge failed, suspended snapshot (M)erge failed
2012-01-20 22:03:03 +00:00
Mike Snitzer
c25ad6e873 Differentiate between snapshot status of "Invalid" and "Merge failed". 2012-01-20 22:02:04 +00:00
Mike Snitzer
e756b80f56 Lookup snapshot usage percent of origin when a snapshot is merging. 2012-01-20 21:56:01 +00:00
Zdenek Kabelac
079473f10a Update lvdisplay to show more info about thin LVs
Reformat name and path how the LV is represented with lvm1 compatible option,
to switch to the old way - which had number of  problem - i.e. many links
do not exist - since for private devices we are not creating them.
Add more info about thin pools and volumes.
2012-01-20 16:59:58 +00:00
Zdenek Kabelac
ba92f67e23 Minor cleanup
Simplier and more readable char pointer math.
2012-01-20 11:01:56 +00:00
Zdenek Kabelac
e1eeb8e908 Thin until proper vgcfgrestore for thin is implementad, disable restore.
Since it may probably do more harm to leave it enabled - add extra test
for presence of thin volumes in VG, and in this case disable restore.
2012-01-20 11:01:13 +00:00
Zdenek Kabelac
76b1e2d9b1 Thin forgotten initialisation of pointer to NULL
Since the code may go in error path, set to defined NULL.
2012-01-20 10:59:26 +00:00
Zdenek Kabelac
8aa66458a3 Tiny cleanup
Just remove double braces from conditions when they are not really needed.
(So it doesn't look like an assignment and comparison).
2012-01-20 10:58:17 +00:00
Zdenek Kabelac
9abe5853b2 Thin cleanup
Reorder condition so the code is better readable (and shorter).
2012-01-20 10:56:30 +00:00
Zdenek Kabelac
a563ef56b8 Drop hack in segtype reporting
Since striped name function knows when to report 'linear' instead of
'stripe' type name - drop it from this place.

This fixes problem when reporting segtype e.g. for thin-pool which
is also using area_count=1 to store thin data device reference.

It also returns properly strduped memory instead of badly casted const char*.
2012-01-20 10:55:28 +00:00
Zdenek Kabelac
3486e8332d Thin extend lvcreate man page about snapshot support 2012-01-20 10:50:39 +00:00
Alasdair Kergon
cf22ba1971 . 2012-01-20 03:56:18 +00:00
Alasdair Kergon
3e8e48f9d0 improve comment 2012-01-20 03:46:52 +00:00
Jonathan Earl Brassow
6bcd3263c2 Preserve exclusive activation of cluster mirror when converting.
This patch to the suspend code - like the similar change for resume -
queries the lock mode of a cluster volume and records whether it is active
exclusively.  This is necessary for suspend due to the possibility of
preloading targets.  Failure to check to exclusivity causes the cluster target
of an exclusively activated mirror to be used when converting - rather than
the single machine target.
2012-01-20 00:27:18 +00:00
Zdenek Kabelac
f7a26917f2 Drop unimplemented 2012-01-19 16:22:42 +00:00
Zdenek Kabelac
946166795e Drop unused variable 2012-01-19 15:59:51 +00:00
Zdenek Kabelac
62fa6a8289 Thin indent update 2012-01-19 15:42:18 +00:00
Zdenek Kabelac
8aa21348d1 Thin disable snapshot creation when pool is over the threshold.
Since snapshot needs to suspend origin - it might lead to pool userspace
deadlock (as the pool will wait for new space in case it would be overfilled,
but dmeventd would not be able to resize it, as the lvcreate operation would
have kept the VG lock.)
To minimize the risk of such scenario - we prevent to create new snapshot
in case we are over the threshold - but beware, there is still small timewindow,
so keep threshold at some reasonable level!
2012-01-19 15:39:41 +00:00
Zdenek Kabelac
8d3933945a Thin add new display field for lvs
New field Data% is able to display info about
thin_pool, thin, snapshot and has generic meaning here.

Simple Time/Host field are here to display host and time creation.
2012-01-19 15:34:32 +00:00
Zdenek Kabelac
06849a0d6a Add support to keep info about creation time and host for each LV
Basic support to keep info when the LV was created.
Host and time is stored into LV mda section.

FIXME: Current version doesn't support configurable string via lvm.conf
and used fixed version strftime "%Y-%m-%d %T %z".
2012-01-19 15:31:45 +00:00
Zdenek Kabelac
491d106031 Thin add function to read thin volume percent
This value returns percentage of 'mapped' size compared with total LV size.
(Without passed seg pointer it return highest mapped size - but it's
not used yet.)
2012-01-19 15:27:54 +00:00
Zdenek Kabelac
9fae965546 Thin updated support for thin pool percent
Support to check also for metadata percent
(By checking whether seg pointer is set)
2012-01-19 15:25:37 +00:00
Zdenek Kabelac
57979a273c Thin rename seg var pool_metadata_lv to metadata_lv
Better fits the code.
2012-01-19 15:23:50 +00:00
Zdenek Kabelac
4174dad301 Thin handle empty thin volume case
Report both values as 0 in case the volume is unused.
2012-01-19 15:22:32 +00:00
Zdenek Kabelac
a1224b2da8 Thin use consistentely metadata
Do not shortcut to 'meta' and stay with 'metadata'
Also matches kernel doc for dm API then.
2012-01-19 15:21:23 +00:00
Zdenek Kabelac
440a4d4c76 Thin rename local static
Use '_' for local const char.
2012-01-19 15:19:18 +00:00
Zdenek Kabelac
3a55acee5b Rename internal macro to match signess
Since _read_int64 called dm_config_get_uint64, rename it to
less confusing _read_uint64.
2012-01-19 15:17:46 +00:00
Zdenek Kabelac
dd0fc6d3a8 Update for gcc old-style 2012-01-19 15:16:39 +00:00
Alasdair Kergon
5c9bebee8a Add dmsetup 'wipe_table' to replace table with one that uses error target. 2012-01-18 18:52:02 +00:00
Peter Rajnoha
de0822a2bf Move dm_task_set_newname from libdm-iface.c to libdm-common.c 2012-01-17 14:36:58 +00:00
Petr Rockai
693e6e76a9 Beef up the lvmetad code with more functionality and a bunch of bugfixes. There
used to be a few mis-ordered memory accesses (release and access in the next
block). Fix that set_flag could have sometimes corrupted the flags being
modified.

A few issues with metadata tracking are sorted out as well now, and there are
only a few problems remaining before we can integrate lvmetad, mostly on the
client side:

- metadata areas need to be tracked in lvmetad (most likely to be addressed
  through an extension of metadata, meaning no special support in lvmetad would
  be needed)
- non-udev scanning code needs to be taught about telling lvmetad about device
  disappearance (pvscan most importantly)
- this last item also needs to mesh with metadata inconsistencies and
  suddenly-incomplete volume groups (aux disable_dev in tests); udev-based
  scanning should address this separately and more elegantly
2012-01-16 08:25:32 +00:00
Petr Rockai
5463a05a32 Fix a boundary condition in read_buffer in daemon-shared.c. 2012-01-16 05:09:16 +00:00
Petr Rockai
e3b4fe6ed2 Fix prototypes for daemon_send_simple (const char). 2012-01-15 15:16:50 +00:00
Petr Rockai
921de583f4 Unfortunately, blank lines are sometimes produced by config serializer, and
this interferes with their role as message separator in the lvmetad
protocol. Switch to using "##" on an otherwise blank line as a separator.
2012-01-15 11:17:16 +00:00
Petr Rockai
e31a3a6da6 Fix a couple of resource leaks in daemon-common server code -- filehandles and
unjoined threads were leaked for each connection.
2012-01-15 10:33:41 +00:00
Alasdair Kergon
55bdb63f34 Make error message hit when preallocated memlock memory exceeded clearer. 2012-01-12 18:29:07 +00:00
Alasdair Kergon
75fe459800 Show read-only activation in display tools. 2012-01-12 16:58:43 +00:00
Peter Rajnoha
5bafde0b6f Missing const.
"warning: assignment discards 'const' qualifier..."
2012-01-12 09:08:55 +00:00
Alasdair Kergon
57fafd762f Add activation/read_only_volume_list to override LV permission in metadata. 2012-01-12 02:32:09 +00:00
Alasdair Kergon
642da16c33 Add activation/read_only_volume_list to override LV permission in metadata. 2012-01-12 01:51:56 +00:00
Alasdair Kergon
661f154b66 Give priority to emcpower devices with duplicate PVIDs. 2012-01-11 20:38:42 +00:00
Peter Rajnoha
8227bf45d1 Support different device name types on output of dmsetup deps, ls and info -c command.
Add 'blkdevname' and 'blkdevs_used' field to dmsetup info -c -o.
Add 'blkdevname' option to dmsetup ls --tree to see block device names.
Add '-o options' to dmsetup deps and ls to select device name type on output.
2012-01-11 12:46:19 +00:00
Peter Rajnoha
ad96b1b460 Add dm_device_get_name to get map name or block device name for given devno.
This is accomplished by reading associated sysfs information. For a dm device,
this is /sys/dev/block/major:minor/dm/name (supported in kernel version >= 2.6.29,
for older kernels, the behaviour is the same as for non-dm devices).

For a non-dm device, this is a readlink on /sys/dev/block/major:minor, e.g.
  /sys/dev/block/253:0 --> ../../devices/virtual/block/dm-0.
The last component of the path is a proper kernel name (block device name).

One can request to read only kernel names by setting the 'prefer_kernel_name'
argument if needed.
2012-01-11 12:34:44 +00:00
Alasdair Kergon
ee1e9fcd22 Add dm_uuid_prefix/dm_set_uuid_prefix for non-lvm users to override hard-coded
LVM- prefix.

Try harder not to leave stray empty devices around (locally or remotely) when
reverting changes after failures while there are inactive tables.
2012-01-10 02:03:31 +00:00
Zdenek Kabelac
7da3845926 Check for error code in _adjust_policy_params
If error is detected in _adjust_policy_params, break further command processing.
2012-01-09 12:31:52 +00:00
Zdenek Kabelac
8a3960d217 Minor oldstyle warning update 2012-01-09 12:29:15 +00:00
Zdenek Kabelac
99a1830b46 Improve readahead in dmsetup man page 2012-01-09 12:28:47 +00:00
Zdenek Kabelac
6cf41f5503 Use sysfs to set/get of read-ahead
If we know major:minor number of device (which is known after resume) we will
try to use  sysfs to set/get read ahead parameters of device.
This avoid potential problem of blocking commands like 'dmsetup info' awaiting
for device being usable for open/close - i.e. overfilled thin pool may block
such command.
2012-01-09 12:26:14 +00:00
Zdenek Kabelac
51c8cf97af Support rounding of percentage upward
We want to keep this logic -
when LV is extend - extend the LV by at least given amount,
when LV is reduced - reduce the LV by at most given amount.

So for this the rounding needs to be used.
Current logic which seems to satisfy give rule is to round up all
extent values for LV resize upward except for values with '-' sign
that are round downward.

This patch also fixes the problem when  lvextend --use-polices tried
to extend LV the by i.e. 20% - but the resulting 20% were smaller
the extent size thus before this patch no extension happened.
2012-01-05 15:38:18 +00:00
Zdenek Kabelac
46a75dedb4 Use new dmeventd_lvm2_command function in dmeventd plugins.
For snapshot, prepare whole command in front into private buffer.
Add also some missing '\n' for syslog messages.
For raid and mirror only convert creation of command line string.
This should avoid any unbound growth of mempool for dm_split_names.
2011-12-22 16:37:01 +00:00
Zdenek Kabelac
f4cb382829 Thin use helper function
Fix some minor outstading issue from thin plugin introduction -
Call dmeventd_lvm2_exit() in failpath for registration.
Add some missing '\n' in syslog messages.
2011-12-22 15:57:29 +00:00
Zdenek Kabelac
c06fdae782 Add helper function dmeventd_lvm2_command().
Since this code is in all plugins - create a common helper function.
2011-12-22 15:55:21 +00:00
Zdenek Kabelac
47bc4f5191 Updated documentation for dmeventd.
Update man page style.
Mention raid and thin plugins.
Update help text printed by command to match man page.
2011-12-22 15:50:38 +00:00
Zdenek Kabelac
52eae5fec2 Thin man page update 2011-12-21 13:58:38 +00:00
Zdenek Kabelac
d3e0fc4374 Thin update test case
Use thin-pool instead of thin_pool type.
2011-12-21 13:45:42 +00:00
Zdenek Kabelac
e906e9a160 Exported functions from thin plugin 2011-12-21 13:42:07 +00:00
Zdenek Kabelac
9e1ccf81c7 Drop extra stat before open of device
Since the !(dev->flags & DEV_REGULAR) code path just called
dev_name_confirmed() which has just called 'stat()' inside,
remove duplicate second stat() call here.
2011-12-21 13:24:24 +00:00
Zdenek Kabelac
88f8cbbb2e Do not lstat common path prefix
When both path have identical prefix i.e. /dev/disk/by-id
skip  2 x lstat() for /dev  /dev/disk /dev/disk/by-id
and directly lstat() only different part of the path.

Reduces amount of lstat calls on system with lots of devices.
2011-12-21 13:21:09 +00:00
Zdenek Kabelac
235028bf0f Add common initialization code for struct device
Avoid duplicate code and add _dev_init() where all common
member values are initialized.
2011-12-21 13:17:54 +00:00
Zdenek Kabelac
93bb35fd2b Always zalloc device structure
Since there is zalloc behind the macro, put 'z' into the name.
Make the 'use_malloc' code path also using zalloc() call,
so it also give zeroed area.
2011-12-21 13:14:54 +00:00
Zdenek Kabelac
d3923ece3d Thin automatic policy based extension 2011-12-21 13:10:52 +00:00
Zdenek Kabelac
58044875a8 Thin add lv_thin_pool_percent 2011-12-21 13:10:05 +00:00
Zdenek Kabelac
88bf72d388 Thin add dev_manager_thin_pool_percent
dev manager function to read percent info from thin pool.
2011-12-21 13:09:33 +00:00
Zdenek Kabelac
59bcd5a1cf Thin add dmeventd support
This is basic version with still few unresolved issue mainly in case,
when the pool resize is failing.
2011-12-21 13:08:11 +00:00
Zdenek Kabelac
b0f1e29335 Fix missing thread list manipulation
For manipulation with thread list to avoid race with timeout thread,
take also _timeout_mutex.
2011-12-21 13:03:06 +00:00
Zdenek Kabelac
1b9a70b6af Check lv pointer for NULL before derefence. 2011-12-21 12:59:22 +00:00
Zdenek Kabelac
d0b237f46c Thin remove unused code 2011-12-21 12:56:45 +00:00
Zdenek Kabelac
0857bdba0f Thin move layer suffix into local static const 2011-12-21 12:55:22 +00:00
Zdenek Kabelac
a1e1561a91 Thin rename internal thin pool segment
Use matching name as kernel target - useful when function like
_percent is using this for validation.
2011-12-21 12:54:19 +00:00
Zdenek Kabelac
219fa54fd4 Add Thin API for parsing thin status
Add dm_get_status_thin_pool and dm_get_status_thin functions to
parse 'params' argument which is received via dm_get_next_target.

Returns filed structure allocated from given mempool.
2011-12-21 12:52:38 +00:00
Zdenek Kabelac
2b61011439 Allow empty strings for description and creation_host config fields 2011-12-21 12:49:00 +00:00
Zdenek Kabelac
6298758992 Add dm_config_find_str_allow_empty
Support empty string values.
2011-12-21 12:47:44 +00:00
Alasdair Kergon
14a165bf3f Reinstate support for format1 snapshots, but issue deprecated warning.
I anticipate removing support for snapshots with lvm1-formatted metadata in a
future release.
2011-12-20 00:02:18 +00:00
Petr Rockai
a9ac026781 Add a unit test for config cascade (overlay). Needs expanding though. 2011-12-18 23:56:36 +00:00
Petr Rockai
27949793ae Fix up lvmetad for the minor API change in dm_config_create. 2011-12-18 22:31:10 +00:00
Petr Rockai
e4de6d2ee0 Make a cleaner split between config tree and config file functionality. Move
the latter out of libdm.
2011-12-18 21:56:03 +00:00
Peter Rajnoha
dc1d4e2444 Use also normalized output for vgchange command in lvm2-monitor init script. 2011-12-16 11:42:56 +00:00
Zdenek Kabelac
6b864994f3 Cleanup test compile warning
Add some declaration and cast to cleanup gcc warnings.
Add missing dm_config_destroy() to cleanup pool leak report.
2011-12-13 12:08:42 +00:00
Petr Rockai
55f7917356 Fix a compiler warning. 2011-12-12 00:08:23 +00:00
Petr Rockai
2d4b741115 Use a more canonic regex escaping in the default lvm.conf file for testing. 2011-12-12 00:01:12 +00:00
Petr Rockai
bd7ec82f64 Move dm_config_write out of libdm, back to lib/config, as config_write. 2011-12-11 23:18:20 +00:00
Petr Rockai
b3d86a26a6 Add a shell test for config parsing and merging (lvm dumpconfig). 2011-12-11 23:14:37 +00:00
Petr Rockai
cd143dc253 Autoreconf. 2011-12-11 17:24:37 +00:00
Petr Rockai
b82e7a4fcd Add a unit test for dm_config_clone_node. 2011-12-11 15:45:14 +00:00
Petr Rockai
7a17af253d Start a rudimentary unit test for the dm_config_* interface. 2011-12-11 15:19:41 +00:00
Petr Rockai
6d6b259fc0 In the dm_config_*get_* functions, make the actual value retrieval optional
(useful for just checking that a given key is of a given type).
2011-12-11 15:18:32 +00:00
Petr Rockai
1b068ee26b Use pkg-config to look for CUnit (needed for systems where it's not on the
standard include path).
2011-12-11 15:15:57 +00:00
Petr Rockai
c3c9864ed0 Nowadays, lvm-version.h is generated from lvm-version.h.in, and lives in the
build directory, not the source one -> fix test/shell/000-basic.sh.
2011-12-10 14:55:31 +00:00
Alasdair Kergon
9f2e5eee45 update FIXMEs 2011-12-10 00:47:23 +00:00
Alasdair Kergon
042d98b2d2 When dropping the VG metadata from the cache during a VG revert, we must
reset the 'precomitted' flag, just as we'd clear if we next did a 'commit'.
2011-12-08 23:18:00 +00:00
Alasdair Kergon
721119a388 Only use built-in stack size in clvmd - ignore lvm.conf. 2011-12-08 21:24:08 +00:00
Alasdair Kergon
4403e98eba Test for LCK_CLUSTER_VG directly in args[0].
Drop unused LCK_LOCAL from debug msg.
2011-12-08 18:32:33 +00:00
Alasdair Kergon
5495ef8217 Don't pass LCK_LOCAL to clvmd - it's unused.
Pass LCK_CLUSTER_VG in args[0] instead of args[1].
2011-12-08 18:19:05 +00:00
Alasdair Kergon
a830fcbc34 Update comments. CLUSTER_VG belongs firmly to args[0]. 2011-12-08 18:09:48 +00:00
Alasdair Kergon
58d9230ce2 reinstate !first_time check
(recovery from first_time failure would need different code)
2011-12-08 18:06:33 +00:00
Alasdair Kergon
aa8e6be045 tag uninit var 2011-12-08 18:00:03 +00:00
Peter Rajnoha
999263f18d Fix lvm2-monitor init script to use normalized output when using vgs. 2011-12-07 12:29:41 +00:00
Jonathan Earl Brassow
49d06db518 Add policy based automated repair of RAID logical volumes
The RAID plug-in for dmeventd now calls 'lvconvert --repair' to address failures
of devices in a RAID logical volume.  The action taken can be either to "warn"
or "allocate" a new device from any spares that may be available in the
volume group.  The action is designated by setting 'raid_fault_policy' in
lvm.conf - the default being "warn".
2011-12-06 19:30:15 +00:00
Milan Broz
15af345602 Fix FIXME and comment :-) 2011-12-03 11:36:10 +00:00
Milan Broz
cbea400d06 Switch locking bits to match RHEL5 version.
FIXME:
There is a problem with overloaded bit 0x80 in locking flag,
the bit flags array must be extended or changed.
2011-12-03 11:34:35 +00:00
Alasdair Kergon
8344acfa2d update old comment 2011-12-01 14:57:30 +00:00
Jonathan Earl Brassow
80924bc9ff Don't allow two images to be split and tracked from a RAID LV at one time
Also, don't allow a splitmirror operation on a RAID LV that is already tracking
a split, unless the operation is to stop the tracking and complete the split.
Example:
~> lvconvert --splitmirrors 1 --trackchanges vg/lv /dev/sdc1
# Now tracking changes - image can be merged back or split-off for good
~> lvconvert --splitmirrors 1 -n new_name vg/lv /dev/sdc1
# ^ Completes split ^

If a split is performed on a RAID that is tracking an already split image and
PVs are provided, we must ensure that
 1) the already split LV is represented in the PVs
 2) we are careful to split only the tracked image
2011-12-01 00:21:04 +00:00
Jonathan Earl Brassow
824ec24322 Don't allow size change of RAID LV that is tracking changes for a split image
Don't allow size change of RAID sub-LVs independently
2011-12-01 00:13:16 +00:00
Jonathan Earl Brassow
64b817cbc6 Do not allow users to change the name of RAID sub-LVs or the name of the
RAID LV if it is tracking changes for a split image.
2011-12-01 00:09:34 +00:00
Jonathan Earl Brassow
37e5e5e9c2 WHATS_NEW for previous commit. 2011-12-01 00:05:40 +00:00
Jonathan Earl Brassow
78b7b7db1a Do not allow users to change permissions on RAID sub-LVs. 2011-12-01 00:04:21 +00:00
Alasdair Kergon
b24e58c0ea refer to kernel docs for target info
FIXME thin still missing
2011-11-30 22:32:37 +00:00
Petr Rockai
8d884c5894 Update comment on LCK_*DMEVENTD*. 2011-11-30 17:02:37 +00:00
Petr Rockai
7dd63d8ff2 Fix clvmd to respect DMEVENTD_MONITOR_IGNORE. Fixes a bug where dmeventd
actions caused clvmd to turn off monitoring of the volume causing the action.
2011-11-30 17:00:57 +00:00
Jonathan Earl Brassow
204aaf2253 The LV_REBUILD flag is not internal - bad comments in metadata-exported.h updated 2011-11-30 02:20:13 +00:00
Jonathan Earl Brassow
02941f999c Support the ability to replace specific devices in a RAID array.
RAID is not like traditional LVM mirroring.  LVM mirroring required failed
devices to be removed or the logical volume would simply hang.  RAID arrays can
keep on running with failed devices.  In fact, for RAID types other than RAID1,
removing a device would mean substituting an error target or converting to a
lower level RAID (e.g. RAID6 -> RAID5, or RAID4/5 to RAID0).  Therefore, rather
than removing a failed device unconditionally and potentially allocating a
replacement, RAID allows the user to "replace" a device with a new one.  This
approach is a 1-step solution vs the current 2-step solution.

example> lvconvert --replace <dev_to_remove> vg/lv [possible_replacement_PVs]

'--replace' can be specified more than once.

example> lvconvert --replace /dev/sdb1 --replace /dev/sdc1 vg/lv
2011-11-30 02:02:10 +00:00
Peter Rajnoha
bb69784719 Fix last checkin for replicator. 2011-11-29 12:10:38 +00:00
Alasdair Kergon
52a9ed70c4 Add activation/use_linear_target enabled by default. (prajnoha)
LVM metadata knows only of striped segments - not linear ones.
The activation code detects segments with a single stripe and switches
them to use the linear target.

If the new lvm.conf setting is set to 0 (e.g. in a test script), this
'optimisation' is turned off.
2011-11-28 20:37:51 +00:00
Zdenek Kabelac
8ec116a6b8 Try longer sleep
Seems like testing machine could be busy - so 10s might not be enough
for rechecking status.
2011-11-23 15:27:14 +00:00
Zdenek Kabelac
a9b96bc87a Cleanup files from testing
Cleanup generated files from coverage testing.
Do not skip standard .o compilation for lib/not and lib/harness.
Make a bit longer string in harness to fit new shell/ in.
2011-11-23 14:56:47 +00:00
Zdenek Kabelac
078b23379e Swap condition order
Swap condtion so the make line does not return 'false'.
Remove cleanup of lvm2api.sh since it is not created anymore.
2011-11-23 13:03:57 +00:00
Zdenek Kabelac
cde518be8b Put back devmapper-event linkage
Older systems are not able to get this library from lvm2api library and need
this library to be passed to the linker (i.e. Debian Lenny).
2011-11-23 13:02:17 +00:00
Zdenek Kabelac
57781b3c41 Cleanup test makefiles
Simplify /api makefile and use SUBDIRS target for test dir.
Properly cleanup Makefiles with distclean in /test.
Use symbolic links for shell scripts for non-srcdir compilation.
2011-11-23 12:21:41 +00:00
Zdenek Kabelac
95abf92593 Split gcc warning options
Use gcc warning options only for .c -> .o compilation
So it makes the output more clear.

Do not use INCLUDES and DEFS for .o -> .so.

Do not use CFLAGS for deps .d generation.
2011-11-23 12:19:23 +00:00
Zdenek Kabelac
fcc74c44e0 Use 3 digits for chmod
Use same sized chmod params (0 are filled from left on its own)
and we use just 3 digits elsewhere.
2011-11-23 12:18:42 +00:00
Alasdair Kergon
f0509623ce Move y/n prompts to stderr and repeat if response has both 'n' and 'y'.
(Note that in a future release we might make this stricter and insist
on exactly 'y' or 'n'.)
2011-11-23 01:34:38 +00:00
Peter Rajnoha
4a927fd6db --addtag instead of --tag 2011-11-22 13:54:05 +00:00
Zdenek Kabelac
2e72b04376 Cleanup gcc warning
bitset_t.c:39: warning: 'last' may be used uninitialized in this function

Compiler is not smart enough to see the code path which avoid using
unitialized 'last'.
2011-11-21 13:15:40 +00:00
Zdenek Kabelac
d66df1becc Add some cflags to ldflags
Things like gcov needs the compilation flags for linker as well.
TODO: cleanup compilation flags usage
2011-11-21 13:11:03 +00:00
Petr Rockai
3ab1c64a47 More of WHATS_NEW. 2011-11-21 12:44:38 +00:00
Petr Rockai
72a4b0eada Update WHATS_NEW. 2011-11-21 12:33:56 +00:00
Petr Rockai
e0c7b65690 Fix a bug in dmeventd snapshot monitoring code where the monitoring threshold
would grow with subsequent snapshot extensions (RHBZ 754198).
2011-11-21 12:31:18 +00:00
Petr Rockai
13f9043a35 Fix the last fix (t-XXX -> shell/XXX). 2011-11-21 11:36:53 +00:00
Petr Rockai
9aa1e40276 Fix the fallout in shell tests from the t-XXX -> shell/XXX move. 2011-11-21 11:25:35 +00:00
Zdenek Kabelac
0d825ff728 Add CUnit testing support
Regenerate configure for unit test support.
2011-11-21 10:40:21 +00:00
Petr Rockai
633ef1a00b Tidy the shell tests into their own subdir. We now have:
- test/lib -- infrastructure and helper code
- test/api -- liblvm2app API tests
- test/unit -- C-based unit tests
- test/shell -- shell-based functional tests
2011-11-21 00:15:42 +00:00
Petr Rockai
6deb7e25cc Fix a syntax error. 2011-11-20 21:55:24 +00:00
Petr Rockai
e55220e27a Implement a CUnit-based runner for unit tests. Copy and adapt (actual unit)
tests from unit-tests/*/*_t.c (now under test/unit). The valgrind/pool test is
missing, since it's not really a unit test and probably not too valuable
either. Available via "make unit" (and if --enable-testing was passed to
configure, also executed by make check).
2011-11-20 21:43:20 +00:00
Alasdair Kergon
60d8256c38 Don't ignore configure --mandir and --infodir. 2011-11-20 20:52:09 +00:00
Zdenek Kabelac
4c10cc98c7 Drop pool memory allocated in lv_has_target_type
Remove FIXMES - there should not be any pool free call since
the memory pool is from device manager, and pool is detroyed
after the operation, so doing extra free here would not help here.

However lv_has_target_type() is using cmd mempool so here the extra
call for dm_pool_free makes sence.
2011-11-18 19:42:03 +00:00
Zdenek Kabelac
8b10abd792 Remove constant expression check
"result_independent_of_operands: ((dev->dev & 0xfff00UL) >> 8) ==
18446744073709551615UL /* -1 */ is always false regardless of the values
of its operands (logical operand of if)."

'dev->dev' is set in dev-cache.c _insert() and it's not expectable
st_rdev would have '-1'

This code has been introduced with drbd support commit and code never
worked - so eliminated.
2011-11-18 19:36:10 +00:00
Zdenek Kabelac
108cfe0b0b Check target type name for DM_MAX_TYPE_NAME length
Avoid creation of target type name when it's longer then
DM_MAX_TYPE_NAME (noticed by static analyzer where the
sp.target_type might be missing '\0' at the end.)

Before patch:

$> dmsetup create long
0 1000 looooooooooooooooooooooooooong
^D
device-mapper: reload ioctl failed: Invalid argument

After patch:

$> dmsetup create xxx
0 1000 looooooooooooooooooooooooooong
Target type name looooooooooooooooooooooooooong is too long.
Command failed
2011-11-18 19:34:02 +00:00
Zdenek Kabelac
f8484c44cc Replace dynamic buffer allocations for PATH_MAX
Use static buffer instead of stack allocated buffer.
This reduces stack size usage of lvm tool and the
change is very simple.

Since the whole library is not thread safe - it should not
add any new problems - and if there will be some conversion
it's easy to convert this to use some preallocated buffer.
2011-11-18 19:31:09 +00:00
Zdenek Kabelac
f1f5572fbb Unlock memory for vg_write
For write we do not need to hold memory locked.
This relaxes many conditions and avoid problems when allocating
a lot of memory for writting metadata buffers.
(In case of huge MDA size this would lead to mismatch between
locked and unlocked memory region size).

Add also internal check we are not writing in critical section.
2011-11-18 19:28:00 +00:00
Zdenek Kabelac
367ae9e86d Query before removing inactive snapshots
Removal of an inactive origin removes also all related snapshots.

When we now support 'old' external snapshots with thin volumes,
removal of pool will not only drop all thin volumes, but as
a consequence also all snapshots - which might be seen a bit
unexpected for the user - so add a query to confirm such action.

lvremove -f will skip the prompt.
2011-11-18 19:25:20 +00:00
Zdenek Kabelac
a21275e1b2 Allow to activate snapshot
Add extra code to active and deactivate related
snapshots and origin when user specifies snapshot
logical volume as lvchange parameter.

Before patch:

$> lvs -a
  LV    VG   Attr     LSize  Pool Origin Snap%  Move Log Copy%  Convert
  lvol0 mvg  owi-a-s-  1.00k
  lvol1 mvg  swi-a-s- 16.00k      lvol0    0.00
  lvol2 mvg  swi-a-s- 16.00k      lvol0    0.00

$> lvchange -an mvg/lvol2; echo $?
  Can't change snapshot logical volume "lvol2".
5

After patch:

$> lvchange -an mvg/lvol2
Change of snapshot lvol2 will also change its origin lvol0 and 1 other
snapshot(s). Proceed? [y/n]: n
  Logical volume lvol2 not changed.

$> lvchange -y -an mvg/lvol2; echo $?
0

$> lvs -a
  LV    VG   Attr     LSize  Pool Origin Snap%  Move Log Copy%  Convert
  lvol0 mvg  owi---s-  1.00k
  lvol1 mvg  swi---s- 16.00k      lvol0
  lvol2 mvg  swi---s- 16.00k      lvol0
2011-11-18 19:22:49 +00:00
Zdenek Kabelac
0ae01cf29d Skip non-virtual snapshots for availability
Change the behavior of availability change.
With this patch the lvgchange returns success
when VG is properly changed.

It skips non-virtual origins from being changes when
only 'vg' is specified as lvchange -a parameter.

Before this change we had this:

$> lvs -a
  LV    VG   Attr     LSize   Pool Origin
  lvol0 mvg  owi-a-s- 128.00k
  lvol1 mvg  owi-a-s- 128.00k
  lvol2 mvg  swi-a-s-   1.25m      lvol0
  lvol3 mvg  swi-a-s-   1.25m      lvol1

$> lvchange -an mvg ; echo $?
  Can't change snapshot logical volume "lvol2".
  Can't change snapshot logical volume "lvol3".
5

$> lvs -a
  LV    VG   Attr     LSize   Pool Origin
  lvol0 mvg  owi---s- 128.00k
  lvol1 mvg  owi---s- 128.00k
  lvol2 mvg  swi---s-   1.25m      lvol0
  lvol3 mvg  swi---s-   1.25m      lvol1

$> lvchange -ay mvg ; echo $?
  Can't change snapshot logical volume "lvol2".
  Can't change snapshot logical volume "lvol3".
5

$> lvs
  LV    VG   Attr     LSize   Pool Origin
  lvol0 mvg  owi-a-s- 128.00k
  lvol1 mvg  owi-a-s- 128.00k
  lvol2 mvg  swi-a-s-   1.25m      lvol0
  lvol3 mvg  swi-a-s-   1.25m      lvol1

After commit:

$> lvs -a
  LV    VG   Attr     LSize   Pool Origin
  lvol0 mvg  owi-a-s- 128.00k
  lvol1 mvg  owi-a-s- 128.00k
  lvol2 mvg  swi-a-s-   1.25m      lvol0
  lvol3 mvg  swi-a-s-   1.25m      lvol1

$> lvchange -an mvg ; echo $?
0

$> lvs -a
  LV    VG   Attr     LSize   Pool Origin
  lvol0 mvg  owi---s- 128.00k
  lvol1 mvg  owi---s- 128.00k
  lvol2 mvg  swi---s-   1.25m      lvol0
  lvol3 mvg  swi---s-   1.25m      lvol1

$> lvchange -ay mvg ; echo $?
0

$> lvs -a
  LV    VG   Attr     LSize   Pool Origin
  lvol0 mvg  owi-a-s- 128.00k
  lvol1 mvg  owi-a-s- 128.00k
  lvol2 mvg  swi-a-s-   1.25m      lvol0
  lvol3 mvg  swi-a-s-   1.25m      lvol1
2011-11-18 19:19:22 +00:00
Zdenek Kabelac
986a6d9d85 Adjusted mirror region size only for mirrors and raids
Update region_size only for mirror and raid targets.
This fixes warning messages when vg is using small
extent size like 1KiB and no mirror/raid is created,
but the user still got the message:

$> vgcreate -s 1K vg  <pvs>
$> lvcreate -L10K vg
Using reduced mirror region size of 4 sectors
2011-11-15 17:32:12 +00:00
Zdenek Kabelac
4c1d56cdcf Thin update prompt message
Enhance message with info about how many thin volumes are going to
be removed with thin pool removal.
2011-11-15 17:29:52 +00:00
Zdenek Kabelac
a6d0767d3e Reorder AND test condition
Take the easiest condition for checking first since they must
apply all together, check local conditions first before doing
more expensive tests.
2011-11-15 17:27:41 +00:00
Zdenek Kabelac
7411ce05c6 Thin clean
Reuse seg pointer already set in _add_lv_to_dtree to have the
value of first_seg(lv) (and is used in other parts of this function).
2011-11-15 17:25:05 +00:00
Zdenek Kabelac
a1c4a54ada Thin supports only thin volumes as snapshot origins
It's currently of the scope to properly solve the snapshoting
of internal thin devs so prevent non-toplevel snapshots here.
2011-11-15 17:23:51 +00:00
Zdenek Kabelac
96f7c3e2da Simplify iteration
Since nothing is removed in dm_list snapshot_segs during the loop,
there is no reason to use _safe iteration, so switch to simplier
dm_list_iterate().
2011-11-15 17:21:02 +00:00
Zdenek Kabelac
e60993068c Thin fix tpool layer
Since we support snapshots of thin volumes, we could have more layers,
so we have to check whether tpool layer is going to be inserted.

As the _add_segment_to_dtree() is the only place that adds tpool
segment, we may just check pointer (no strcmp for layer).

Switch to use  seg_is_  function instead of lv_is_.
2011-11-15 17:15:03 +00:00
Alasdair Kergon
dddf30e354 Include a copy of kernel DM documentation in doc/kernel 2011-11-15 13:54:20 +00:00
Peter Rajnoha
26ab12e69d Avoid 'mda inconsistency' by properly registering UNLABELLED_PV flag (2.02.86).
When a PV label write is deferred to a vg_write call (as introduced by a patch
in 2.02.86), the PV is flagged with the internal UNLABELLED_PV flag. However,
when calling vg_archive before vg_write, we still have the PV labelled with the
UNLABELLED_PV flag which was not recognised as a proper flag while exporting
VG metadata:

  # vgcreate vg /dev/sda
  No physical volume label read from /dev/sda
  Metadata inconsistency: Not all flags successfully exported.
  Metadata inconsistency: Not all flags successfully exported.
  Writing physical volume data to disk "/dev/sda"
  Physical volume "/dev/sda" successfully created
  Volume group "vg" successfully created
2011-11-15 11:54:15 +00:00
Alasdair Kergon
dd815a4f7b Make dmsetup.static and lvm.static build when dmeventd is disabled.
udev may also need to be disabled if you didn't build it statically too.

dmeventd.static could be fixed with some more work but I don't really see the
point: without dlopen() it's useless, and if you have dlopen(), why not support
normal shared libraries too?
2011-11-14 21:30:35 +00:00
Alasdair Kergon
bc33c1cda4 Incomplete documentation is worse than no documentation.
Any documentation less-detailed than Documentation/device-mapper is
dangerous for the non-trivial ctr lines.  And anyway, this should be in s4
not here.
2011-11-13 01:41:57 +00:00
Zdenek Kabelac
3d48fc1324 Add missing stack reporting
also remove unneeded {}
2011-11-12 22:53:23 +00:00
Zdenek Kabelac
b589204e85 Thin use items iterator and stack reporting 2011-11-12 22:52:18 +00:00
Zdenek Kabelac
8e24709ef2 Missing stack printing 2011-11-12 22:51:20 +00:00
Zdenek Kabelac
eb668d8981 Update dmsetup man page
Use standard manpage style.
Keep options and commands in alphabetic order.
Added at least a very simply info about some other targets.
TODO: documenting targest needs far more work...
2011-11-12 22:48:44 +00:00
Zdenek Kabelac
988e809ed4 Thin remove unused define
Remove DM_THIN_ERROR_DEVICE_ID from API.
Remove API warning.
Drop code that was using DM_THIN_ERROR_DEVICE_ID (already commented)
Remove debug message which slipped in through some previous commit.
2011-11-12 22:44:10 +00:00
Alasdair Kergon
b5518cb1ea Move gentoo MAKEDEV to /sbin in lvm2create_initrd. (James Le Cuirot) 2011-11-12 17:03:53 +00:00
Milan Broz
d2c074bf06 Fix major number filter structure boundary test. 2011-11-11 16:59:30 +00:00
Milan Broz
ba89f47b8d Remove unneeded parameter. 2011-11-11 16:41:37 +00:00
Milan Broz
0c789c6a9c And now add files for real. 2011-11-11 15:24:48 +00:00
Milan Broz
2da3c698fd Fix function name in previous patch. 2011-11-11 15:14:05 +00:00
Milan Broz
5584ecf7a2 Do not scan device if it is part of active multipath.
Add filter which tries to check if scanned device is part
of active multipath.

Firstly, only SCSI major number devices are handled in filter.

Then it checks if device has exactly one holder (in sysfs) and
if it is device-mapper device and DM-UUID is prefixed by "MPATH-".

If so, this device is filtered out.

The whole filter can be switched off by setting
mpath_component_detection in lvm.conf.

https://bugzilla.redhat.com/show_bug.cgi?id=597010

Signed-off-by: Milan Broz <mbroz@redhat.com>
2011-11-11 15:11:08 +00:00
Zdenek Kabelac
c93cfc6a13 Thin send create_snap message
Start creating snapshots for real.
Update test suite to check it happens.
2011-11-10 15:30:59 +00:00
Zdenek Kabelac
bd2ea128e0 Thin update testing script 2011-11-10 12:44:33 +00:00
Zdenek Kabelac
1ee4955b70 Add generic function to check for target version
So the test script may use a simple line like i.e.:

aux target_at_least dm-thin-pool 1 0 0 || skip
aux target_at_least snapshot-origin 1 6 || skip
2011-11-10 12:44:00 +00:00
Zdenek Kabelac
f0c52912f0 Small comment and indent updates 2011-11-10 12:43:05 +00:00
Zdenek Kabelac
3eb6a4adb6 Thin test min thin_pool size for at least 1 chunk 2011-11-10 12:42:36 +00:00
Zdenek Kabelac
1b990cc8aa Thin align volume size on chunk boundary size
If the extent_size is smaller then the chunk_size we may try
to find better aligment (wasting less space).

i.e. using  4KB extent_size and  64KB chunk size will
lead to creation of 64KB aligned thin volume.
2011-11-10 12:42:15 +00:00
Zdenek Kabelac
520afb5a43 Thin lvcreate manpage
Merge thin creation into standard 1st. lvcreate.
Update snapshot for thin support.
2011-11-10 12:41:39 +00:00
Zdenek Kabelac
69af4ab3ae Thin move _read_activation_params check
Since we finaly recognize thin creation only after
_determine_snapshot_type() - move _read_activation_params()
after it - so we can support  lvcreate -an thin snapshot.
2011-11-10 12:40:29 +00:00
Zdenek Kabelac
01da72eb3a Thin create normal (old) snapshot when size is specified 2011-11-10 12:39:46 +00:00
Zdenek Kabelac
815f56fd90 Thin disable pool create without activation 2011-11-10 12:39:01 +00:00
Mike Snitzer
be88d03380 Add missing free() for line that is malloc()'d by getline(). 2011-11-08 19:02:21 +00:00
Mike Snitzer
d25d6d456f Fix _get_proc_number to be tolerant of malformed /proc/misc entries.
Fixes issue reported here: http://lkml.org/lkml/2011/11/8/190
2011-11-08 17:32:10 +00:00
Zdenek Kabelac
2371f76c73 Thin ensure pool table is update after resize
Always make sure table gets reloaded.
For now activate and deactivate pool volume if it's not active.

FIXME: we could do this only if we are sure some thin volume is alive.
2011-11-08 12:19:53 +00:00
Zdenek Kabelac
1e6910032e Update lvs man page style. 2011-11-08 12:16:53 +00:00
Zdenek Kabelac
3b0407ad83 More lvcreate man updates
Use  { Name | Path } consistently like elsewhere.
Move --thin alphabetically before --type.
2011-11-08 12:12:31 +00:00
Petr Rockai
00c752b931 Correctly handle concurrent read (CR) locks in singlenode clvmd. This means
that we can also test clustered volume groups (vgcreate -c y) in the test
suite. Unfortunately we can't make this the testing default since cluster
mirrors require further infrastructure, and snapshots probably don't work at
all. I'll eventually add a few test cases that create clustered VGs
specifically.
2011-11-07 17:11:23 +00:00
Petr Rockai
1a7644a0f6 Do not repeat a failed test's output when VERBOSE is in effect. 2011-11-07 17:02:56 +00:00
Alasdair Kergon
b3d6e05694 Must not override alloc policy specified by user. 2011-11-07 13:54:54 +00:00
Zdenek Kabelac
2f6fbed90b Thin add error message for double delete
Add few more internal error messages.
2011-11-07 11:04:45 +00:00
Zdenek Kabelac
25227bfccb Thin supports snapshots
Full support for thin snapshots.
Create and remove is supported.

TODO: lvconvert support is not yes available.
2011-11-07 11:03:47 +00:00
Zdenek Kabelac
ae6833c8fa Add missing default LVM_VG_NAME
Add support for exported shell variable LVM_VG_NAME also
for thins and snapshots.
2011-11-07 11:01:53 +00:00
Zdenek Kabelac
db6bf15fcd Thin lvcreate poolmetadatasize units doc 2011-11-07 10:59:55 +00:00
Zdenek Kabelac
caa3faa47e Thin reindent code
Drop indention level
Add extra internal error.
2011-11-07 10:59:07 +00:00
Zdenek Kabelac
56a076fb9e Thin revert code for exclusive pool activation
There are no limits on thin-pool activation now.
Revert code that is no longer needed.
2011-11-07 10:58:13 +00:00
Zdenek Kabelac
32227d4720 Avoid lvextend to overflow
Add extra check to extent_count overflow.
Use internal define MAX_EXTENT_COUNT instead UINT32_MAX.
2011-11-04 22:49:53 +00:00
Zdenek Kabelac
4d126a5a80 Thin lvcreate man update
Added poolmetadatasize options
Add an example.
2011-11-04 22:47:17 +00:00
Zdenek Kabelac
d6ecde875a Thin pool allocation simplified
Support allocation of metadata from the same PV, if the VG
is build only from one PV.

As thinp is not mirror - we do not require 2 PVs
for basic thin usage as user is losing only perfomance.
2011-11-04 22:45:52 +00:00
Zdenek Kabelac
134641d62d Thin add thin_pool_metadata_require_separate_pvs
Allow to set different policy for pool from mirrors.
2011-11-04 22:44:21 +00:00
Zdenek Kabelac
d577391181 Thin supports poolmetadatasize setting
Add option to set pool metadatasize.
For passing size parameter reuse region_size.
2011-11-04 22:43:10 +00:00
Zdenek Kabelac
70a441fcda Thin fix condition check for transation_id
id2 must be checked.
(missed in yesterday commit set).
2011-11-04 12:39:45 +00:00
Alasdair Kergon
b316ae6fd1 Add missing lvrename mirrored log recursion in for_each_sub_lv. 2011-11-04 01:31:23 +00:00
Zdenek Kabelac
7ca380cf6d Thin keep pool device in the same state
Leave the optimalisation to be done differently and preserve
availability state of the pool device.
2011-11-03 15:58:20 +00:00
Zdenek Kabelac
c4c8553390 Thin no device is created - so nothing to revert here 2011-11-03 15:46:51 +00:00
Zdenek Kabelac
485734ae74 Thin removing limitation on activation of pool device.
Since activation of pool is now independent on thin activation,
user may do whatever he needs - thought preferable thin should stay alive,
but it it will be found inactivate, update_pool will bring the pool up.
2011-11-03 14:59:20 +00:00
Zdenek Kabelac
da4dca3881 Thin removing unused detach_pool_messages 2011-11-03 14:57:04 +00:00
Zdenek Kabelac
19af11c2eb Thin using update_pool_lv
Replace detach_pool_messages with update_pool_lv.
Move creation code from to 'if' condition into 1.
Ensure creation has finished all previous message operations.
2011-11-03 14:56:20 +00:00
Zdenek Kabelac
6fbce7358c Thin genering update_pool_lv function
Function to trigger pool message passing via resume,
or resize of the pool itself independently on other thins.
2011-11-03 14:53:58 +00:00
Zdenek Kabelac
9ebfab3a9f Add -tpool layer in activation tree
Let's put the overlay device over real thin pool device.
So we can get the proper locking on cluster.
Overwise the pool LV would be activate once implicitely
and in other case explicitely, confusing locking mechanism.
This patch make the activation of pool LV independent on
activation of thin LV since they will both implicitely use
real -thin pool device.
2011-11-03 14:52:09 +00:00
Zdenek Kabelac
e8d6ac48a7 Thin api change for passing message into libdm
Avoid exposing another struct to the libdm user and
use only simple dm_tree_node_add_thin_pool_message with
2 overloaded uint64_t values.
2011-11-03 14:45:01 +00:00
Zdenek Kabelac
a947464d4c Thin api change for dm_tree_node_add_thin_target
A little code shuffling and adding support for
DM_THIN_ERROR_DEVICE_ID which might be eventually be used
for activation of thin which is going to be deleted.
For now we do not need it lvm.
2011-11-03 14:43:21 +00:00
Zdenek Kabelac
1bab67020c Thin avoids need of having writable VG for activation 2011-11-03 14:41:18 +00:00
Zdenek Kabelac
264ca9ba32 Thin uses _tdata instead of _tpool for data LV
Switch to different suffix and keep -tpool reserved for overlay device name.
2011-11-03 14:38:36 +00:00
Zdenek Kabelac
e99a9fec1b Thin clean using delete_id consitently 2011-11-03 14:37:23 +00:00
Zdenek Kabelac
0e7603ecbb Thin code cleanup
Use iterate_items for list processing.
2011-11-03 14:36:40 +00:00
Peter Rajnoha
2d603339f1 Add "ExecReload" to dm-event.service for systemd to reload dmeventd properly.
Normally, restart simply means "stop and start" for systemd. However, if
we're installing new versions of the dmeventd binary/libdevmapper, we need
to restart dmeventd. This fails if we have some devices monitored - we need
to call "dmeventd -R" instead.

The "ExecReload" did not work quite well in some old versions of systemd,
systemd assumed that only the configuration is reloaded on "ExecReload",
not the whole binary itself so it lost track of dmeventd daemon (it lost new
dmeventd PID). This is fixed and seems to be working fine now with recent
versions of dmeventd.
2011-10-31 12:22:49 +00:00
Zdenek Kabelac
d39f58ac2e Thin fix compile warns
Test for dm_snprintf < 0.
Add header for moved backup.
2011-10-30 22:52:08 +00:00
Zdenek Kabelac
8ae8b9600c More updates for lvcreate manpage 2011-10-30 22:08:21 +00:00
Zdenek Kabelac
cdf67ba0d0 Thin creation without activation
All thins are created with the next activation and VG is updated
without messages. Only some basic commands works.
(i.e. lvcreate -an  -V10 -T mvg/pool)
There can be some combination to confuse this system.

This functionality for snapshots is going to be interesting.
2011-10-30 22:07:38 +00:00
Zdenek Kabelac
03a4d297db Thin segment transaction_id moved
Add a new node flag send_messages that is used to simplify
test when to call _node_send_messages().

Add call to _node_send_messages when pool is deeper in the tree.
2011-10-30 22:04:57 +00:00
Zdenek Kabelac
4486b3b6a5 Cleanup unsuccessfully created thin LV
If something fails during creation of thin LV remove such LV
and deactivate in case it's been already tried to activate
(i.e. thin kernel driver fails for some reason.)
2011-10-30 22:02:18 +00:00
Zdenek Kabelac
8ffa1880be Make detach_pool_message visible for tools
Move there also vg_write and vg_commit.
2011-10-30 22:01:39 +00:00
Zdenek Kabelac
eadb017622 Thin cleanups
Fix/cleanup several error messages.
Remove test for seg_is_thin which could never be true there.
Replace (1<<24) with predefined constant.
2011-10-30 22:00:57 +00:00
Zdenek Kabelac
9c5039d3e1 Cleanup for lvmetad passing uninit structure
Shown by clang analyzer.
2011-10-30 21:58:59 +00:00
Zdenek Kabelac
8b7c241b1a Improve valgrind support when compiled with DEBUG_MEM 2011-10-28 20:37:31 +00:00
Zdenek Kabelac
bb8a42ff56 Continue with lvcreate man page update
Already in WHATS_NEW.
2011-10-28 20:36:05 +00:00
Zdenek Kabelac
cb961cdc83 Avoid creation of /dev/vg/thinpool 2011-10-28 20:34:45 +00:00
Zdenek Kabelac
15812aae51 Thin option documentation 2011-10-28 20:33:50 +00:00
Zdenek Kabelac
f924413d23 Thin support for stripe
Support stripe options to create thin data pool LV.

TODO: combine chunk size and stripe size.
2011-10-28 20:32:54 +00:00
Zdenek Kabelac
23177b8aad Thin pool resize support for data LV
Support for extension of pool data LV.

TODO: figure out thin volume for suspend/resume in cluster.
2011-10-28 20:31:01 +00:00
Zdenek Kabelac
22e6256d00 Thin support for lvrename
Rename pool's metadata lv _tmeta together with pool and _tdata.
2011-10-28 20:29:32 +00:00
Zdenek Kabelac
b9f1afd47c Thin works only with exclusive activation
Enforce exclusive activation with thin targets.
2011-10-28 20:29:06 +00:00
Zdenek Kabelac
98b5d31edd Thin pool activation change
To ensure we properly handle LV cluster locking - explicitely do
not allow to change the availability of the thin pool that is in use
for some thin LV.

As soon as the thin volume is created the only way to activate pool
is via implicit dependency.

Ignore thinpool open count for lv/vgchange operations.
2011-10-28 20:28:00 +00:00
Zdenek Kabelac
209bd9887c Thin output data_block_size via outsize
Use outsize to get nice size hint.
2011-10-28 20:25:08 +00:00
Zdenek Kabelac
82a46ed62c Improve lv_extend stack reporting
and some code cleanup with setting return value.
2011-10-28 20:23:24 +00:00
Zdenek Kabelac
cde3a58d4e Thin error messages clenaup and some indent 2011-10-28 20:19:26 +00:00
Zdenek Kabelac
1d5d5e41f7 Remove thin code from mirror/raid lv_extend 2011-10-28 20:18:32 +00:00
Zdenek Kabelac
20e6a4528a Extend virtual segment instead of adding new one
Before adding a new virtual segment to LV, check first whether
the last segment isn't already of the same type. In this case
extend last segment instead of creating the new one.

Thin volumes should have always only 1 virtual segment, but it
helps also to virtual snapshot or error segtype..
2011-10-28 20:17:55 +00:00
Zdenek Kabelac
7aa87822f7 Add last_seg
Implement a function to return the last segment in a LV.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
2011-10-28 20:12:54 +00:00
Zdenek Kabelac
69359fcc6a Trying to fix the retry logic
There should be no need for retry for our internal devices - it would be hinding
our own bug in the tree processing.
Update error messages to show also also device name.
No WHATS_NEW - in release fix.
2011-10-28 20:11:21 +00:00
Zdenek Kabelac
bc69125ef0 Fix core on buggy config file
Since fixed within unreleased version so no WHATS_NEW
2011-10-28 20:07:38 +00:00
Zdenek Kabelac
f542af20e2 Add find_config_tree_str_allow_empty
Add function to allow read of empty strings as valid arguments.
Add a warning message if string argument has ignored value.
2011-10-28 20:06:49 +00:00
Zdenek Kabelac
42a14fa876 Update header comment 2011-10-28 20:04:17 +00:00
Jonathan Earl Brassow
09f8dcdd7e Update lvconvert man page to describe '--type <segtype>' argument
Man page changes were not included when the ability to convert from
"mirror" -> "raid1" or "linear" -> "raid1" were made.
2011-10-25 13:24:23 +00:00
Jonathan Earl Brassow
0fd3c2aa2b Disallow 'mirrored' log for cluster mirrors.
Git commit ID 0864378250 was meant to disallow
'mirrored' logs for cluster mirrors.  However, when add_mirror_log is used
to create the log (as is now the case when using 'lvcreate' or converting only
the log) the check is bypassed.

This patch adds the check to add_mirror_log.
2011-10-25 13:17:04 +00:00
Zdenek Kabelac
80cdb5240e Don't print char type[8] as a plain string
pvck prints 'extra' character from the label since there is no '\0'
after the struct label entry and just uint64_t follows directly.
So avoid it by limiting 8 chars to be printed.

https://www.redhat.com/archives/lvm-devel/2011-January/msg00109.html

Signed-off-by: Paul Bolle <pebolle tiscali nl>
2011-10-24 10:24:39 +00:00
Zdenek Kabelac
5ee6718daa Test with -F flag
grep need -F to check what we really want to test.
Add better test for existing device.

Currently this test DOES NOT work with real /dev handle via udev
since our tool does not see such device listet through udev.

FIXME: We might be able to see it at least through dmsetup table and
use for lvm.
2011-10-23 21:24:27 +00:00
Zdenek Kabelac
13cca48ddc Drop mempool parameter from read functions
Use implicit vgmem pool.
2011-10-23 16:05:45 +00:00
Zdenek Kabelac
f5aee12510 Always use vg memory pool for allocated lv segment
Remove mem pool parameter from alloc_lv_segment()
Since we should always allocate LV segment from the vg mempool.
2011-10-23 16:02:01 +00:00
Zdenek Kabelac
7b2a7b7c43 Used device name instead of device path
Udev is tricky here - since with udev the node does not exists until
it resume happens.  So we need to resume devices via its name.
2011-10-23 15:43:10 +00:00
Zdenek Kabelac
755de5972d Do not make link in the real /dev
Skip trying to make link when real /dev dir is used for test.
2011-10-23 15:40:15 +00:00
Zdenek Kabelac
25fa1c2783 Expect failure with real /dev dir
As udev is for now incapable to create such devices
turn this test error into warning.
2011-10-23 15:39:08 +00:00
Zdenek Kabelac
48d03bb158 Fix usage of DEBUG_ENFORCE_POOL_LOCKING with DEBUG_MEM
Since DEBUG_MEM is storing own extra structure within returned memory chunk,
glibc free must be used directly for posix_memaling() allocated block.
2011-10-23 15:38:02 +00:00
Petr Rockai
3b704b8e5d In t-lvcreate-large, do not create a clustered VG. LV activation/suspend does
not work yet with singlenode clvmd in clustered VGs.
2011-10-23 14:30:51 +00:00
Zdenek Kabelac
a675bc3d0a Reduce stack size usage in print_log
As the buf2[] and locn[] can't be used at the same time, safe 1 page from
stack memory.
2011-10-22 16:52:00 +00:00
Zdenek Kabelac
c71178b1ab Remove old thin code from _lv_insert_empty_sublvs
Since thin is not able to use _lv_insert_empty_sublvs,
remove its appearence from this function.

Start to use extend_pool() function for desired functionality
and modify lv_extend() for this.
2011-10-22 16:48:59 +00:00
Zdenek Kabelac
cf3d037c96 Ensure thin LVs take an exclusive activation 2011-10-22 16:47:23 +00:00
Zdenek Kabelac
493ea6330b Remove extra empty check
dm_list_splice handles empty list itself, no need to duplicate code.
2011-10-22 16:46:34 +00:00
Zdenek Kabelac
c7b5b355f7 Consistently use metadata LV as the first in MDA
Cosmetic cleanup.
Mark LV as thin pool before calling attach_pool functions.
2011-10-22 16:45:25 +00:00
Zdenek Kabelac
1fcd3fc4b5 Recoded way to insert thin pool into vg
Code in _lv_insert_empty_sublvs  was not able to provide proper
initialization order for thin pool LV.

New function extend_pool() first adds metadata segment to pool LV which
is still visible. Such LV is activate and cleared.

Then new meta LV is created and metadata segments are moved there.
Now the preallocated pool data segment is attached to the pool LV
and layer _tpool is created. Finaly segment is marked as thin_pool.
2011-10-22 16:44:23 +00:00
Zdenek Kabelac
b7bbad9964 Make move_lv_segment non-static
This function could be useful for other _manip source files.

Use dm_list manipulation function for provided functionality,
which make the code more readable and avoid touching list
internal details here.
2011-10-22 16:42:10 +00:00
Alasdair Kergon
bd86888d74 Pass exclusive LV locks to all nodes in the cluster.
This was the intended behaviour, as described in the lvchange man page, so you
have complete control through volume_list in lvm.conf, but the code seems to
have been treating -ae as local-only for a very long time.
2011-10-21 15:49:45 +00:00
Zdenek Kabelac
cd99a84e2b Store transaction_id with created thin lv
So we know the creation history and this should be useful with vgcfgrestore.
2011-10-21 11:38:35 +00:00
Zdenek Kabelac
9b55b4e08e Remove double-hack for setting metadata size
Drop the second lv_extend and set 128MB directly in the first hack place.
2011-10-21 09:55:50 +00:00
Zdenek Kabelac
5a6a8f4897 Thin pool now support chunk size as well
Use chunksize option to specify data_block_size for thin pool target.
Drop low_water_mark to zero.
2011-10-21 09:55:07 +00:00
Zdenek Kabelac
2604d06ec8 Make units for chunksize more obvious 2011-10-21 09:53:16 +00:00
Zdenek Kabelac
9709a27cb6 Aling structure chunk on default aling size
Since we use 8 bytes by default for the pool allocation,
keep the structure aligned with this size.
(Somehow it was not applied in previous commit.)
2011-10-20 14:43:33 +00:00
Zdenek Kabelac
04275a672a Mark chunk memory for free as defined again for valgrind
When DEBUG_MEM is used, the memory is trashed with extra pattern before real
free() is called, and as this memory was marked as non accessible when used with
valgrind, make it again usable.
2011-10-20 13:39:57 +00:00
Zdenek Kabelac
9c1f0f4aef Aling structure chunk on default aling size
Since we use 8 bytes by default for the pool allocation,
keep the structure aligned with this size.
2011-10-20 13:36:13 +00:00
Zdenek Kabelac
45e1399532 Simplify some pointer operations 2011-10-20 13:33:41 +00:00
Zdenek Kabelac
ed6d3763da Remove unused file from tree 2011-10-20 13:23:52 +00:00
Zdenek Kabelac
5c796f3378 Just replace stack, return 0 with return_0 2011-10-20 10:39:07 +00:00
Zdenek Kabelac
887c9834bb Cleanup backtraces
Make sure stacks are at the right places when something goes wrong here.
2011-10-20 10:38:04 +00:00
Zdenek Kabelac
e8c577fc2e Add last param 0 for thin-pool
So now the table suppression works for thin-pool.
2011-10-20 10:35:55 +00:00
Zdenek Kabelac
692d718d9d Ensure right activation order
Couple FIXMEs put into the code for parts of the code which may be
improved later, since we might be able to add 'lazy' device creation later.
For now require exclusive activation.
2011-10-20 10:35:14 +00:00
Zdenek Kabelac
bc8bfff5ff Adapt to thin kernel target API
Since kernel target uses low_water_mark - use this name in libdm as well.
2011-10-20 10:33:30 +00:00
Zdenek Kabelac
6e2fdfed67 Reindent code
Avoid 1 indent level and use check for empty list only for
add of transaction_id message.
2011-10-20 10:32:29 +00:00
Zdenek Kabelac
3f59f7e14f Use const pointers in thin API were appropriate 2011-10-20 10:31:27 +00:00
Zdenek Kabelac
8bf74e77c0 Print low_water_mark only when it has some value
Do not expose low_water_mark in mda yet, if it has no use.
We do not allow to be set via current lvm tool code.
Usage needs to be clarified first.
2011-10-20 10:30:39 +00:00
Zdenek Kabelac
79835da163 Add _BLOCK_ to define
Use DM_THIN_MIN_DATA_BLOCK_SIZE and
DM_THIN_MAX_DATA_BLOCK_SIZE to make it more obvious, for which
this define is useful in thin API.
2011-10-20 10:28:41 +00:00
Zdenek Kabelac
90f6336f85 Improve lvcreate man page
Split syntax for thin-pool since it cannot be fully matched with snapshot.
So to avoid more confusion - take thin support into separate line.

Though still significant updates are needed for thin provisioning.
2011-10-19 16:49:13 +00:00
Zdenek Kabelac
4f12b7ad6a Add few more protected names 2011-10-19 16:45:35 +00:00
Zdenek Kabelac
e610f31ca4 Use structure copy
Since the code evolved a bit with current structures we could use C to
copy struct members.
2011-10-19 16:45:02 +00:00
Zdenek Kabelac
0f220e5858 Update error message
Drop INTERNAL_ERROR from public API functions.
Improve some messages.
2011-10-19 16:42:14 +00:00
Zdenek Kabelac
7679ee9535 Use generic name for message sending function
Drop _thin_pool prefix for _node_send_message so it could be extended later.
Replace current_id with trans_id name.
2011-10-19 16:40:59 +00:00
Zdenek Kabelac
a374aed61c Simple validation of messages in mda
Check we do not combine multiple messages for same LV target
and switch to use  'delete_id' to make it clear for what this device_id
is being used.
2011-10-19 16:39:09 +00:00
Zdenek Kabelac
866b21532a Drop messages referencing deleted LV
lvremove may remove problematic LV for thin target.
2011-10-19 16:37:30 +00:00
Zdenek Kabelac
fc260f5bcb Just indent changes
Some tabs & spaces.
2011-10-19 16:36:39 +00:00
Zdenek Kabelac
c239c824f5 Add internal expected_errno dm_tast var
Certain errno codes could be expected in some situations thus
add experimental support for them.

When expected errno is set after ioctl error - function skips error
printing and exits succefully.

Currently only useful for thin pool messages.
2011-10-19 16:36:01 +00:00
Zdenek Kabelac
d1be6a0c37 Remove test for thin_pool
Since both functions are called during mda read - we don't have full LV info
at this moment.
2011-10-19 16:32:34 +00:00
Petr Rockai
9f4ef9fda6 Remove a redundant (and in some cases, misleading) message about snapshot
extension, in the snapshot dmeventd plugin. The reporting is done as needed by
the LVM command nowadays.
2011-10-19 14:31:49 +00:00
Petr Rockai
2a8662ce2e New. 2011-10-19 09:01:03 +00:00
Petr Rockai
6de64854a8 Keep the LVM-based dmeventd plugins from trying to manipulate the dmeventd
monitoring state of the logical volumes they are currently acting on.

Until now, every time a logical volume has been changed by a dmeventd plugin,
this plugin would have called back to dmeventd through the external FIFO
mechanism. I am fairly sure this was superfluous, inefficient and possibly even
dangerous.
2011-10-19 08:46:26 +00:00
Jonathan Earl Brassow
a4ddd21f8e Fix bad lvconvert help output.
The '--merge' option to lvconvert works on snapshots and RAID1.  The man
pages correctly reflect this, but the CLI help output still used the term,
'SnapshotLogicalVolume'.
2011-10-18 16:27:45 +00:00
Zdenek Kabelac
cd2eab0d10 Use zalloc for malloc,memset 2011-10-17 14:36:06 +00:00
Zdenek Kabelac
edb7aaf046 Drop messages from lvm app context
(revert)
Thinp target uses activation context.
2011-10-17 14:18:07 +00:00
Zdenek Kabelac
10c2510aaf Indent debug message 2011-10-17 14:17:30 +00:00
Zdenek Kabelac
f8690cf8d5 Message support for thin provisiong
lvm part of messaging.

Each message is now stored it's own thin pool section:

message1 {
	create = lv
}

Messages are queued to thin pool dm target when this target
is going to be resumed or used through some dependency.

Currently  'delete' message are purely queued and processed
with next thin pool resume operation (i.e. create_thin).

WARNING - thin provisioning support is developmental code.
2011-10-17 14:17:09 +00:00
Zdenek Kabelac
49e3017e4a Add thin_pool dm message support
Experimental support for kernel message via resume sequence.
2011-10-17 14:16:25 +00:00
Zdenek Kabelac
5c5acddf01 Add _thin_validate_device_id 2011-10-17 14:15:26 +00:00
Zdenek Kabelac
f5dace2cb6 Swap parameters
Use metadata uuid first (match kernel target).
2011-10-17 14:15:01 +00:00
Zdenek Kabelac
783b4e1068 Drop old check for transaction_id
(revert)
2011-10-17 14:14:33 +00:00
Milan Broz
c752c23d33 Fix alignment warning in bitcount calculation for raid segment. 2011-10-17 13:15:35 +00:00
Jonathan Earl Brassow
2fd1acc4dd Use a more correct macro for 'seg_is_linear'
It is better to check 'seg->area_count == 1' than '!seg->stripe_size'.
2011-10-14 14:21:32 +00:00
Jonathan Earl Brassow
c954b73149 cmirrord now returns log name to kernel in CTR so it can be registered
Version 2 of the userspace log protocol accepts return information during the
DM_ULOG_CTR exchange.  The return information contains the name of the log
device that is being used (if there is one).  The kernel can then register the
device via 'dm_get_device'.  Amoung other things, this allows for userspace to
assemble a correct dependency tree of devices - critical for LVM handling of
suspend/resume calls.

Also, update dm-log-userspace.h to match the kernel header associated with
this protocol change.  (Includes a version inc.)
2011-10-14 14:18:49 +00:00
Jonathan Earl Brassow
681ceb16d8 Update stale libdm/misc/dm-log-userspace.h
The upstream kernel version that this file mirrors has changed, here is the
commit message:

commit 86a54a4802df10d23ccd655e2083e812fe990243
Author: Jonathan Brassow <jbrassow@redhat.com>
Date:   Thu Jan 13 19:59:52 2011 +0000

    dm log userspace: add version number to comms

    This patch adds a 'version' field to the 'dm_ulog_request'
    structure.

    The 'version' field is taken from a portion of the unused
    'padding' field in the 'dm_ulog_request' structure.  This
    was done to avoid changing the size of the structure and
    possibly disrupting backwards compatibility.

    The version number will help notify user-space daemons
    when a change has been made to the kernel/userspace
    log API.

    Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
    Signed-off-by: Mike Snitzer <snitzer@redhat.com>
    Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2011-10-14 14:04:05 +00:00
Zdenek Kabelac
54e43fd3de Use pool for dm_tree allocation
Using the same pool allocation strategy as we use for vg,
so dm_tree structure is part of the pool itself.
2011-10-14 13:34:19 +00:00
Zdenek Kabelac
878960cdba Fix lv_info open_count test
When verify_udev_operations was disable, code for stacking fs operation for
lvm links was completely disable - but this code was also used for collecting
information, that a new node is being created.

Add a new flag which is set when a creation of lv symlinks is requested which
should restore old behaviour of lv_info function, that has called fs_sync()
before quere for open count on device.
2011-10-14 13:23:47 +00:00
Zdenek Kabelac
1ec22a4f73 Remove unused variables 2011-10-11 10:06:57 +00:00
Zdenek Kabelac
f4668058b4 Use constant for the repeated dlid size specification 2011-10-11 10:02:28 +00:00
Zdenek Kabelac
2e36a19e52 Add some fixme locking
Code here is using thread write protected variable without locking.
So add locking, for proper synchronization and a FIXME, since the
code needs closer look.
2011-10-11 09:56:44 +00:00
Zdenek Kabelac
e65626090b Simplify worker loop
Do not reacquire mutex several times without a real reason.
Code readability is also better.
2011-10-11 09:54:39 +00:00
Zdenek Kabelac
b39f294d02 Use barrier instead of mutex
Barrier is supposed to be used in situation like this
and replace tricky mutex usage, where mutex has been unlocked
by a different thread than the locking thread.
2011-10-11 09:26:04 +00:00
Zdenek Kabelac
0372454d07 Add FIXMEs for init_test
Usage of thread unprotected init_test is not correct and needs probably lvm lock
since it part of lvm library. Current implementation may probably fail with
test mode and actually create something unexpectedly (and vice versa).
2011-10-11 09:23:48 +00:00
Zdenek Kabelac
584a487461 Update 2011-10-11 09:20:17 +00:00
Zdenek Kabelac
e9bdc318fd Limit thread stack
Since default thread stack size is around 8MB and clvmd creates for now thread
for message, clvmd may easily reach multi GB size of in-memory locked pages
(runs with mlockall()).

This patch significantly reduces memory usage to just tens of MB,
and now different reasons are the cause of server overloading.
Now we are running out of free file descriptors mostly.
2011-10-11 09:18:49 +00:00
Zdenek Kabelac
d2cd3f4b76 Update whats new 2011-10-11 09:14:51 +00:00
Zdenek Kabelac
83b15720c0 Reduce preallocated stack size
Go with just 64KiB for stack.

Closer inspection should be made, whether we actually need to play with
settings at all.

Since default stack size is 8MB and gets mapped via page locking thus,
it seems there is no big help with preallocation of stack to some value.
2011-10-11 09:13:39 +00:00
Zdenek Kabelac
3701873dc9 Check for refresh_filter failure
Properly detect if the filters were refreshed properly.

(May needs few more fixes ??)

Filter refresh may fail because it may be out of free file descriptors
when clvmd gets overloaded.
2011-10-11 09:09:00 +00:00
Zdenek Kabelac
b105d7e207 Add missing log_error for alloc failure 2011-10-11 09:06:09 +00:00
Zdenek Kabelac
fea9b4eaa3 Use condition instead of sleep
Replace usleep with pthread condition to increase speed testing
(for simplicity just 1 condition for all locks).

Use thread mutex also for unlock resource (so it wakes up awaiting
threads)

Better check some error states and return error in fail case with
unlocked mutex.
2011-10-11 09:05:20 +00:00
Zdenek Kabelac
eb050343b9 Use shorter way for if() 2011-10-11 09:03:33 +00:00
Zdenek Kabelac
4802bbc548 Skip backtrace after log_error 2011-10-11 09:02:20 +00:00
Zdenek Kabelac
3822c98285 Replace with debug
Since the dm_tree_create already reports reason of error,
use log_debug for this message.
2011-10-11 09:01:38 +00:00
Zdenek Kabelac
d70b1eea5d Improve backtrace reporting
Add <backtrace> so the function appears logged for the fail path.
2011-10-11 08:59:42 +00:00
Zdenek Kabelac
0c92ec4d21 Change message severity
Using log_warn to report missing symlinks as warning, since the command
itself returns as successful, we should not produce log_error().
log_warn is better fit here.
2011-10-11 08:57:13 +00:00
Zdenek Kabelac
fdeda0b438 Skip r assignment
Cosmetic, since r is already 0 for the error path, no need to assign it there,
and r is assigned to 1 after switch command.
Also makes the code more readable.
2011-10-11 08:54:01 +00:00
Zdenek Kabelac
e7eebbc90f Reindent some thin functions 2011-10-11 08:51:56 +00:00
Zdenek Kabelac
fcbb8e5c5d Remove test for first_time with FIXME
Workaround for the current code with big FIXME,
since proper solution for pvmove needs to be developed.

Commiting this only for the purpose to get cluster testing covered.
2011-10-11 08:51:02 +00:00
Jonathan Earl Brassow
2c80ace622 Add the ability to convert LVs of "mirror" segtype to "raid1" segtype.
Example:
~> lvconvert --type raid1 vg/mirror_lv

Steps to convert "mirror" to "raid1"
1) Allocate a RAID metadata LV for each mirror image from the same PVs
   on which they are located.
2) Clear the metadata LVs.  This involves writing LVM metadata, so we don't
   change any aspects of the mirror LV before this so that the user can easily
   remove LVs from the failed convert attempt while retaining the original
   mirror.
3) Remove the mirror log, if it exists.
4) Add metadata LVs to mirror LV
5) Rename mirror sub-lvs (s/mimage/rimage/)
6) Change flags and segtype from mirror to raid1
2011-10-07 14:56:01 +00:00
Jonathan Earl Brassow
50a48b38f5 Add the ability to convert linear LVs to RAID1
Example:
~> lvconvert --type raid1 -m 1 vg/lv

The following steps are performed to convert linear to RAID1:
1) Allocate a metadata device from the same PV as the linear device
   to provide the metadata/data LV pair required for all RAID components.
2) Allocate the required number of metadata/data LV pairs for the
   remaining additional images.
3) Clear the metadata LVs.  This performs a LVM metadata update.
4) Create the top-level RAID LV and add the component devices.

We want to make any failure easy to unwind.  This is why we don't create the
top-level LV and add the components until the last step.  Should anything
happen before that, the user could simply remove the unnecessary images.  Also,
we want to ensure that the metadata LVs are cleared before forming the array to
prevent stale information from polluting the new array.

A new macro 'seg_is_linear' was added to allow us to distinguish linear LVs
from striped LVs.
2011-10-07 14:52:26 +00:00
Jonathan Earl Brassow
76ab264200 Allow 'nosync' extension of mirrors.
This patch allows a mirror to be extended without an initial resync of the
extended portion.  It compliments the existing '--nosync' option to lvcreate.
This action can be done implicitly if the mirror was created with the '--nosync'
option, or explicitly if the '--nosync' option is used when extending the device.

Here are the operational criteria:
1) A mirror created with '--nosync' should extend with 'nosync' implicitly
[EXAMPLE]# lvs vg; lvextend -L +5G vg/lv ; lvs vg
  LV   VG   Attr     LSize Pool Origin Snap%  Move Log     Copy%  Convert
  lv   vg   Mwi-a-m- 5.00g                         lv_mlog 100.00
  Extending 2 mirror images.
  Extending logical volume lv to 10.00 GiB
  Logical volume lv successfully resized
  LV   VG   Attr     LSize  Pool Origin Snap%  Move Log     Copy%  Convert
  lv   vg   Mwi-a-m- 10.00g                         lv_mlog 100.00

2) The 'M' attribute ('M' signifies a mirror created with '--nosync', while 'm'
signifies a mirror created w/o '--nosync') must be preserved when extending a
mirror created with '--nosync'.  See #1 for example of 'M' attribute.

3) A mirror created without '--nosync' should extend with 'nosync' only when
'--nosync' is explicitly used when extending.
[EXAMPLE]# lvs vg; lvextend -L +5G vg/lv; lvs vg
  LV   VG   Attr     LSize  Pool Origin Snap%  Move Log     Copy%  Convert
  lv   vg   mwi-a-m- 20.00m                         lv_mlog 100.00
  Extending 2 mirror images.
  Extending logical volume lv to 5.02 GiB
  Logical volume lv successfully resized
  LV   VG   Attr     LSize Pool Origin Snap%  Move Log     Copy%  Convert
  lv   vg   mwi-a-m- 5.02g                         lv_mlog   0.39
vs.
[EXAMPLE]# lvs vg; lvextend -L +5G vg/lv --nosync; lvs vg
  LV   VG   Attr     LSize  Pool Origin Snap%  Move Log     Copy%  Convert
  lv   vg   mwi-a-m- 20.00m                         lv_mlog 100.00
  Extending 2 mirror images.
  Extending logical volume lv to 5.02 GiB
  Logical volume lv successfully resized
  LV   VG   Attr     LSize Pool Origin Snap%  Move Log     Copy%  Convert
  lv   vg   Mwi-a-m- 5.02g                         lv_mlog 100.00

4) The 'm' attribute must change to 'M' when extending a mirror created without
'--nosync' is extended with the '--nosync' option.  (See #3 examples above.)

5) An inactive mirror's sync percent cannot be determined definitively, so it
must not be allowed to skip resync.  Instead, the extend should ask the user if
they want to extend while performing a resync.
[EXAMPLE]# lvchange -an vg/lv
[EXAMPLE]# lvextend -L +5G vg/lv
  Extending 2 mirror images.
  Extending logical volume lv to 10.00 GiB
  vg/lv is not active.  Unable to get sync percent.
Do full resync of extended portion of vg/lv?  [y/n]: y
  Logical volume lv successfully resized

6) A mirror that is performing recovery (as opposed to an initial sync) - like
after a failure - is not allowed to extend with either an implicit or
explicit nosync option.  [You can simulate this with a 'corelog' mirror because
when it is reactivated, it must be recovered every time.]
[EXAMPLE]# lvcreate -m1 -L 5G -n lv vg --nosync --corelog
  WARNING: New mirror won't be synchronised. Don't read what you didn't write!
  Logical volume "lv" created
[EXAMPLE]# lvs vg
  LV   VG   Attr     LSize Pool Origin Snap%  Move Log Copy%  Convert
  lv   vg   Mwi-a-m- 5.00g                             100.00
[EXAMPLE]# lvchange -an vg/lv; lvchange -ay vg/lv; lvs vg
  LV   VG   Attr     LSize Pool Origin Snap%  Move Log Copy%  Convert
  lv   vg   Mwi-a-m- 5.00g                               0.08
[EXAMPLE]# lvextend -L +5G vg/lv
  Extending 2 mirror images.
  Extending logical volume lv to 10.00 GiB
  vg/lv cannot be extended while it is recovering.

7) If 'no' is selected in #5 or if the condition in #6 is hit, it should not
result in the mirror being resized or the 'm/M' attribute being changed.


NOTE:  A mirror created with '--nosync' behaves differently than one created
without it when performing an extension.  The former cannot be extended when
the mirror is recovering (unless in-active), while the latter can.  This is
a reasonable thing to do since recovery of a mirror doesn't take long (at
least in the case of an on-disk log) and it would cause far more time in
degraded mode if the extension w/o '--nosync' was allowed.  It might be
reasonable to add the ability to force the operation in the future.  This
should /not/ force a nosync extension, but rather force a sync'ed extension.
IOW, the user would be saying, "Yes, yes... I know recovery won't take long
and that I'll be adding significantly to the time spent in degraded mode, but
I need the extra space right now!".
2011-10-06 15:32:26 +00:00
Jonathan Earl Brassow
1986e51928 Fix splitmirror in cluster having different DM/LVM views of storage.
This patch also does some clean-up of the splitmirrors code.

I've attempted to clean-up the splitmirrors code to make it easier to
understand with fewer operations.  I've tried to reduce the number of
metadata operations without compromising the intermediate stages which
are necessary for easy clean-up in the even of failure.

These changes now correctly handle cluster situations - including exclusive
cluster mirrors.  Whereas before, a splitmirror operation would result in
remote nodes having LVM commands report the newly split LV with a proper
name while DM commands would report the old (pre-split) names of the device.
IOW, there was a kernel/userspace mismatch.
2011-10-06 14:55:39 +00:00
Jonathan Earl Brassow
f7235e7cb4 Revert initial solution to bug 733114 - I/O error message during splitmirror
The original commit comments can be located via this git commit ID:
	7d8e615c0b

There were three possible solutions to the original problem proposed in the
initial check-in.  The one chosen was as follows:
    2) Do like _remove_mirror_images does and suspend the original, then suspend
    the sub-lv (the error target), then resume the sub-lv, and finally resume the
    original LV.  This seems like extra pointless operations to me, but it doesn't
    produce the error message (although, I'm not sure why) and it allows us to
    leave the visible flag in place.
Turns out, the cluster also views the extra suspend/resume operations as
pointless too and ignores them.  So, this solution doesn't work in a cluster.
Further, I've noticed that in addition to the remote cluster nodes still getting
I/O errors from scanning the error target, they also have a different LVM and
DM views of the same LV.  IOW, while the LVM level (gotten from the LVM metadata)
sees the correct name for the newly split LV, device-mapper still maintains the
old names.

Because the original fix failed to completely fix the problem (or work-around it)
and because a better solution must be found to address the additional cluster
issue of device renaming, I am reverting the above mentioned commit.
2011-10-06 14:49:16 +00:00
Jonathan Earl Brassow
857339758f This patch fixes issues with improper udev flags on sub-LVs.
The current code does not always assign proper udev flags to sub-LVs (e.g.
mirror images and log LVs).  This shows up especially during a splitmirror
operation in which an image is split off from a mirror to form a new LV.

A mirror with a disk log is actually composed of 4 different LVs: the 2
mirror images, the log, and the top-level LV that "glues" them all together.
When a 2-way mirror is split into two linear LVs, two of those LVs must be
removed.  The segments of the image which is not split off to form the new
LV are transferred to the top-level LV.  This is done so that the original
LV can maintain its major/minor, UUID, and name.  The sub-lv from which the
segments were transferred gets an error segment as a transitory process
before it is eventually removed.  (Note that if the error target was not put
in place, a resume_lv would result in two LVs pointing to the same segment!
If the machine crashes before the eventual removal of the sub-LV, the result
would be a residual LV with the same mapping as the original (now linear) LV.)
So, the two LVs that need to be removed are now the log device and the sub-LV
with the error segment.  If udev_flags are not properly set, a resume will
cause the error LV to come up and be scanned by udev.  This causes I/O errors.
Additionally, when udev scans sub-LVs (or former sub-LVs), it can cause races
when we are trying to remove those LVs.  This is especially bad during failure
conditions.

When the mirror is suspended, the top-level along with its sub-LVs are
suspended.  The changes (now 2 linear devices and the yet-to-be-removed log
and error LV) are committed.  When the resume takes place on the original
LV, there are no longer links to the other sub-lvs through the LVM metadata.
The links are implicitly handled by querying the kernel for a list of
dependencies.  This is done in the '_add_dev' function (which is recursively
called for each dependency found) - called through the following chain:
	_add_dev
	dm_tree_add_dev_with_udev_flags
	<*** DM / LVM divide ***>
	_add_dev_to_dtree
	_add_lv_to_dtree
	_create_partial_dtree
	_tree_action
	dev_manager_activate
	_lv_activate_lv
	_lv_resume
	lv_resume_if_active
When udev flags are calculated by '_get_udev_flags', it is done by referencing
the 'logical_volume' structure.  Those flags are then passed down into
'dm_tree_add_dev_with_udev_flags', which in turn passes them to '_add_dev'.
Unfortunately, when '_add_dev' is finding the dependencies, it has no way to
calculate their proper udev_flags.  This is because it is below the DM/LVM
divide - it doesn't have access to the logical_volume structure.  In fact,
'_add_dev' simply reuses the udev_flags given for the initial device!  This
virtually guarentees the udev_flags are wrong for all the dependencies unless
they are reset by some other mechanism.  The current code provides no such
mechanism.  Even if '_add_new_lv_to_dtree' were called on the sub-devices -
which it isn't - entries already in the tree are simply passed over, failing
to reset any udev_flags.  The solution must retain its implicit nature of
discovering dependencies and be able to go back over the dependencies found
to properly set the udev_flags.

My solution simply calls a new function before leaving '_add_new_lv_to_dtree'
that iterates over the dtree nodes to properly reset the udev_flags of any
children.  It is important that this function occur after the '_add_dev' has
done its job of querying the kernel for a list of dependencies.  It is this
list of children that we use to look up their respective LVs and properly
calculate the udev_flags.

This solution has worked for single machine, cluster, and cluster w/ exclusive
activation.
2011-10-06 14:45:40 +00:00
Jonathan Earl Brassow
29044ecb22 Fix vgsplit when there are mirrors that have mirrored logs.
The problem as reported by "ben <benscott@nwlink.com>" on lvm-devel:

vgsplit fails with mirrored mirror log

#lvs --all -o lv_name,lv_attr,devices
LV                       Attr   Devices
MyMirror                 mwi--
[MyMirror_mimage_0]      Iwi--- /dev/sdq(0)
[MyMirror_mimage_1]      Iwi--- /dev/sdo(0)
[MyMirror_mimage_2]      Iwi--- /dev/sdi(0)
[MyMirror_mlog]          mwi---
[MyMirror_mlog_mimage_0] Iwi--- /dev/sds(0)
[MyMirror_mlog_mimage_1] Iwi--- /dev/sde(0)

#vgsplit -v "TestA" "TestB" "/dev/sdq" "/dev/sdo" "/dev/sdi" "/dev/sds"
"/dev/sde"
  Checking for volume group "TestA"
  Checking for new volume group "TestB"
  Archiving volume group "TestA" metadata (seqno 213).
Can't split mirror MyMirror between two Volume Groups

AFTER FIX:

[root@bp-01 ~]# lvs -a -o name,vg_name,devices vg new
  Volume group "new" not found
  Skipping volume group new
  LV                 VG   Devices
  lv                 vg   lv_mimage_0(0),lv_mimage_1(0)
  [lv_mimage_0]      vg   /dev/sdb1(0)
  [lv_mimage_1]      vg   /dev/sdc1(0)
  [lv_mlog]          vg   lv_mlog_mimage_0(0),lv_mlog_mimage_1(0)
  [lv_mlog_mimage_0] vg   /dev/sdh1(0)
  [lv_mlog_mimage_1] vg   /dev/sdi1(0)
[root@bp-01 ~]# vgsplit vg new /dev/sd[bchi]1
  New volume group "new" successfully split from "vg"
[root@bp-01 ~]# lvs -a -o name,vg_name,devices vg new
  LV                 VG   Devices
  lv                 new  lv_mimage_0(0),lv_mimage_1(0)
  [lv_mimage_0]      new  /dev/sdb1(0)
  [lv_mimage_1]      new  /dev/sdc1(0)
  [lv_mlog]          new  lv_mlog_mimage_0(0),lv_mlog_mimage_1(0)
  [lv_mlog_mimage_0] new  /dev/sdh1(0)
  [lv_mlog_mimage_1] new  /dev/sdi1(0)
2011-10-06 14:17:45 +00:00
Zdenek Kabelac
b47df54d48 Add more validation to config parser
Do not leave it for vgvalidate().
2011-10-06 11:06:36 +00:00
Zdenek Kabelac
9f9b3e1e28 Move defines to header
Make limits for thin data_block_size and device_id part of public API.

FIXME: read them possible from some kernel header file in the future ?
But we may need to support different values for different versions ?
2011-10-06 11:05:56 +00:00
Alasdair Kergon
43dce243ab Clarify multi-name device filter pattern matching explanation in lvm.conf.5. 2011-10-04 20:49:24 +00:00
Alasdair Kergon
122476adc6 Clarify multi-name device filter pattern matching explanation in lvm.conf.5. 2011-10-04 20:45:36 +00:00
Zdenek Kabelac
1fef12cd31 Name changes
typo zeroeing->zeroing
add size low_water_mark->low_water_mark_size so it's more obvious its sector
related variable.
2011-10-04 16:22:38 +00:00
Zdenek Kabelac
f53fb47dd9 Use capital letters 2011-10-04 12:39:59 +00:00
Zdenek Kabelac
b79f082537 Missed rename pool->thin_pool
Fix compilation
2011-10-03 19:10:52 +00:00
Zdenek Kabelac
b12ec9b372 Add code to activate thin target
Code to zero pool metadata lv when pool is created.
Add code to create thin target via message sending.

(Revert is missing)
2011-10-03 18:43:39 +00:00
Zdenek Kabelac
4121fbb8a4 Add simple function for lookup of some free device_id
Initial simple implementation for finding some free device_id.
2011-10-03 18:39:17 +00:00
Zdenek Kabelac
f683063d42 Add lvm functions for sending messages.
Functions are currently only needed for thin provissioning.
2011-10-03 18:37:47 +00:00
Zdenek Kabelac
c6d777289b Add intial code to check transaction_id
Fix typy in transaction_id.
Add this as node property, so it could be easily checked on resume.

Code is not yet finished.
2011-10-03 18:34:52 +00:00
Zdenek Kabelac
d75053e74f Display transaction_id for thin_pool 2011-10-03 18:31:03 +00:00
Zdenek Kabelac
27596fa624 Move priority check in front
Just a minor code mode - make a test for priority before
more complex uuid checks.
2011-10-03 18:29:48 +00:00
Zdenek Kabelac
f8b4957694 Update error path tracing for _resume_node
dm_task_create & dm_task_set_name produces it's own log_error
Add missing stacks for dm_task_set_cookie, dm_task_run,
dm_task_get_info.
2011-10-03 18:28:25 +00:00
Zdenek Kabelac
2daddac019 Transaction_id is property of thin_pool
Remove Transaction_id from thin target.
Store device_id for thin target.
2011-10-03 18:26:07 +00:00
Zdenek Kabelac
a6d73dc760 Add preload support for thin and thin_pool 2011-10-03 18:24:47 +00:00
Zdenek Kabelac
56fc7c0053 Fix bad error message for thinp validation 2011-09-29 09:03:36 +00:00
Zdenek Kabelac
4ffb50268a Let the utils to prepare PVs 2011-09-29 08:58:27 +00:00
Zdenek Kabelac
3473d2f219 Typo in debug message 2011-09-29 08:57:21 +00:00
Zdenek Kabelac
4a59dda8aa Add experimental code for activation of thinp targets
No dm messages yes - just a base functionality in the steps of other targets.
For now usable only for debugging and tracing.
2011-09-29 08:56:38 +00:00
Zdenek Kabelac
222bbab442 Add supporting function for thinp
New dm_tree_node_add_thin_pool_target() and  dm_tree_node_add_thin_target()
This API is highly experimental and unstable for now.
2011-09-29 08:53:48 +00:00
Zdenek Kabelac
3589d75998 Just add warning about potential problem exteding dm_segtypes
Since raid target is using now dm_segtypes also for search purpose.
2011-09-29 08:50:54 +00:00
Jonathan Earl Brassow
3016856a4c New handy gdb debugging function, "dm_list_size"
Example:
(gdb) dm_list_size &split_images
1 list items
2011-09-28 16:32:22 +00:00
Alasdair Kergon
e0948b5825 Introduce revert_lv for better pvmove cleanup.
(One further fix needed to remove the stray pvmove LVs left behind.)
2011-09-27 22:43:40 +00:00
Alasdair Kergon
691157a71e Replace incomplete pvmove activation failure recovery code with a message.
As it stands, the recovery code can make things worse sometimes so it's
better to insist on a proper 'pvmove --abort' cleanup.
2011-09-27 17:29:33 +00:00
Alasdair Kergon
e63febe5ec Abort if _finish_pvmove suspend_lvs fails instead of cleaning up incompletely.
Change suspend_lvs to call vg_revert internally.
Change vg_revert to void and remove superfluous calls after failed vg_commit.
2011-09-27 17:09:42 +00:00
Alasdair Kergon
88c3d4b61a better -m0 error message, but there's an internal logic error to fix instead 2011-09-27 12:37:07 +00:00
Alasdair Kergon
fcdcef33c3 typo 2011-09-27 12:34:14 +00:00
Alasdair Kergon
ead841e0a3 correct thin_pool width 2011-09-27 12:33:36 +00:00
Zdenek Kabelac
26f43649f9 Show some Thin related info in lvdisplay 2011-09-26 13:11:02 +00:00
Peter Rajnoha
d1f949465f Add log_error even for general device in use when we can't do the sysfs checks. 2011-09-26 10:17:51 +00:00
Zdenek Kabelac
57f1027a03 Use execvp for clvmd restart
Since execve passed only NULL as environ, we had lost all environment vars on
restart - thus actually running  'different' clvmd then the one at start.

Preserving environ allows to restart clvmd with the same settings
(i.e. LD_LIBRARY_PATH)

Add test for second restart.
2011-09-26 07:51:23 +00:00
Zdenek Kabelac
7f7e0704f6 Remove test for NULL
Since it's internal function and we always check for NULL value
before call - this is safe.

Just for case add nonnull attribute so analyzer might better
catch error.
2011-09-25 19:45:40 +00:00
Zdenek Kabelac
ed3d5e9409 Add missing log_error messages 2011-09-25 19:43:43 +00:00
Zdenek Kabelac
f0633627b4 Add backtrace when allocation fails for _type 2011-09-25 19:42:45 +00:00
Zdenek Kabelac
1dd5dfed81 Replace test for NULL of root->child with test for NULL l
It's 100% equivalent test - since it always happen for the first iteration.
But the check for 'l' is understandable with analyzers - since analyzer
is not smart enough to deduce connection between  root->child == NULL.
2011-09-25 19:41:27 +00:00
Zdenek Kabelac
2d2d9ac875 Simplier attribute format
No need to repeat whole declaration for static function.
2011-09-25 19:40:29 +00:00
Zdenek Kabelac
3416af3f5d Chheck for failing filename strdup 2011-09-25 19:39:38 +00:00
Zdenek Kabelac
4da6e11c5a Use NULL for pointers 2011-09-25 19:38:59 +00:00
Zdenek Kabelac
bd085674b2 Restart CLVMD with same cluster manager
Add named cluster_ops to easily learn the name of the active cluster manager,
so we are able to restart singlenode manager in testing.

Add simple test for clvmd -S  (restart) and -R (refresh)
(though it needs some extensions).
2011-09-25 19:37:00 +00:00
Zdenek Kabelac
71ee4b8d25 Fix log_error() usage
Cosmetic - skip <bactrace> when error has been just printed in raid segtype.
Add missing log_error if allocation would fail for unknown segtype.
2011-09-24 21:19:30 +00:00
Zdenek Kabelac
1fd620a436 Allow overwrite for VERIFY_UDEV
When running tests it might be useful to have an override option when
testing on real /dev  and some broken system (i.e. Debian and its rules).

So one can use:

LVM_TEST_DEVDIR=/dev LVM_VERIFY_UDEV=1 make check
2011-09-24 21:15:13 +00:00
Zdenek Kabelac
5764014aa3 Avoid sending garbage to terminal in verbose mode.
When read in drain returned <0 value, terminal content has been trashed.
Remove unneeded  memset() and use whole buffer.
Free  readbuf before exit (valgrind).
2011-09-24 21:12:35 +00:00
Zdenek Kabelac
cc12990b2f Improvements
Simplify RUN_BASE

Put .tests-stamp deps only for check target and fix its cleanup.
Fix abs_top_srcdir.
vgimportclone needs  srcdir.
Clean  api subdir.
2011-09-24 21:10:19 +00:00
Zdenek Kabelac
a6791e34ba Fix install_ocf
When builddir is different from srcdir install_ocf: has not been able to find
files for installation.
2011-09-24 21:05:03 +00:00
Zdenek Kabelac
8deff7018a Drop cleanup of .exported_symbols_generated in DISTCLEAN_TARGETS
Makefile cosmetics - since .exported_symbols_generated in cleardir:
target via make.tmpl, there is no need to set them in DISTCLEAN_TARGETS.
2011-09-24 21:00:52 +00:00
Zdenek Kabelac
48b9cbab24 Use Makefile for daemmons/common library.
Next iteration for better fit of lvmetad compilation.

Move build of libdaemon.a into common subdir Makefile.
libdaemon.a is device-mapper target.

Build and install lvmetad as lvm2 target.
2011-09-24 20:57:49 +00:00
Zdenek Kabelac
4c71c99934 Detect unusable nodes on fs
Test whether nodes could be used on given filesystem where TMP
dir is being used and skip teardown quicker in fail case.
(makes the problem quickly obvious if you try to such fs).

Skip teardown_dev if we have not created any devs yet.

and do not mkdir /dev/mapper dir  when LVM_TEST_DEVDIR is set.

Drop this test from t-000-basic.sh.
2011-09-24 20:54:35 +00:00
Zdenek Kabelac
8ed5c47ce5 CLVMD support for LVM_CLVMD_BINARY and LVM_BINARY
Read 2 environmental vars to learn about overide position for
CLVMD and LVM binaries.

We support LVM_BINARY in other script - and this way we could easily
test restart in our test-suite.
2011-09-24 20:50:35 +00:00
Zdenek Kabelac
9c2095311e CLVMD bugfix support for args -S -E
Bugfix:
Add (most probably unfinished) support for -E arg with list of exclusive
locks.  (During clvmd restart all exclusive locks would have been lost and
in fact, if there would have been an exclusive lock, usage text would be
printed and clvmd exits.)

Instead of parsing list options multiple times every time some lock UUID is
checked - put them straight into the hash table - make the code easier to
understand as well.

Remove  was_ex_lock() function (replaced with dm_hash_lookup()).

Swap return value for get_initial_state() (1 means success).

Update man pages and usage info for -E option.
2011-09-24 20:48:34 +00:00
Peter Rajnoha
5d604a99f6 readlink does not append a null byte to the output string! 2011-09-24 11:47:53 +00:00
Jonathan Earl Brassow
283094fb67 update man page for new RAID lv_attr characters. 2011-09-23 19:21:53 +00:00
Alasdair Kergon
4e8d5bc726 explain why we may now retry 2011-09-23 17:16:28 +00:00
Alasdair Kergon
e7bc2978e5 missing Makefile dep 2011-09-23 17:05:44 +00:00
Jonathan Earl Brassow
1e32e75ee4 Update the RAID design doc to reflect some of the new options introduce (e.g.
--merge and --trackchanges) and document the coding steps of up/down-conversion,
splitting RAID1 images, and merging RAID1 images.
2011-09-23 17:04:41 +00:00
Jonathan Earl Brassow
c6e1353c4d Add 'Volume Type' lv_attr characters for RAID and RAID_IMAGE.
RAID_META is already handled.
2011-09-23 15:17:54 +00:00
Zdenek Kabelac
d00941c9a4 Update dmsetup man page
While dmsetup command properly shows all dmsetup resume supported options,
man page missed to document it.
2011-09-23 09:51:37 +00:00
Zdenek Kabelac
373b717a5f Build all man pages
When running plain 'make' in man dir - do also those for device-mapper target.
2011-09-23 09:48:21 +00:00
Peter Rajnoha
2729ca70bc Initialize 'retryable' variable. 2011-09-22 17:59:58 +00:00
Peter Rajnoha
ef2a81b860 Add activation/retry_deactivation to lvm.conf to retry deactivation of an LV. 2011-09-22 17:39:56 +00:00
Peter Rajnoha
3157f8e8fa Add dm_tree_retry_remove to use retry logic for device removal in a dm_tree. 2011-09-22 17:36:50 +00:00
Peter Rajnoha
638409a573 Replace open_count check with holders/mounted_fs check on lvremove path.
Before, we used to display "Can't remove open logical volume" which was
generic. There 3 possibilities of how a device could be opened:
  - used by another device
  - having a filesystem on that device which is mounted
  - opened directly by an application

With the help of sysfs info, we can distinguish the first two situations.
The third one will be subject to "remove retry" logic - if it's opened
quickly (e.g. a parallel scan from within a udev rule run), this will
finish quickly and we can remove it once it has finished. If it's a
legitimate application that keeps the device opened, we'll do our best
to remove the device, but we will fail finally after a few retries.
2011-09-22 17:33:50 +00:00
Peter Rajnoha
def057522f Add dm_device_has_holders fn to to check use of the device by another device.
Add dm_device_has_mounted_fs fn to check mounted filesystem on a device.

This requires sysfs directory to be correctly set via dm_set_sysfs_dir
(/sys by default). If sysfs dir is not used or it's set incorrectly,
dm_device_has_{holders,mounted_fs} will return 0!
2011-09-22 17:23:35 +00:00
Peter Rajnoha
1f92ab6e8a Add dm_set_sysfs_dir to libdevmapper to set sysfs location.
Add dm_sysfs_dir to libdevmapper to retrieve sysfs location thas is set.
2011-09-22 17:17:07 +00:00
Peter Rajnoha
2f64783850 Add --retry option for dmsetup remove to retry removal if not successful. 2011-09-22 17:12:28 +00:00
Peter Rajnoha
6b1629455a Add dm_task_retry_remove fn to use retry logic for device removal.
This call ensures that the dm device removal is retried several
times before failing.
2011-09-22 17:09:48 +00:00
Jonathan Earl Brassow
1d251c85da Disallow the creation of mirrors (mirror or raid1 segtype) with only one leg.
If you specify the segment type (e.g. --type mirror) and the mirrors argument
as zero, it would result in a mirrored LV with only one image.  While the device
may be valid in theory, it should not be allowed in practice.  It also makes it
difficult on the conversion tools, since they react badly to single-image
mirrors.
2011-09-22 15:36:21 +00:00
Jonathan Earl Brassow
3fa30a939f When up-converting a RAID1 array, we need to allocate new larger arrays for
seg->areas and seg->meta_areas.  We also need to copy the memory from the
old arrays to the newly allocated arrays.  The amount of memory to copy was
determined by seg->area_count.  However, seg->area_count was being set to the
higher value after copying the 'seg->areas' information, but before copying
the 'seg->meta_areas' information.  This means we were copying more memory
than necessary for 'seg->meta_areas' - something that could lead to a segfault.
2011-09-22 15:33:21 +00:00
Zdenek Kabelac
9cd366bf2b Clvmd restart cleanup
Patch fixes Clang warnings about possible access via lv_name NULL pointer.

Replaces allocation of memory (strdup) with just pointer assignment
(since execve is being called anyway).

Checks for  !*lv_name only when lv_name is defined.
(and as I'm not quite sure what state this really is - putting a FIXME
around - as this rather looks suspicios ??).

Add debug print of passed clvmd args.
2011-09-22 09:47:34 +00:00
Zdenek Kabelac
d283be0597 Add all exclusive locks to clvmd restart option args
Fix bug when only every even lock has been passed.

Warning: currently -E causes clvmd to exit with usage text being printed.
2011-09-22 09:45:24 +00:00
Alasdair Kergon
fb38615cf6 FIXMEs to note problems with some error paths. (Perhaps safer to disable
them until they can be fixed completely?)
2011-09-21 16:36:39 +00:00
Milan Broz
c7ea2c24f8 Always sent the whole command header in restart/reload clvmd commands.
(Newly added check catch this as invalid packet.)

(N.B. that code is so fragile that it need full rewrite soon:-)
2011-09-21 13:40:46 +00:00
Zdenek Kabelac
a456d3c77f Check for failing 'stat' and skip this loop iteration
(since data in statbuf are invalid).

Check whether sysconf managed to find _SC_PAGESIZE.

Report at least debug warning about failing unlink
(logging scheme here seems to be a different then in lvm).

Duplicate terminal FDs and use similar code as is made in clvmd
and cleanup warns about missing open/close tests.
FIXME: Looks like we already have 3 instancies of the same code in lvm repo.
2011-09-21 10:42:53 +00:00
Zdenek Kabelac
1a82ef4386 Add missing log_error() to lvresize command when fsadm tool fails
Also add test case
2011-09-21 10:39:47 +00:00
Zdenek Kabelac
8ae350ee06 Unmount works with -y 2011-09-19 19:37:26 +00:00
Zdenek Kabelac
1c088b0a2b Add support for DM_DEV_DIR
Follow other commands support this directory setting.
Useful for test suite.
2011-09-19 19:36:52 +00:00
Zdenek Kabelac
dab5a7fad0 Another vgremove for speedup
It seems like test-suite is significantly slowed down on removal phase.

Maybe it should be part of generic teardown - though the reason of slowdown
should be also discovered -  probably related to retry loop ?
2011-09-19 18:40:51 +00:00
Zdenek Kabelac
cc8b9dd973 Revert patch
Caller of exec must report log_error when rstatus is passed.
2011-09-19 18:38:43 +00:00
Zdenek Kabelac
a6fd7ce6a5 Ensure fsadm.sh is executable
When the srcdir == builddir we get the link on non-exectable file.
So make always sure fsadm.sh is executable script file.

Add cleanup target for lib/fsadm.
2011-09-19 15:54:15 +00:00
Zdenek Kabelac
7b3831d001 Use log_error instead of log_verbose when executed command fails 2011-09-19 14:54:23 +00:00
Zdenek Kabelac
c110bc0154 Add support for non /dev devices
Since test suite is not using /dev - add support for such dirs into fsadm.
2011-09-19 14:52:33 +00:00
Jonathan Earl Brassow
e9bafdd2ce fix compiler warning.
Compiler says variable may be used uninitialized.  It can't be, but we
initialize the variable to NULL anyway.  Also, remove the double initialization
of another variable.
2011-09-19 14:28:23 +00:00
Zdenek Kabelac
7aa34a6a19 Enable t-fsadm test
Now all commands are executed from preffered directory.
2011-09-19 13:52:22 +00:00
Zdenek Kabelac
304c544ce5 Support different PATH setting
When fsadm is test - it needs to execute lvm and fsadm from non-standard path
setting.   So adding a support in fsadm script when user set LVM_BINARY, then
the lvm command invoced from fsadm will have the same PATH setting as before
entering  fsadm command.

Needed for testing.
2011-09-19 13:51:09 +00:00
Zdenek Kabelac
9ee3f61aee Surround all executed commands with quotes
In case someone would use filename paths with spaces when changing
this script surround commands with '"'.

With default settings there is no change in behavior.
2011-09-19 13:47:37 +00:00
Zdenek Kabelac
ff9808505b Fix missing '$' in test 2011-09-19 13:43:50 +00:00
Zdenek Kabelac
e2749ba847 Add link for fsadm script
Prepare to enabling t-fsadm.sh test
2011-09-19 12:50:17 +00:00
Zdenek Kabelac
80b6a3a309 Test speed up
Removing vg  reduces time execution of this test.
Teardown has some wait states here...
2011-09-19 12:49:14 +00:00
Zdenek Kabelac
1207e7a5b1 Move debug message
so it does not look like we are executing command in the middle of
critical_section in log trace.
2011-09-19 12:48:02 +00:00
Zdenek Kabelac
fa6b41848e Few more updates to lvmetad build deps
It seem lvmetad deps must be expressed after the include.
Also adding lvmetad deps to device-mapper target in daemons dir.
2011-09-19 00:29:11 +00:00
Zdenek Kabelac
48f57c1cf0 Build fixes for lvmetad
Should now be giving better build order and install lvmetad.
2011-09-17 14:50:22 +00:00
Zdenek Kabelac
6a9d54829f More gcc warnings removed 2011-09-17 14:49:18 +00:00
Zdenek Kabelac
77bc50144c Fix for gcc compilation warnings
and put _XOPEN_SOURCE so pthread_mutexattr is properly defined.
2011-09-17 13:33:51 +00:00
Jonathan Earl Brassow
a94299f451 Fix Bug 738832 - core to disk log conversion fails with internal error
This bug showed up when trying to add a log to a mirror whose images are on
multiple devices.  This is an intra-release regression and no WHATS_NEW
entry will be added.  The error was introduce in the following commit:
	2d8a2f35c7

The solution is to recognise in _alloc_init that if there are no mirrors
or stripes specified, then 'new_extents' should be zero.
2011-09-16 18:39:03 +00:00
Jonathan Earl Brassow
5c11544dc1 After suspend/resume following a splitmirror op, call sync_local_dev_names
to settle udev before calling deactivate_lv.

This is an intra-release regression (no WHATS_NEW entry required).  It is
part of the fix for the current WHATS_NEW entry:
  Work around resume_lv causing error LV scanning during splitmirror operation.
2011-09-16 16:41:37 +00:00
Milan Broz
747060fe19 Fix clvmd processing of invalid request on local socket. (rommer)
Code now detects small packet and wrong arglen and reply with
error intead of infinite loop.

https://bugzilla.redhat.com/show_bug.cgi?id=738484
2011-09-16 14:40:06 +00:00
Milan Broz
40f8cf3f7c Disable t-lvcreate-large.sh for now with cluster. 2011-09-16 14:32:47 +00:00
Zdenek Kabelac
ba230df659 Add inital thin testing
Currently test is skipped by default (since it needs code hack to work)
Check command line options to create & remove thin pools and thin volumes.

Activation code for thin LV support is missing, thus it only works without
driver loaded.
2011-09-16 12:23:59 +00:00
Zdenek Kabelac
528c26a1ce Remove thin volumes before thin pools
When user wants to remove thin pool - check if there are no thin volumes using it.
If so - query before removal (or -ff for no question) and remove them first.
2011-09-16 12:12:51 +00:00
Zdenek Kabelac
75394455bc Fix command line option decoding
LVM has huge set of options now - it's approaching 60 short-arg less options
and we get interesting case of misdetection for 'merge' option which has been
put into the middle of options with 'short_arg' - thus certainly past 65. (ASCII 'A').

To avoid confusion of short_arg with long_opt number - add  '128' to all such
non-short-arg options.
2011-09-16 12:10:02 +00:00
Zdenek Kabelac
b565cb3503 Update 2011-09-16 12:01:48 +00:00
Zdenek Kabelac
6e181ba96d Reset LV status when unlinking LV from VG
When LV is unlinked, we want to catch problem in vg_validate,
that LV has changed.

i.e. catch LV has been removed and is no long thin_pool while still
being referenced by some thin volume.
2011-09-16 11:59:22 +00:00
Zdenek Kabelac
cd1e29b180 Trim spaces on EOL 2011-09-16 11:53:14 +00:00
Zdenek Kabelac
b0f8a418b5 Remove vg at the test's end
Speedup this test by removing mirrored vg when it's not longer needed.
teardown() seems to be waiting here for too long.
2011-09-16 11:52:33 +00:00
Petr Rockai
adc43e35d3 Update WHATS_NEW. 2011-09-16 10:02:14 +00:00
Petr Rockai
02155338a6 Fix the divisibility check in the allocator for the mirror+stripe case (require
divisibility by stripe count alone, not by (mirror*stripe)).
2011-09-16 09:59:42 +00:00
Petr Rockai
396ebcef66 Update WHATS_NEW. 2011-09-15 20:01:21 +00:00
Petr Rockai
a9f4775dc3 When resizing LVs, always round in the safe direction, regardless of whether we
were called as lvresize or lvreduce.
2011-09-15 18:51:11 +00:00
Milan Broz
34d290b424 Fix possible overflow of size if %FREE or %VG is used.
https://bugzilla.redhat.com/show_bug.cgi?id=737087
2011-09-15 15:26:40 +00:00
Milan Broz
8d1a933b76 Fix vgchange activation of snapshot with virtual origin. 2011-09-14 18:20:03 +00:00
Zdenek Kabelac
52301e7c5d Fix memory overwrite
Transfer of build_dm_uuid() function into libdm made uuid_prefix as parameter,
thus sizeof() was replaced with strlen() and room for '\0' missed.

As it's only fix in current version - no whatsnew.
2011-09-14 16:07:07 +00:00
Milan Broz
d9f3c324e6 Activate virtual snapshot origin exclusively (only on local node in cluster). 2011-09-14 14:20:16 +00:00
Zdenek Kabelac
719a9e4605 Add suggest parentheses around '&&'
Follow gcc suggestion.
2011-09-14 10:03:15 +00:00
Zdenek Kabelac
3bb7050e14 LVM_WRITE and LVM_READ are 64bit constants
Revert John patch, which fixed only 1 place where ~LVM_WRITE was in use and
convert ommited LVM_READ/WRITE flags to 64bit constants as well.
(Since both 'status' flags for LV and VG are 64bit.)
2011-09-14 09:57:35 +00:00
Zdenek Kabelac
572d99cacc Add missing underscores to local static functions 2011-09-14 09:54:21 +00:00
Zdenek Kabelac
f89aa98af5 Keep the old-style function definition 2011-09-14 09:53:32 +00:00
Jonathan Earl Brassow
0168b2fe11 Additional fixes for lv_mirror_count.
Changing lv_mirror_count to only count the AREA_LVs made the function
stop working for PVMOVE mirrors.  A conditional has been added to fix
that problem.  Additionally, when counting the images in a mirror stack,
we don't need to subtract 1 from the count we get back from the
lv_mirror_count call on the temporary mirror layer.  (This is because we
are no falsely counting the top layer of the temporary mirror.)
2011-09-14 04:10:26 +00:00
Jonathan Earl Brassow
2c9cf3b73b Fix for bug 734252 - problem up converting striped mirror after image failure
lv_mirror_count was not able to handle mirrors of stripes properly.  When a
failed device is removed, the MIRRORED status flag is removed from the LV
conditionally based on the results of lv_mirror_count.  However, lv_mirror_count
trusted the MIRRORED flag - thinking any such LV must be mirrored.  It would
happily assign first_seg(lv)->area_count as the number of mirrors, but when
a mirrored striped LV was reduced to a simple striped LV area_count would be
the number of /stripes/ not the number of /mirrors/.  A result higher than 1
would be returned from lv_mirror_count, the MIRRORED flag would not be cleared,
and the LV would fail to be up-converted properly in lvconvert_mirrors_aux
because of it.
2011-09-14 02:45:36 +00:00
Jonathan Earl Brassow
eb0af241f1 Fix bug 733400 - Mirror down conversion when specifying the secondary leg is broke
The operation of deactivating the residual error target LV after removing a
mirror layer can cause a "device in-use" conflict with udev.  Giving udev a
poke before calling deactivate_lv eliminates the conflict.  The stick used
to poke udev is 'sync_local_dev_names'.
2011-09-13 21:13:33 +00:00
Jonathan Earl Brassow
39feace42e Fix for bug 737200 - Can't create mirrored-log mirror on a VG with small extents
Kernel requires a mirror to be at least 1 region large.  So,
if our mirror log is itself a mirror, it must be at least
1 region large.  This restriction may not be necessary for
non-mirrored logs, but we apply the rule anyway.

(The other option is to make the region size of the log
mirror smaller than the mirror it is acting as a log for,
but that really complicates things.  It's much easier to
keep the region_size the same for both.)
2011-09-13 18:42:57 +00:00
Jonathan Earl Brassow
2d8a2f35c7 Better fix for bug 737125 - unable to create mirror on 1K extent size VG
WHATS_NEW entry:
Fix log size calculation when only a log is being added to a mirror.

The original fix pass the mirror LV to allocate_extents (rather than
passing NULL) so that _alloc_init could correctly determine the necessary
size of the mirror log.  In the previous check-in, I noted:
    In order to get a decent value computed, we need to pass in the 'lv' argument
    to allocate_extents.  This would normally imply a desire for cling/contiguous
    allocation to the given LV, but since we are not allocating any parallel
    extents and only log extents, it works fine.
However, passing in the LV did have unintended consequences on the placement of
the log.  The better solution is to pass in the number of extext that are in
the mirror LV instead of the LV itself.  This will not cause the allocator to
reserve that number of extents, because 'stripes' and 'mirrors' are specified
as 0.  Thus, 'extents' is used to calculate the size of the log, but won't
affect how much is allocated.
2011-09-13 18:11:38 +00:00
Jonathan Earl Brassow
d954a7f991 Changing RAID status flags to 64-bit broke some binary flag operations.
LVM_WRITE is a 32-bit flag.  Now that RAID[_IMAGE|_META] are 64-bit,
and'ing a RAID LV's status against LVM_WRITE can reset the higher order
flags.

A similar thing will affect thinp flags if not careful.
2011-09-13 16:33:21 +00:00
Peter Rajnoha
40a234183c Retry DM_DEVICE_REMOVE ioctl if device is busy.
This is a workaround for long-lasting problem with using the WATCH udev
rule. When trying to remove a DM device, this one can still be opened
while processing the event in parallel (generated based on the WATCH
udev rule).

Let's use this until we have a proper solution.
2011-09-13 15:13:41 +00:00
Jonathan Earl Brassow
bfd774a20a Fix for bug 737125 - unable to create mirror on 1K extent size VG
_alloc_init calculates the number of necessary log extents via
'mirror_log_extents'.  'mirror_log_extents' takes 3 arguments: region_size,
pe_size, and size of the mirror LV.  Unfortunately, _alloc_init is guessing at
the mirror size by using 'ah->new_extents / ah->area_multiple' - the number of
extents that the mirror images have.  However, this is /always/ wrong when
allocating the log separately.  Further, the log is always allocated separately
unless we are up-converting the mirror at the same time.  It was by luck alone
that a default value of '1' reflects what we want in most cases.

In order to get a decent value computed, we need to pass in the 'lv' argument
to allocate_extents.  This would normally imply a desire for cling/contiguous
allocation to the given LV, but since we are not allocating any parallel
extents and only log extents, it works fine.
2011-09-13 14:37:48 +00:00
Jonathan Earl Brassow
7d8e615c0b Fix for bug 733114.
When an image is split from a 2-way mirror, the original mirror is converted to
a linear device.  To do this, the top "layer" must be removed.  The segments
are transferred from the sub-lv to the top-level LV and the link is severed.
The former sub-lv - having its segments transferred - now contains a temporary
error target.

When the original LV is resumed, the old sub-lv that now contains an error
segment is activated and scanned.  This is what causes the I/O error messages.
There are three ways to fix this problem:

1) Do not set the sub-lv which contains the error target as "visible" before
suspending the original LV.  This way, when the original is resumed, the sub-lv
device node is not created and it is not scanned - avoiding the error messages.
 The problem with this approach is that if the machine crashes after the
resume, it leaves the *hidden* LV in place and the user has a more difficult
time noticing that it needs to be cleaned up.  Thus, this type of processing is
frowned upon.

2) Do like _remove_mirror_images does and suspend the original, then suspend
the sub-lv (the error target), then resume the sub-lv, and finally resume the
original LV.  This seems like extra pointless operations to me, but it does not
produce the error message (although, I'm not sure why) and it allows us to
leave the visible flag in place.

3) Flag the sub-lv (error target) with a "do not scan" flag.  This seems like
the cleanest approach, but I have been unable to find the method for doing
this.  LVs get tagged in such a way by _get_udev_flags, but in this case the
resume of the original LV also resumes the error target LV without running it
through _get_udev_flags (likely because they are no longer linked).  Could
there be something wrong in resume_lv?

Option #2 was chosen to fix this bug, but it seems like more of a workaround
for now.
2011-09-13 13:59:19 +00:00
Jonathan Earl Brassow
0bab63a818 reorder some status flag printing in gdbinit file. 2011-09-13 13:57:02 +00:00
Zdenek Kabelac
3f21a5341d Updated testing script
Since attrs were extended by 2 new fields update check functions.
2011-09-09 13:19:19 +00:00
Alasdair Kergon
d8487f1f4f Append z to lv_attr if new blocks will be zeroed. 2011-09-09 01:15:18 +00:00
Alasdair Kergon
5bd65a2834 Add a new 'thin_pool' output field to 'lvs.
A gentle reminder that anyone relying on the output of reporting commands
like lvs in scripts must use -o to guarantee they get the fields they expect.

The default sequence of fields can change from release to release.
Equally, the 'attr' fields can have new values introduced and/or characters
appended to them.
2011-09-09 00:54:49 +00:00
Jonathan Earl Brassow
61b33c0a0a Adjust gdbinit script to new RAID status flag values. 2011-09-08 22:19:45 +00:00
Alasdair Kergon
a083187168 Add 7th lv_attr char to show the related kernel target.
Add thin volume types to lv_attr.
2011-09-08 20:55:39 +00:00
Alasdair Kergon
af71952d7a lvcreate/remove thin_pool and thin volumes (--driverloaded n only) 2011-09-08 16:41:18 +00:00
Alasdair Kergon
ad67fb4b73 Terminate pv_attr field correctly. (2.02.86) 2011-09-07 13:42:00 +00:00
Zdenek Kabelac
e6273046f4 Fix typo 2011-09-07 09:48:49 +00:00
Zdenek Kabelac
4961eeba52 Check if lp.origin exists
Currently needed for _determine_snapshot_type().
2011-09-07 09:25:51 +00:00
Zdenek Kabelac
58da345e4e Improve man page style
Only reformat man pages.
2011-09-07 08:50:35 +00:00
Zdenek Kabelac
8ff2e245ac Support break for vgchange and vgrefresh operation
Allow to break some lengthy vgchange and vgrefresh operation.
2011-09-07 08:41:47 +00:00
Zdenek Kabelac
83ca5e6d5c Remove unused passed parameters 2011-09-07 08:37:48 +00:00
Zdenek Kabelac
d05c996d48 Minor change for pv_create api
Switch int to unsigned type.
2011-09-07 08:34:21 +00:00
Zdenek Kabelac
867e4cfffb Replace char class :space: with explicit chars
Some major distributions are still using 'mawk' and they are not using
the latest version - we end here with hidden dependency on the latest
version of mawk (1.3.4) while i.e. Debian Lenny seems to stay with 1.3.3.
So we end with completely broken  vgimportclone script on such system.

We would need to check for proper support of :space: and abort build if
it doesn't work or simplier replace [:space:] with [ \t] which seems
sufficient to make it work (as can be seen in this patch)

A better fix would be to use command line parameter override - leaving
as FIXME comment.

This patch makes t-vgimportclone.sh test passing on Lenny.
2011-09-07 08:31:16 +00:00
Alasdair Kergon
10ab5428df pool attach fns & more field renaming 2011-09-06 22:43:56 +00:00
Zdenek Kabelac
704cb6b4e9 Convert data->pool 2011-09-06 22:35:44 +00:00
Alasdair Kergon
c455c4c1d9 add thin_manip.c like the other manip files
move basic lv_is_* to macros
data_lv -> pool_lv - we decided to call it 'pool' everywhere now
2011-09-06 19:25:42 +00:00
Alasdair Kergon
d19192b20e Start using 64-bit status flags - most of the code already handles them.
tdata -> tpool
remove commented out definitions from metadata.h
formatting clean-ups
2011-09-06 18:49:31 +00:00
Zdenek Kabelac
054c6c890a Detect sscanf recovering_region input error
Missing check for sscanf found by static analyzer.
2011-09-06 18:24:27 +00:00
Zdenek Kabelac
ebb39e1041 Fix memory leak of allocated bitmap in error path
Found by static analyzer.
2011-09-06 18:15:43 +00:00
Zdenek Kabelac
c5120c1a7a Log unlink() error 2011-09-06 18:11:21 +00:00
Alasdair Kergon
0de26cd346 else 2011-09-06 15:39:46 +00:00
Alasdair Kergon
77729280f6 tdata->tpool 2011-09-06 15:38:44 +00:00
Alasdair Kergon
f5a8f2d2ca only thin volumes need converting 2011-09-06 15:35:11 +00:00
Alasdair Kergon
e4b64bf719 lvcreate parsing for thin provisioning.
The rest is incomplete so this isn't usable yet.
2011-09-06 00:26:42 +00:00
Alasdair Kergon
096dbb58f9 Remove incorrect requirement for -j or -m from lvchange error message. 2011-09-05 12:54:29 +00:00
Zdenek Kabelac
0b13efd7e5 Hack for build rules of lvmetad
FIXME
2011-09-02 14:49:00 +00:00
Zdenek Kabelac
d522fa71f0 Add missing 'static' for local function
Avoid missing prototype warning.
2011-09-02 12:38:43 +00:00
Zdenek Kabelac
ccc01ce94b Compile fix
Reflecting change in dm_config_value  float r -> float f.
2011-09-02 11:04:12 +00:00
Alasdair Kergon
0031b78938 temp notes on dealing with cascade 2011-09-02 01:59:07 +00:00
Alasdair Kergon
5c216d6eb2 Move cascade inside libdm etc.
Makes dumpconfig whole-section output wrong in a different way from before,
but we should be able to merge cft_cmdline properly into cmd->cft now and
remove cascade.
2011-09-02 01:32:08 +00:00
Alasdair Kergon
c147bbddb3 Comments, FIXMEs, name changes. 2011-09-01 21:04:14 +00:00
Jonathan Earl Brassow
985a6a65cb Updates to gdbinit file
1) Add more status flags for printing
2) Fix bug in lv_status_r processing for mirror segment type
2011-09-01 21:01:12 +00:00
Jonathan Earl Brassow
9ab111abc0 Fix for bug 732142: Unsafe table load during mirror image split
There was a bad sequence:
*) Make changes to LV layout to split images (e.g. 4-way -> 2-way/2-way)
1) vg_write, suspend_lv(original_mirror), vg_commit
2) activate_lv(newly_split_lv)
3) resume_lv(original_mirror)

Step #2 is not allowed.  However, without it, the resume of the original
mirror will also resume its former sub-LVs - making it impossible to
activate the newly split LV due to the changes in layering, pointers, and
names that had already been made.  Additionally, the resume or the original
brings the sub-lv's online with names that differ from the metadata on disk -
also a no-no.  Thus, the split must be done in stages such that the active LVs
always reflect what is in the committed LVM metadata.

First, alter the original mirror by releasing the images.  The images are made
visible and independent as an intermediate stage.  (This way, we can have
consistency between LVM metadata and active LVs.)  The second stage collects
the recently split LVs, deactivates them, forms them into a mirror if necessary,
and then activates them.  It is a bit of a circuitous method, but it is the only
way to split a mirror from a mirror and obey these general rules:
1) Never [de]activate sub-lvs when the top-level LV is suspended
2) Avoid having active LVs that differ from the description in the LVM metadata

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
2011-09-01 19:22:11 +00:00
Alasdair Kergon
eced331945 Add comments & remove always-included header. 2011-09-01 17:58:27 +00:00
Alasdair Kergon
6f6b2cdc52 Only build libdaemon.a when we need it at the moment. 2011-09-01 17:56:53 +00:00
Zdenek Kabelac
d91c270d1c Use const casting when it's needed
Keep the lookup operation const and use const casting at the dm_ function level.
2011-09-01 14:02:05 +00:00
Zdenek Kabelac
1f5226d652 Match the prototype old-style declaration 2011-09-01 13:30:11 +00:00
Zdenek Kabelac
997dabdb9e Keep the old-style prototypes 2011-09-01 13:25:50 +00:00
Zdenek Kabelac
b90e9912f1 Use size_t return type
Since these function returns buffer size - use size_t type for them.
2011-09-01 10:25:22 +00:00
Zdenek Kabelac
fa784f6198 Mark unreleased memory pools as internal error 2011-09-01 10:19:01 +00:00
Zdenek Kabelac
066465803c Reflect dm_config API update 2011-09-01 10:16:32 +00:00
Petr Rockai
118fa896b7 Replace const usage of dm_config_find_node with more appropriate value-lookup
functionality. A number of bugs (copied and pasted all over the code) should
disappear:

- most string lookup based on dm_config_find_node would segfault when
  encountering a non-zero integer (the intention there was to print an
  error message instead)
- check for required sections in metadata would have been satisfied by
  values as well (i.e. not sections)
- encountering a section in place of expected flag value would have
  segfaulted (due to assumed but unchecked cn->v != NULL)
2011-08-31 15:19:19 +00:00
Petr Rockai
4c76c264dc Mention --enable-lvmetad in WHATS_NEW. 2011-08-31 12:47:59 +00:00
Petr Rockai
66fc323292 Do not call prepare_lvmetad just yet in tests. 2011-08-31 12:42:55 +00:00
Petr Rockai
dbe351860e Fix warnings and constness handling in lvmetad-core (adjusting the
dm_config_find_node to give non-const node pointer, since that better reflects
the contract of that function).
2011-08-31 12:39:58 +00:00
Petr Rockai
178675bec5 Fix warnings in daemons/common. 2011-08-31 12:18:40 +00:00
Petr Rockai
2254b74598 Get rid of the old lvmetad Makefile. 2011-08-31 11:44:27 +00:00
Zdenek Kabelac
26c828a598 Autoreconf for lvmetad patch 2011-08-31 11:39:32 +00:00
Petr Rockai
c4d973530f A compromise integration of LVMetaD into the build: I have kept all the
daemon/common code in a single libdaemon.a, which is completely private. This
is currently linked into the lvmetad binary, and will be linked into LVM (the
client part, since static linking only picks up only symbols that are actually
used). I have also added --enable/disable-lvmetad to ./configure; although the
current default is off, I expect this to be flipped to on shortly. There's no
LVM-side support yet, but when there is, even when built, it'll still need to
be enabled by an lvm.conf option.
2011-08-31 11:31:57 +00:00
Zdenek Kabelac
731c45ff37 Update for resource leak 2011-08-31 08:23:33 +00:00
Zdenek Kabelac
0b97ce28d6 Fix resource leak when strdup fails
Static analyzer noticed, strdup failing path leaks dmt structure.
2011-08-31 08:23:05 +00:00
Petr Rockai
51e1300519 Adapt LVMetaD to use the new dm_config interfaces. 2011-08-30 15:44:01 +00:00
Petr Rockai
5c43b9bda7 Adapt the daemon/common code to use the new dm_config interface. 2011-08-30 15:42:56 +00:00
Petr Rockai
d60c24dda8 Move the core of the lib/config/config.c functionality into libdevmapper,
leaving behind the LVM-specific parts of the code (convenience wrappers that
handle `struct device` and `struct cmd_context`, basically). A number of
functions have been renamed (in addition to getting a dm_ prefix) -- namely,
all of the config interface now has a dm_config_ prefix.
2011-08-30 14:55:15 +00:00
Peter Rajnoha
4020e6db24 Directly allocate buffer memory in a pvck scan instead of using a mempool.
There's a very high memory usage when calling _pv_analyse_mda_raw (e.g. while
executing pvck) that can end up with "out of memory".

_pv_analyse_mda_raw scans for metadata in the MDA, iteratively increasing the
size to scan with SECTOR_SIZE until we find a probable config section or we're
at the edge of the metadata area. However, when using a memory pool, we're also
iteratively chasing for bigger and bigger mempool chunk which can't be found
and so we're always allocating a new one, consuming more and more memory...

This patch just changes the mempool to direct memory allocation in this
problematic part of the code.
2011-08-29 13:37:36 +00:00
Alasdair Kergon
38c4b48f33 same for segtype_is_thin 2011-08-26 18:17:05 +00:00
Alasdair Kergon
dc2a994176 seg_is_thin includes both thin_pool and thin_volume 2011-08-26 18:15:14 +00:00
Alasdair Kergon
93b7108bf7 thin - hide unimplemented dso fn; remove duplicate origin_lv field; add
some lvcreate struct parms
2011-08-26 17:40:53 +00:00
Zdenek Kabelac
6fb30e2313 Initial code for read/write of thin metadata lv segments 2011-08-26 13:37:47 +00:00
Zdenek Kabelac
b300359824 Add registration of thin_pool segment
Register thin and thin_pool segment via multiple_segtypes.
2011-08-25 10:00:09 +00:00
Alasdair Kergon
fbacf137f2 Fix raid shared lib segtype registration (2.02.87). 2011-08-24 13:41:46 +00:00
Zdenek Kabelac
750fff3bc7 Initial code layout for thin provisioning target
Only registers init_thin_segtype

Option --with-thin=internal needed for compilation.
For now useful only for developememt!
2011-08-24 08:27:49 +00:00
Alasdair Kergon
0064f3a9a8 . 2011-08-19 23:01:20 +00:00
Alasdair Kergon
8f263acd48 Remove incorrect error message added in 2.02.87. 2011-08-19 22:55:07 +00:00
Alasdair Kergon
4c503ee2a6 post-release 2011-08-19 19:42:39 +00:00
Alasdair Kergon
13023bf932 clarify comment 2011-08-19 19:35:50 +00:00
Alasdair Kergon
8f6b060e7f tweak split/track/merge
How do you discover the value of N?
2011-08-19 17:36:13 +00:00
Alasdair Kergon
37086f40a1 spaces->tabs 2011-08-19 17:02:48 +00:00
Alasdair Kergon
946f96cda5 revert incomplete inconsistent log message change for now 2011-08-19 16:49:00 +00:00
Alasdair Kergon
c6a99b7ccb avoid multi-line calc with incorrect intermediate var contents 2011-08-19 16:41:26 +00:00
Alasdair Kergon
ab52d127bb pre-release 2011-08-19 16:31:00 +00:00
Alasdair Kergon
6ab3b611c7 restrict dm_tree_node_add_null_area 2011-08-19 16:26:02 +00:00
Alasdair Kergon
735254fba4 _ for static fns 2011-08-19 15:59:15 +00:00
Jonathan Earl Brassow
29ab9ad5b3 Move RAID convert tests to new file, t-lvconvert-raid.sh
There is duplicate code in t-lvconvert-raid.sh and t-lvcreate-raid.sh.
This should be moved into a common file which is then sourced by these two
files.  I'll wait to move the duplicate code until I can talk to mornfall.
2011-08-18 19:56:17 +00:00
Jonathan Earl Brassow
b87604e649 Add ability to merge back a RAID1 image that has been split w/ --trackchanges
Argument layout is very similar to the merge command for snapshots.
2011-08-18 19:43:08 +00:00
Jonathan Earl Brassow
4fad401cd2 Add support for m-way to n-way up-convert in RAID1 (no linear to n-way yet)
This patch adds the ability to upconvert a raid1 array - say from 2-way to
3-way.  It does not yet support upconverting linear to n-way.

The 'raid' device-mapper target allows for individual components (images) of
an array to be specified for rebuild.  This mechanism is used when adding
new images to the array so that the new images can be resync'ed while the
rest of the images in the array can remain 'in-sync'.  (There is no
mirror-on-mirror layering required.)
2011-08-18 19:41:21 +00:00
Jonathan Earl Brassow
cc00073da7 Add the ability to split an image from the mirror and track changes.
~> lvconvert --splitmirrors 1 --trackchanges vg/lv
The '--trackchanges' option allows a user the ability to use an image of
a RAID1 array for the purposes of temporary read-only access.  The image
can be merged back into the array at a later time and only the blocks that
have changed in the array since the split will be resync'ed.  This
operation can be thought of as a partial split.  The image is never completely
extracted from the array, in that the array reserves the position the device
occupied and tracks the differences between the array and the split image via
a bitmap.  The image itself is rendered read-only and the name (<LV>_rimage_*)
cannot be changed.  The user can complete the split (permanently splitting the
image from the array) by re-issuing the 'lvconvert' command without the
'--trackchanges' argument and specifying the '--name' argument.
	~> lvconvert --splitmirrors 1 --name my_split vg/lv
Merging the tracked image back into the array is done with the '--merge'
option (included in a follow-on patch).
	~> lvconvert --merge vg/lv_rimage_<n>

The internal mechanics of this are relatively simple.  The 'raid' device-
mapper target allows for the specification of an empty slot in an array
via '- -'.  This is what will be used if a partial activation of an array
is ever required.  (It would also be possible to use 'error' targets in
place of the '- -'.)  If a RAID image is found to be both read-only and
visible, then it is considered separate from the array and '- -' is used
to hold it's position in the array.  So, all that needs to be done to
temporarily split an image from the array /and/ cause the kernel target's
bitmap to track (aka "mark") changes made is to make the specified image
visible and read-only.  To merge the device back into the array, the image
needs to be returned to the read/write state of the top-level LV and made
invisible.
2011-08-18 19:38:26 +00:00
Jonathan Earl Brassow
bd1f90fd60 Add --splitmirrors support for RAID1 (1 image only)
Users already have the ability to split an image from an LV of "mirror"
segtype.  This patch extends that ability to LVs of "raid1" segtype.

This patch only allows a single image to be split off, however.  (The
"mirror" segtype allows an arbitrary number of images to be split off.
e.g.  4-way => 3-way/linear, 2-way/2-way, linear,3-way)
2011-08-18 19:34:18 +00:00
Jonathan Earl Brassow
7058ed4431 When down-converting RAID1, don't activate sub-lvs between suspend/resume
of top-level LV.

We can't activate sub-lv's that are being removed from a RAID1 LV while it
is suspended.  However, this is what was being used to have them show-up
so we could remove them.  'sync_local_dev_names' is a sufficient and
proper replacement and can be done after the top-level LV is resumed.
2011-08-18 19:31:33 +00:00
Alasdair Kergon
4332bb3951 Add -V as short form of --virtualsize in lvcreate. 2011-08-17 15:15:36 +00:00
Jonathan Earl Brassow
c5bfcfd65e Compiler warning fixes, better error messaging, and cosmetic changes.
1) add new function 'raid_remove_top_layer' which will be useful
to other conversion functions later (also cleans up code)
2) Add error messages if raid_[extract|add]_images fails
3) Add function prototypes to prevent compiler warnings when
compiling with '--with-raid=shared'
2011-08-13 04:28:34 +00:00
Alasdair Kergon
3bb841dfa1 makefile fixes 2011-08-12 13:03:35 +00:00
Alasdair Kergon
571a3ecff0 core removed with clean not distclean 2011-08-12 12:56:38 +00:00
Milan Broz
19a05d9764 Do not remove makefile during "make clean". 2011-08-12 07:57:24 +00:00
Alasdair Kergon
927e33104a post-release 2011-08-12 02:34:08 +00:00
Alasdair Kergon
2955689c71 more pre-release fixes 2011-08-12 02:16:46 +00:00
Alasdair Kergon
a35e96114f Drop CCS* variables no longer used. 2011-08-12 01:52:16 +00:00
Alasdair Kergon
5fd6065e99 pre-release 2011-08-12 01:34:11 +00:00
Jonathan Earl Brassow
0cf914b095 Various code clean-ups (s/malloc/zalloc/, new msgs, etc)
Fix a couple more issues that kabi found.
- Add some error messages in failure cases
- s/malloc/zalloc/
- use vg->vgmem for lv names instead of vg->cmd->mem
2011-08-11 21:32:18 +00:00
Zdenek Kabelac
787fa3705d Fix memleak of geometry buffer
Looks like this function is not used too often - thus leak was discovered
by static analyzis (Coverity).
2011-08-11 20:49:33 +00:00
Jonathan Earl Brassow
a684497b3d Add missing checks for function return codes.
Some functions were being called without having their return values checked.
2011-08-11 19:38:00 +00:00
Zdenek Kabelac
18d2cd332b Trivial, add void to ignore dm_snprinf result 2011-08-11 19:21:42 +00:00
Alasdair Kergon
9a9dde2d8c pre-release fixes incl make distclean and configure --with-raid=none/shared 2011-08-11 19:18:17 +00:00
Jonathan Earl Brassow
470f8b1266 Add some log_error msg's and fix potential segfault
Thanks to kabi for spotting these - especially the possibility for
segfault if a loop runs all the way through without finding a match.
2011-08-11 19:17:10 +00:00
Jonathan Earl Brassow
219b1d015b Add ability to down-convert RAID1 arrays.
Also, add some simple RAID tests to testsuite.
2011-08-11 18:24:40 +00:00
Zdenek Kabelac
0e29db4a51 Update udev rules to skip DM flags decoding for removed devices
Skip decoding of DM flags when device is removed.

We currently need DM flags only for add|change events. So forking
dmsetup process for removed devices is a waste of CPU time.

Udev is already quite slow, so make it just a tiny bit faster.
2011-08-11 17:55:29 +00:00
Zdenek Kabelac
fff1723b6f Add detect_internal_vg_cache_corruption to lvm.conf
Add config option to enable crc checking of VG structures.
Currently it's disabled by default.

For the internal test-suite this check it is enabled.

Note: In the case the internal error is detected, debug build with
compile option DEBUG_ENFORCE_POOL_LOCKING helps to catch the source
of the problem.
2011-08-11 17:46:13 +00:00
Zdenek Kabelac
93cecfbfb6 Lock memory for shared VG
Use debug pool locking functionality. So the command could check,
whether the memory in the pool has not been modified.

For lv_postoder() instead of unlocking and locking for every changed
struct status member do it once when entering and leaving function.
(mprotect would trap each such memory access).
Currently lv_postoder() does not modify other part of vg structure
then status flags of each LV with flags that are reverted back to
its original state after function exit.
2011-08-11 17:34:30 +00:00
Zdenek Kabelac
64d62e1ded Add memory pool locking functions
Adding debuging functionality to lock and unlock memory pool.

2 ways to debug code:
crc - is default checksum/hash of the locked pool.
      It gets slower when the pool is larger - so the check is only
      made when VG is finaly released and it has been used more then
      once.Thus the result is rather informative.

mprotect - quite fast all the time - but requires more memory and
           currently it is using posix_memalign() - this could be
	   later modified to use dm_malloc() and align internally.
           Tool segfaults when locked memory is modified and core
	   could be examined for faulty code section (backtrace).

Only fast memory pools could use mprotect for now -
so such debug builds cannot be combined with DEBUG_POOL.
2011-08-11 17:29:04 +00:00
Zdenek Kabelac
7ad0d47c3c Cache and share generated VG structs
Extend vginfo cache with cached VG structure. So if the same metadata
are use, skip mda decoding in the case, the same data are in use.
This helps for operations like activation of all LVs in one VG,
where same data were decoded giving the same output result.

Patch adds 1-to-1 connection between volume_group and lvmcache_vginfo.
2011-08-11 17:24:23 +00:00
Milan Broz
cc2dcab096 Remove dev name prefix from dmsetup line output if major and minor is used. 2011-08-11 17:06:24 +00:00
Peter Rajnoha
25f72cfd07 Fix possible format instance memory leaks and premature releases in _vg_read. 2011-08-11 16:31:40 +00:00
Peter Rajnoha
0390acff8c Suppress locking error messages in monitoring init scripts. 2011-08-11 15:27:46 +00:00
Jonathan Earl Brassow
86f87f9673 Need 'ifdef' checks around RAID monitoring functions as well to catch the
case where the user does not want dmeventd support compiled in.
2011-08-11 14:00:58 +00:00
Milan Broz
f19d62c8b2 Fix build of raid without dmeventd. 2011-08-11 13:30:36 +00:00
Milan Broz
48aa36a654 Never ever use uninitialised descriptors if pipe() fails
(because of maximum open count fd exceeded or so).

Better return busy message, so even clients fail in more controlled way.

Without patch,  clvmd crashes with various strange backtraces
if under heavy load of connection requests.

https://bugzilla.redhat.com/show_bug.cgi?id=698393
https://bugzilla.redhat.com/show_bug.cgi?id=697945

(It still can OOM but that's different issue.)
2011-08-11 12:57:10 +00:00
Jonathan Earl Brassow
9c3fd6948b Add dmeventd monitoring for RAID devices. 2011-08-11 05:00:20 +00:00
Jonathan Earl Brassow
c65ebf3e70 Add RAID metadata devices to considered devices in _add_lv_to_dtree.
_add_lv_to_dtree must also add RAID metadata devices.
2011-08-11 04:18:17 +00:00
Jonathan Earl Brassow
430335e24e Fix renaming of RAID logical volumes.
The function 'for_each_sub_lv', which rename uses, was not handling the
RAID metadata areas.  Thus, the metadata LVs were not being renamed.
2011-08-11 03:29:51 +00:00
Zdenek Kabelac
e56eaf05f9 Just add new lines between header comment 2011-08-10 20:26:41 +00:00
Zdenek Kabelac
e9bfcd0df0 Replace free_vg with release_vg
Move the free_vg() to  vg.c  and replace free_vg  with release_vg
and make the _free_vg internal.

Patch is needed for sharing VG in vginfo cache so the release_vg function name
is a better fit here.
2011-08-10 20:25:29 +00:00
Zdenek Kabelac
340f6f334b Remove INCONSISTENT_VG flag
As this flag could not have been set by the current code - removing it.

Note: because of the wrong code logic this call:

lvmcache_update_vg(correct_vg, correct_vg->status & PRECOMMITTED &
			   (inconsistent ? INCONSISTENT_VG : 0));

had always passed '0' - now after flag removal it's passing
PRECOMMITTED flag in - this present functinal change in this patch.

To match the original functionality - 0 had to be always passed.
More testing is needed here.
2011-08-10 20:17:33 +00:00
Jonathan Earl Brassow
46e3fe8cf4 Fix compiler warning.
Compiler complaining that meta_lv could be used uninitialized.  (Not true
because it is protected by 'clear_metadata'.)  I switched to using 'lv->vg',
as it makes no difference to vg_[write|commit].
2011-08-10 16:44:17 +00:00
Milan Broz
311d319e0a If anything bad happens and unlocking fails
(here clvmd crashed in the middle of operation),
lock is not removed from cache - here is one example:

locking/cluster_locking.c:497       Locking VG V_vg_test UN (VG) (0x6)
locking/cluster_locking.c:113   Error writing data to clvmd: Broken pipe
locking/locking.c:399         <backtrace>
locking/locking.c:461         <backtrace>
  Internal error: Volume Group vg_test was not unlocked

Code should always remove lock info from lvmcache and update counters
on unlock, even if unlock fails.
2011-08-10 16:07:53 +00:00
Milan Broz
6447934cff Initialise clvmd locks before lvm context to avoid open descriptor leaks.
(At least /dev/mapper/control is open after lvm context init.)
2011-08-10 11:00:32 +00:00
Alasdair Kergon
739cca872a autoreconf 2011-08-09 18:14:48 +00:00
Milan Broz
0deb49e2b6 Remove obsoleted GULM clvmd cluster locking support. 2011-08-09 18:11:00 +00:00
Alasdair Kergon
bd7f41d83c Remove support for the original dm ioctl interface version 1.
Leave the basic support for multiple versions in case we have a new version
in future.
2011-08-09 17:56:47 +00:00
Peter Rajnoha
f9213d84e4 Suppress low-level locking errors and warnings while using --sysinit.
Today, we use "suppress_messages" flag (set internally in init_locking fn based
on 'ignorelockingfailure() && getenv("LVM_SUPPRESS_LOCKING_FAILURE_MESSAGES")'.
This way, we can suppress high level messages like "File-based locking
initialisation failed" or "Internal cluster locking initialisation failed".

However, each locking has its own sequence of initialization steps and these
could log some errors as well. It's quite misleading for the user to see such
errors and warnings if the "--sysinit" is used (and so the ignorelockingfailure
&& LVM_SUPPRESS_LOCKING_FAILURE_MESSAGES environment variable). Errors and
warnings from these intermediary steps should be suppressed as well if requested.

This patch propagates the "suppress_messages" flag deeper into locking init
functions. I've also added these flags for other locking types for consistency,
though it's not actually used for no_locking and readonly_locking.
2011-08-09 11:44:57 +00:00
Peter Rajnoha
ab42676eff Remove unused 'origin' variable in lv_remove_single function. 2011-08-05 09:21:13 +00:00
Zdenek Kabelac
67d0232816 Add missing check for allocation failure
Static analyzer discovered missing check.
2011-08-04 17:56:11 +00:00
Zdenek Kabelac
b43795c364 Remove unused inconsistent_seqno
Last usage was removed in Petr's commit related to VG mda repair fix
where relaxed check starts to ignore inconsistencies coming from
PVs that are marked MISSING - thus removing unused variable.
2011-08-04 15:18:10 +00:00
Zdenek Kabelac
9f6098f724 Add 'empty' command
Using empty command ';' here makes the code look nicer.
2011-08-04 14:54:48 +00:00
Zdenek Kabelac
46d6fcc99a Remove meaningless const type qualifiers on cast type
Static analyzis noticed we do not really need them - so removing.
2011-08-04 14:30:51 +00:00
Zdenek Kabelac
63fe43c2f9 Remove self assigment which has no effect
Just a tiny code cleanup found by analyzer.
2011-08-04 14:11:28 +00:00
Zdenek Kabelac
45e96ab875 Minor memory leak fix
Defer the test of the function return value after the string memory is released.
Otherwise in this error path the string would present memory leak.
(Thought in this case we are already out of memory...)
2011-08-04 12:40:24 +00:00
Zdenek Kabelac
65073ca838 Add test for fcntl error in singlenode client code.
Static analyzer noticed this check could be handy.
2011-08-04 12:13:50 +00:00
Zdenek Kabelac
96854e8d35 Remove --force option from lvrename manpage. 2011-08-04 10:14:42 +00:00
Zdenek Kabelac
ced0c8cbd6 Add missing new line in lvrename help text. 2011-08-04 10:12:44 +00:00
Jonathan Earl Brassow
01d49d0e71 Add basic RAID segment type(s) support.
Implementation described in doc/lvm2-raid.txt.

Basic support includes:
- ability to create RAID 1/4/5/6 arrays
- ability to delete RAID arrays
- ability to display RAID arrays
Notable missing features (not included in this patch):
- ability to clean-up/repair failures
- ability to convert RAID segment types
- ability to monitor RAID segment types
2011-08-02 22:07:20 +00:00
Peter Rajnoha
801a1b1352 Change DEFAULT_UDEV_SYNC to 1 so udev_sync is used even without any config.
This should be set by default! Normally we have "activation/udev_sync = 1"
in lvm.conf (example.conf.in). But if we use lvm2 without any config file
(or without a definition within '--config' option) the DEFAULT_UDEV_SYNC
is used instead. Together with verify_udev_operations=0 (when we rely on
udev fully), this can cause races as the node could be missing when needed.

(See also https://bugzilla.redhat.com/show_bug.cgi?id=723144)
2011-08-02 10:49:57 +00:00
Peter Rajnoha
b4c05c8519 Add support for systemd file descriptor handover in dmeventd.
Systemd preloads file descriptors for us and passes them in for
newly spawned daemon when using on-demand fifo (or socket)
based activation.

This patch adds checks for file descriptors preloaded by
systemd and uses them instead of opening the FIFOs again
to properly support on-demand FIFO-based activation.

(We'll change FIFOs to sockets soon - but still this
part of the code will stay almost the same.)
2011-07-28 13:06:50 +00:00
Peter Rajnoha
9482fdae35 Add support for new oom killer adjustment interface (oom_score_adj).
The filename to adjust the oom score was changed in 2.6.36.
We should use oom_score_adj instead of oom_adj (which is still
there under /proc, but it's scheduled for removal in August 2012).

New oom_score_adj uses a range from -1000 (OOM_SCORE_ADJ_MIN,
disable oom killing) to 1000 (OOM_SCORE_ADJ_MAX).
2011-07-28 13:03:37 +00:00
Peter Rajnoha
63019dc656 autoreconf 2011-07-28 12:59:44 +00:00
Peter Rajnoha
c3f5210734 Configure and makefile changes for newly added systemd unit files. 2011-07-28 12:57:26 +00:00
Peter Rajnoha
5c4c36b135 Add systemd unit files for dmeventd. 2011-07-28 12:54:28 +00:00
Petr Rockai
4b20b480b1 lvmetad: Edit the MISSING_PV flags only after making a "reply" copy of the
metadata, which is then serialised and discarded. This fixes a couple of
outstanding TODO items about handling the MISSING flags correctly.
2011-07-25 17:59:50 +00:00
Petr Rockai
0574adcaa1 lvmetad: Check integrity of multiple metadata copies, i.e. ensure that seqno
equality implies metadata equality. Signal error in response to any update
requests that try to overwrite metadata without providing a higher seqno.
2011-07-25 15:51:51 +00:00
Petr Rockai
c33c147c9b lvmetad: A couple of TODOs, and fix a few trivial memory leaks. 2011-07-25 15:33:04 +00:00
Zdenek Kabelac
95dd4eae53 Add simple test for read-only table reload suppression
Just a small test for previous libdm commit.
2011-07-25 08:41:52 +00:00
Alasdair Kergon
f2bf7a1cc0 Fix read-only identical table reload supression. 2011-07-24 23:59:03 +00:00
Zdenek Kabelac
a2146d6548 Compare also file size to detect changed config file
Clvmd detects modifed config file before it takes lv_lock.
If the config file is changed rapidly - the change was ignored within
a seocnd ranged.  This patch adds also compare of file size.
So change like some flag for 0 to 1 would pass unnoticed - but
it's quick fix for failing test suite.

FIXME: Implement inotify solution.
2011-07-21 13:23:48 +00:00
Petr Rockai
3082fa5d4c lvmetad: Obliterate vg_status by returning the same information from
update_pv_status, saving a dozen lines of code and execution time of one
walkthrough of the PV list.
2011-07-20 21:33:41 +00:00
Petr Rockai
7d6fed35c0 lvmetad: Fix a possible infinite loop in vg_status. 2011-07-20 21:27:28 +00:00
Petr Rockai
81535db537 lvmetad: Robustify update_pv_status and remove an useless lookup. 2011-07-20 21:26:18 +00:00
Petr Rockai
b33c9d313f First stab at making lvmetad-core threadsafe. The current design should allow
very reasonable amount of parallel access, although the hash tables may become
a point of contention under heavy loads. Nevertheless, there should be orders
of magnitude less contention on the hash table locks than we currently have on
block device scanning.
2011-07-20 21:23:43 +00:00
Petr Rockai
503190e499 lvmetad: Avoid stale PV -> VG mappings on metadata update. 2011-07-20 18:45:32 +00:00
Petr Rockai
fdbde27ed9 Optimise PV -> VG lookups by using a UUID (hash) map. 2011-07-20 18:34:57 +00:00
Petr Rockai
54350d68ce Free up allocated memory before exiting, in lvmetad. 2011-07-20 18:24:49 +00:00
Petr Rockai
f342b6a862 Fix two small (but eventually unbounded) leaks in daemon-server.c. 2011-07-20 18:23:33 +00:00
Petr Rockai
28b6842556 Can't have a global memory pool in lvmetad (that would constitute an ongoing
memory leak) => remove it (it's been unused anyway).
2011-07-20 16:49:21 +00:00
Petr Rockai
ecb4ee0b5a Make lvmetad also report VGID in reply when adding a PV without MDAs (this
obviously only works for VGs that already had at least some MDA discovered).
2011-07-20 16:46:40 +00:00
Petr Rockai
05e87b6b65 Add code to lvmetad's testclient that scans an actual physical device,
effectively emulating a future "pvscan --lvmetad" command.
2011-07-20 15:15:41 +00:00
Petr Rockai
9f228ea974 Make lvmetad report the VG ID and status (complete, partial) in reply to pv_add
requests.
2011-07-20 15:14:17 +00:00
Petr Rockai
e36cd89437 Towards MISSING (PV) flag management in lvmetad. 2011-07-19 19:15:22 +00:00
Petr Rockai
d8f02da1e4 Make it possible to represent type-correct single-item arrays in config trees. 2011-07-19 19:12:38 +00:00
Petr Rockai
789ed50712 Add an API to config.h for creating values in the config_tree mempool. 2011-07-19 19:11:24 +00:00
Petr Rockai
df4758d73b Work out some more details in metadata update in lvmetad. 2011-07-19 16:48:13 +00:00
Jonathan Earl Brassow
8cccdb25a9 Remove and unneeded parameter from build_parallel_areas_from_lv() 2011-07-19 16:37:42 +00:00
Jonathan Earl Brassow
e1154dd09e Fix potential null ptr deref in 'origin_from_cow'
return NULL rather than segfaulting if lv->snapshot is not set
2011-07-19 16:23:52 +00:00
Petr Rockai
8e5d1c93eb More work on cache maintenance code in lvmetad: keep track of PV status. 2011-07-19 14:13:59 +00:00
Petr Rockai
f20e649ad1 Start filling in the core LVMetaD functionality and the corresponding
testclient bits.
2011-07-18 14:48:30 +00:00
Petr Rockai
8e72b3d6d6 Various improvements to the daemon-common code, including automated response
formatting from config trees provided by the daemon implementation.
2011-07-18 14:46:54 +00:00
Petr Rockai
8970d1b895 Improve format_buffer in daemon-shared.c, adding block formatting in addition
to string/integer (this propagates to the *simple* family of request/response
functionality).
2011-07-18 14:42:44 +00:00
Petr Rockai
056828e039 Revert the #include changes. Need to fix this at the #include site for now, and
eventually refactor the way we structure #includes in the all of the library.
2011-07-18 14:34:33 +00:00
Petr Rockai
8e1761a804 Slightly refactor the config code to allow better reuse (no functional change). 2011-07-18 13:26:08 +00:00
Petr Rockai
4ac59193ee Add a missing libdevmapper.h #include to lvm-types.h (for dm_list). 2011-07-18 13:24:48 +00:00
Petr Rockai
668f986b05 Add a couple missing #includes in uuid.h. 2011-07-18 13:24:14 +00:00
Zdenek Kabelac
2d812dea99 Better version of failing 'should' test
where the naming is left completely on lvm.
(Commited code has been different version of test).
So here it should be able to figure out new free name and create a new LV.
2011-07-18 12:12:41 +00:00
Zdenek Kabelac
e0c9b24a6d Adding test for repair when confusing metadata are found 2011-07-18 12:04:42 +00:00
Jonathan Earl Brassow
b546c17105 LVM2 RAID design doc 2011-07-14 17:00:59 +00:00
Petr Rockai
1c8512eb62 Fix t-vgreduce-usage to stop relying on the persistent cache not seeing a
device that has been brought back from the dead: this sometimes fails with
clvmd (the cache is updated "too soon"). Instead, force a pvscan and rely on an
up-to-date cache as usual.
2011-07-11 12:13:07 +00:00
Alasdair Kergon
841a541051 post-release 2011-07-08 19:57:32 +00:00
Alasdair Kergon
4c67e55066 remove unnecessary lvconvert.h 2011-07-08 19:42:11 +00:00
Alasdair Kergon
136c2e70dc pre-release 2011-07-08 19:19:44 +00:00
Alasdair Kergon
f8b6b122ca Downgrade error message - it isn't strictly an internal error in the
library, and the known cause within lvm2 got fixed.
2011-07-08 19:13:05 +00:00
Alasdair Kergon
7a16e1f399 move doc to doc dir 2011-07-08 18:55:28 +00:00
Alasdair Kergon
e66cd0dc35 missing ifdef when new fn unused 2011-07-08 17:31:06 +00:00
Alasdair Kergon
4ccc44282e Remove dev name prefix from dmsetup line output if exactly one dev requested. 2011-07-08 17:08:19 +00:00
Alasdair Kergon
2b069d2ca5 Ignore activation/verify_udev_operations if dm kernel driver vsn < 4.18. 2011-07-08 16:49:04 +00:00
Milan Broz
566939185e Fix warning for pvcreate over MD linear.
If MD linear device has set rounding (overload chunk size attribute),
the pvcreate command prints this warning:

  /dev/md0 sysfs attr level not in expected format: linear
2011-07-08 15:53:59 +00:00
Alasdair Kergon
1ba4781cb0 Need to snapshot lookup by uuid instead of name in case it's renamed. 2011-07-08 15:35:50 +00:00
Alasdair Kergon
2c860451a9 Log cookie values when incrementing/decrementing to give clues about races. 2011-07-08 15:34:47 +00:00
Alasdair Kergon
688a4e87ce Move snapshot removal activation logic into lib/activate. 2011-07-08 12:49:50 +00:00
Alasdair Kergon
7cd8120de1 Move snapshot deactivation logic into lib/activate, fixing the
teardown sequence.  (Previously the snapshot was deactivated while its
origin was active and before its removal was committed to disk, so
restarting after a crash at the point would leave corruption.)
2011-07-08 12:48:41 +00:00
Alasdair Kergon
225d1df8a7 report which dev was not found 2011-07-08 12:35:48 +00:00
Zdenek Kabelac
459f6c60e6 Increase timeout for udev settle
For some unknown reason (atm) udev settle takes longer time on recent rawhide.
To pass the test - inrease the settle timeout.
2011-07-08 10:05:43 +00:00
Alasdair Kergon
1dc49506bc Cope with a PV only discovered missing when creating deptree. 2011-07-06 00:29:44 +00:00
Alasdair Kergon
f7b3c167b0 Abort operation if dm_tree_node_add_target_area fails. 2011-07-05 23:10:14 +00:00
Alasdair Kergon
9f67b0f0ed Always perform preload logic before suspending - not only in the case when we
have precommitted metadata.  (Necessary to avoid loading tables
while suspend in lvchange --refresh.)
2011-07-05 18:36:37 +00:00
Alasdair Kergon
7970cad6de decode cookie flags in debug msgs 2011-07-05 16:17:14 +00:00
Alasdair Kergon
84a6c240d5 Snapshots LVs are never loaded in their own right, only along with their
origin.
2011-07-05 01:08:42 +00:00
Alasdair Kergon
8f03913e0b Fix conditions using no_merging: only those using lv_is_merging_cow() should
have been converted, not pure lv_is_cow ones.
(Merging has no impact on how the pre-merged cow segment itself is loaded.)
2011-07-05 01:01:19 +00:00
Alasdair Kergon
30776aea04 first attempt to fix test 2011-07-04 16:27:27 +00:00
Alasdair Kergon
ec000e1dfa reinstate accidentally-removed lines to fix pvmove again 2011-07-04 14:56:58 +00:00
Alasdair Kergon
4b22b81391 Report internal error if suspending a device using an already-suspended dev.
This catches the recent pvmove problem trapping I/O between layers.
2011-07-02 01:17:09 +00:00
Jonathan Earl Brassow
cfafb8a92a RAID updates for gdbinit file 2011-07-01 16:15:25 +00:00
Alasdair Kergon
0eee66e257 Add framework for validation of ioctls. Doesn't do any checks yet.
dmsetup --checks
libdevmapper: dm_task_enable_checks()
lvm.conf: activation/checks=1
2011-07-01 14:09:19 +00:00
Alasdair Kergon
34be5fb50f update pv_attr check for new missing attr 2011-06-30 19:32:51 +00:00
Alasdair Kergon
0c6b43a8ef When suspending, automatically preload newly-visible existing LVs
Let's find out if this makes things better or worse overall...
2011-06-30 18:25:18 +00:00
Zdenek Kabelac
3dd7f795f5 Report internal error when parameters are missing on table load
When some target is passing empty parameters to some dm target,
report this as an internal error to better catch some broken
table construction (some mirror conversions seem to be doing
this for now).
2011-06-30 09:24:58 +00:00
Alasdair Kergon
3f98a2885d use remove -f 2011-06-30 09:17:49 +00:00
Alasdair Kergon
2fcba16688 teardown stray filesystems too 2011-06-30 09:15:53 +00:00
Zdenek Kabelac
5beb23518d Use -f to remove device
Since some test may leave devices in suspend mode which would require
carefull order of resume operation - use '-f' to replace them with
error targets

For disable_dev  - when  'error' target is used for open count - treat
return code as ok (|| true) to avoid breaking futher test processing.
2011-06-30 09:08:22 +00:00
Milan Broz
cf332f746e Try to force remove dm devices in after test fails. 2011-06-30 08:50:10 +00:00
Alasdair Kergon
2df71f33ac Try to remove any stray loop devices left behind by earlier aborted tests. 2011-06-30 01:17:37 +00:00
Alasdair Kergon
61a821884d test attempt to remove stray loop devs 2011-06-30 00:57:29 +00:00
Petr Rockai
0952bf42a8 Differentiate the request and response format, in daemon/common. 2011-06-29 22:20:14 +00:00
Alasdair Kergon
06577e86d8 Add age filter to dmsetup udevcomplete_all to minimise concurrency problems. 2011-06-29 21:56:46 +00:00
Alasdair Kergon
589154d209 make udev teardown conditional on using udev 2011-06-29 18:18:18 +00:00
Alasdair Kergon
73c5f313e5 teardown aborts if it hits an error, so better teardown semaphores earlier
as we need them later in the teardown process
2011-06-29 18:14:08 +00:00
Alasdair Kergon
637245e9f5 print any loop devs that weren't torn down 2011-06-29 17:37:30 +00:00
Alasdair Kergon
e2aa3cf6ae remove any stray semaphores after tearing down devs 2011-06-29 17:33:39 +00:00
Alasdair Kergon
08315e3ac4 Reinstate correct permissions when creating mirrors. 2011-06-29 17:05:53 +00:00
Alasdair Kergon
e145fa0e83 debug log readonly flag with ioctls 2011-06-29 16:08:33 +00:00
Alasdair Kergon
07324d6221 Append 'm' attribute to pv_attr for missing PVs. 2011-06-29 14:56:33 +00:00
Alasdair Kergon
ec8423ad76 Add uuids to dmsetup create cmds. 2011-06-29 12:37:51 +00:00
Alasdair Kergon
d5b96bdd95 use dmsetup remove -f as a better way of simulating a device "disappearing" 2011-06-29 12:33:18 +00:00
Alasdair Kergon
eed2109a93 Remove temporary failures now, but continue to give INTERNAL_ERROR. 2011-06-29 11:36:37 +00:00
Alasdair Kergon
283f4a0649 Temporary conversion to internal error and failure, to see how many
instances of this problem this flushes out.
2011-06-29 08:54:13 +00:00
Jonathan Earl Brassow
fe4a490ff3 Fix error message - the parameter name is '--mirrorlog' not '--log' 2011-06-29 02:06:26 +00:00
Jonathan Earl Brassow
20f055bcb4 Remove unnecesary conditional.
The conditional is not just unnecessary, it would have been wrong.  The code
is suppose to be checking if the 'splitmirrors_ARG' is negative, but it
instead is checking 'mirrors_ARG'.  Rather than changing the argument being
checked, I've pulled the check entirely because 'splitmirrors_ARG' is already
guarenteed to not be negative by virtue of the fact that it is a 'int_arg'.
Negative values will be caught in _process_command_line().
2011-06-28 21:28:35 +00:00
Zdenek Kabelac
daf9efb723 Annotate CLVMD_CMD_SYNC_NAMES in decode_cmd 2011-06-28 13:42:15 +00:00
Alasdair Kergon
2e7ab82b92 More readahead tests. 2011-06-28 09:43:28 +00:00
Alasdair Kergon
0fac155599 more fixes to readahead etc. 2011-06-28 09:24:13 +00:00
Alasdair Kergon
77d16e2e39 Change to still sync with udev (to test waiting for rules) then to verify udev
operations like it used to (which will perform them on systems with
non-std dev dirs where test udev rules aren't installed).
2011-06-28 00:38:26 +00:00
Alasdair Kergon
de0e6b4119 Remove enforcement of udev verification when using non-standard /dev location.
If you change the dev dir, it's your responsibility to adjust udev rules
or tell lvm not to use udev too.
2011-06-28 00:23:06 +00:00
Alasdair Kergon
1c365783e8 Selectively enable/disable udev rules. 2011-06-28 00:11:46 +00:00
Alasdair Kergon
eb81f7ab87 Move _set_lvm_fallback into toolcontext, fix string comparison (/devtest
matched /dev) and note that function should go anyway as it can be
overriding a valid config.
2011-06-27 23:43:04 +00:00
Alasdair Kergon
ed04c9d702 Extend debug log messages to distinguish between the 3 states:
trust udev; verify udev; perform dev node operations directly.
2011-06-27 22:38:53 +00:00
Alasdair Kergon
ea687806b3 Move udev_only logic inside stacked node op code.
(We still need to treat add+readhead+del as a no-op.)
Rename udev_fallback to verify_udev_operations.
Rename --udevfallback to --verifyudev
2011-06-27 21:43:58 +00:00
Petr Rockai
1a3dab6075 Add int/str lookup routines specific to the reply (in client) and request (in
server) for nicer-looking code (thin wrapping around find_config_{int,str}).
2011-06-27 14:03:58 +00:00
Petr Rockai
e828044efd Also parse the config_tree on the client end (daemon-client.c). 2011-06-27 13:58:11 +00:00
Petr Rockai
a084548cce Parse the incoming config tree in daemon-server.c, providing the
daemon-specific handler with a struct config_tree pointer to look things up in.
2011-06-27 13:46:45 +00:00
Petr Rockai
8d9710bed6 LVMetaD build hack: link to lvm-internal &c. and add a simple test script. 2011-06-27 13:44:33 +00:00
Petr Rockai
0783429215 Implement daemon_send_simple and use it in the testclient. 2011-06-27 13:15:49 +00:00
Petr Rockai
0d7965aeca Do not open the socket with SOCK_NONBLOCK in daemon-client, since we have no
use for that behaviour (at least for now).
2011-06-27 13:14:53 +00:00
Petr Rockai
b5997e5b05 Call daemon_close before exit in the testclient. 2011-06-27 12:27:34 +00:00
Petr Rockai
fbcc8450b8 Sketch out the actual on-wire format in daemon-client.h documentation, and add
a simplified send interface.
2011-06-27 12:26:54 +00:00
Alasdair Kergon
24e1d27e54 remove unused var after recent patch 2011-06-24 23:39:09 +00:00
Alasdair Kergon
ec98c72b04 Return immediately dm_lib_exit() if called more than once.
(Avoiding calling it twice would involve some untangling.)
Decrement the new suspended_counter if removing a suspended device.
2011-06-24 19:33:41 +00:00
Jonathan Earl Brassow
4ec4a0c105 Fix to preserve exclusive activation of mirror while up-converting.
When an LVM mirror is up-converted (an additional image added), it creates
a temporary mirror stack.  The lower-level mirror in the stack that is
created was not being activated exclusively - violating the exclusive nature
of the original mirror.  We now check for exclusive activation of a mirror
before converting it, and if found, we ensure that the temporary mirror
is also exclusively activated.
2011-06-23 14:00:58 +00:00
Milan Broz
3477dd6c19 Fail allocation if number of extents not divisible by area count
Allocation should fail early if this condition is not met.

Quick fix for https://bugzilla.redhat.com/show_bug.cgi?id=707779
2011-06-23 10:53:24 +00:00
Jonathan Earl Brassow
38444f0f13 Fix issue preventing cluster mirror creation.
Mirrors used to be created by first creating a linear device and then adding
the other images plus the log.  Now mirrors are created by creating all the
images in one go and then adding the log separately.  The new way ran into
the condition that cluster mirrors cannot change the log type (in the case
of creation, from core -> disk) while the mirror is not active.  (It isn't
active because it is in the process of being created.)  The reason this
condition is in place is because a remote node may have the mirror active, and
we don't want to alter the log underneath it.

What we really needed was a way of checking if the mirror was active remotely
but not locally, and in that case do not allow a change of the log.  I've added
this check, and cluster mirrors can now be created again.
2011-06-22 21:31:21 +00:00
Peter Rajnoha
ffd363ef38 Add check for library fallback in _deactivate_node.
This fn calls rm_dev_node directly - an exceptional case. It needs to check
the DM_UDEV_DISABLE_LIBRARY_FALLBACK flag directly (it's called in dm_task_run
normally where it's checked already).
2011-06-22 12:56:02 +00:00
Peter Rajnoha
c99e816790 Disable udev fallback and add --udevfallback option to dmsetup. 2011-06-17 14:55:51 +00:00
Peter Rajnoha
1bc531301e Disable udev fallback by default and add activation/udev_fallback to lvm.conf.
We've used udev fallback code till now to check whether udev
created/removed the entries in /dev correctly and if not,
a repair was done (giving a warning messagea about that).

This patch adds a possibility to enable this additional check
and subsequent fallback only when required (debugging purposes
mostly) and trust udev completely.

So let's disable the fallback code by default and add a new
configuration option "activation/udev_fallback".

(The original code for creating the nodes will still be used
in case the device directory that is set in lvm.conf differs
from the one that udev uses and also when activation/udev_rules
is set to 0 - otherwise we would end up with no nodes/symlinks
at all)
2011-06-17 14:50:53 +00:00
Zdenek Kabelac
7e6c6e0741 Code move of vg_mark_partial() up in stack
It's useful to keep the partial flag cached - so just move the call
for vg_mark_partil_lvs() into import_vg_from_config_tree() so it gets
evaluated before it goes through the lvmcache.

This patch should not present any functional change.

Note: It is rather temporal solution - proper place is probably inside the
'read' call back - but needs some more discussion.
For now using this minor hack.
2011-06-17 14:39:10 +00:00
Zdenek Kabelac
9313eb28be Remove unused internal flag ACTIVATE_EXCL from the code 2011-06-17 14:30:58 +00:00
Zdenek Kabelac
df535c251f Remove test for status flag
As the ACTIVATE_EXCL could be set only in clvmd code - there is no
use for this test in lv_add_mirrors() function only called from
tools context.

FIXME: Add cluster test case for this.
2011-06-17 14:27:34 +00:00
Zdenek Kabelac
e954b58f29 Add couple FIXMEs around suspicious code 2011-06-17 14:24:18 +00:00
Zdenek Kabelac
5b1254a741 Use lv_activate_opts struct instead of ACTIVATE_EXCL status flag
Let's hope all conditions has been properly converted.
2011-06-17 14:22:48 +00:00
Zdenek Kabelac
4626847312 Use lv_activate_opts struct instead of ACTIVATE_EXCL status flag. 2011-06-17 14:17:16 +00:00
Zdenek Kabelac
55a359615f Add lv_activate_opts structure
To avoid modification of 'read-only' volume group structure
add a new structure to pass local data around the code for LV
activation.

As origin_only is one such flag - replace this parameter with new
struct lv_activate_opts.

More parameters might eventually become part of lv_activate_opts.
2011-06-17 14:14:19 +00:00
Petr Rockai
e5749424a4 What's new. 2011-06-15 17:48:35 +00:00
Petr Rockai
9ae83a8ea9 Fix RHBZ 651590 (failure to lock LV results in failure to repair mirror after
transient error), stemming from the following sequence of events:

1) devices fail IO, triggering repair
2) dmeventd starts fixing up the mirror
3) during the downconversion, a new metadata version is written

--> the devices come back online here

4) the mirror device suspend/resume is called to update DM tables
5) during the suspend/resume cycle, *pre*-commit metadata is read;
   however, since the failed devices are now back online, we get back
   inconsistent set of precommit metadata and the whole operation fails

The patch relaxes the check that fails in step 5 above, namely by ignoring
inconsistencies coming from PVs that are marked MISSING.
2011-06-15 17:45:02 +00:00
Peter Rajnoha
55525d6a92 Add proper udev library context initialization and finalization to liblvm.
This was missing in liblvm and it caused all udev-related operations to
not take effect when using liblvm, e.g. obtaining the list of devices from udev
db instead of scanning the whole /dev which also recreated the .cache as a side
effect. This was also the case with udisks-lvm-pv-export prober which is run
from within udev rules whenever the CHANGE event is fired.
2011-06-15 13:29:48 +00:00
Milan Broz
f9ef876aab Fix lvreduce stripe rounding up from previous commit. 2011-06-15 10:56:52 +00:00
Petr Rockai
691e08ff76 Add a skeleton for lvmetad, a test client, and a temporary Makefile to build
them. These are currently mostly for testing the daemon-common code. LVMetaD
functionality is expected to trickle in soon though.
2011-06-14 02:36:38 +00:00
Petr Rockai
763e834367 Common daemon code: Implement basic socket-based communication infrastructure
(both client and server side). The server handles each connection in a separate
thread.
2011-06-14 02:34:18 +00:00
Alasdair Kergon
359ca22912 Fix last snapshot removal to avoid table reload while a device is suspended. 2011-06-13 22:28:04 +00:00
Alasdair Kergon
6094919b5c Fix fields in warning message. 2011-06-13 03:53:02 +00:00
Alasdair Kergon
ccadc42707 Maintain a count of the number of suspended devices in libdevmapper
and use this for the LVM critical section logic.  Also report an error if
code tries to load a table while any device is known to be in the
suspended state.
(If the variety of problems these changes are showing up can't be fixed
before the next release, the error messages can be reduced to debug
level.)
2011-06-13 03:32:45 +00:00
Alasdair Kergon
e5d134ff97 Fix 'pvs -o pv_all' to include label fields. (Also removed recursion.) 2011-06-12 19:49:40 +00:00
Alasdair Kergon
60578c20cb Disable critical section internal errors until this can be fixed properly
in libdevmapper.
2011-06-12 00:23:50 +00:00
Alasdair Kergon
bb7b8d8780 Fix --mirrorlog mirrored. 2011-06-11 12:55:31 +00:00
Alasdair Kergon
76564e3168 Major pvmove fix to issue ioctls in the correct order when multiple LVs
are affected by the move.  (Currently it's possible for I/O to become
trapped between suspended devices amongst other problems.

The current fix was selected so as to minimise the testing surface.  I
hope eventually to replace it with a cleaner one that extends the
deptree code.

Some lvconvert scenarios still suffer from related problems.
2011-06-11 00:03:06 +00:00
Milan Broz
19f592f230 Fix kernel version test (RHEL: 2.6.32-123). 2011-06-09 19:48:26 +00:00
Milan Broz
a543712431 Fix mirrored stripe reduction.
Patch adds check for stripe not only in direct
LV segment but also in mirror image segment.

This prevents bugs like:

# lvcreate -i2 -l10 -n lv vg_test
# lvconvert -m1 -i1 vg_test/lv

# lvreduce -f -l1 vg_test/lv
  WARNING: Reducing active logical volume to 4.00 MiB
  THIS MAY DESTROY YOUR DATA (filesystem etc.)
  Reducing logical volume lv to 4.00 MiB
  Segment extent reduction 9 not divisible by #stripes 2
  Logical volume lv successfully resized

# lvremove -f vg_test
  Segment extent reduction 1 not divisible by #stripes 2
  LV segment lv:0-4294967295 is incorrectly listed as being used by LV lv_mimage_0
  Internal error: LV segments corrupted in lv_mimage_0.
2011-06-09 19:38:56 +00:00
Milan Broz
10f5ecdc64 Validate mirror segments size
Currently some operation with striped mirrors lead
to corrupted metadata, this patch just add detection of such
situation.

Example:
# lvcreate -i2 -l10 -n lvs vg_test
# lvconvert -m1 vg_test/lvs

# lvreduce -f -l1 vg_test/lvs
  Reducing logical volume lvs to 4.00 MiB
  Segment extent reduction 9not divisible by #stripes 2
  Logical volume lvs successfully resized

# lvremove vg_test/lvs
  Segment extent reduction 1not divisible by #stripes 2
  LV segment lvs:0-4294967295 is incorrectly listed as being used by LV lvs_mimage_0
  Internal error: LV segments corrupted in lvs_mimage_0.
2011-06-09 19:36:16 +00:00
Milan Broz
4964e2d615 Fix extent rounding for striped segments.
We should never remove more extents than requested by user,
so round up to next stripe boundary during lvreduce.

Also this fixes round to zero sized LV bug:

# lvcreate -i2 -I 64k -l10 -n lvs vg_test
# lvreduce -f -l1 vg_test/lvs
  Rounding size (1 extents) down to stripe boundary size for segment (0 extents)
  WARNING: Reducing active logical volume to 0
  THIS MAY DESTROY YOUR DATA (filesystem etc.)
  Reducing logical volume lvs to 0
  Failed to suspend lvs
2011-06-09 19:34:49 +00:00
Milan Broz
4abb15db5f Fix kernel version parsing in tests, use common function. 2011-06-09 18:58:00 +00:00
Milan Broz
bb685d4e6c Fix another occurrence of linux kernel version check. 2011-06-09 15:52:59 +00:00
Milan Broz
15d64a0899 Accept kernel version 3 (3.0-rc and similar). 2011-06-09 15:07:40 +00:00
Milan Broz
81594343d3 Fix topology test.
Kernel version is misparsed in Fedora rawhide, just test sysfs file.

SCSI_DEBUG_DEV is no longer exported, use provided file instead.
2011-06-09 14:26:47 +00:00
Peter Rajnoha
fab0de8998 Fix create_temp_name to replace any '/' found in the hostname with '?'.
There's a possibility someone will use the '/' in the hostname. Since we
generate a temporary file name (path) including the hostname, any '/' would
be ambiguous.

We can always set such hostname using 'sethostname' from unistd.h. But the
'hostname' command already includes the check and removes the '/' char.
However, some old versions still allow that.
See: https://bugzilla.redhat.com/show_bug.cgi?id=711445.

Since this is only a temporary name and the possibility of this error is
quite negligible, we don't need any complex escape sequence here, just a
simple char replace.
2011-06-08 08:49:53 +00:00
Alasdair Kergon
42a13432d0 missing space in mesg 2011-06-06 12:08:42 +00:00
Petr Rockai
db40789590 Also check in the pe_start API test that the disks have not been touched. 2011-06-04 21:55:25 +00:00
Petr Rockai
af8966c42e Add an API test that shows how to find a value of pe_start for a VG without
actually creating the VG on-disk.
2011-06-04 21:46:34 +00:00
Peter Rajnoha
e01642db66 Fix a few compile warnings.
clvmd-command.c:84:2: warning: implicit declaration of function ‘init_test’
lvm-functions.c:141:3: warning: too many arguments for format
2011-06-03 09:05:30 +00:00
Milan Broz
9e1376e5a1 Use append but except query for dlm locks:-) 2011-06-02 09:08:16 +00:00
Milan Broz
28ba223ca9 Always use append to file in lvmdump (selinux policy - no file truncation).
Workaround dmsetup ls --tree (terminal ioctl for width).
(Until isatty() will be allowed by selinux policy.)
2011-06-02 09:02:03 +00:00
Milan Broz
970f51c102 Use /var/run/lvm for lvmetad socket. 2011-06-02 08:58:05 +00:00
Alasdair Kergon
fe920a6a65 Propagate test mode to clvmd to skip activation and changes to held locks. 2011-06-01 21:16:55 +00:00
Alasdair Kergon
0ee514bc81 Defer writing PV labels to vg_write.
Store label_sector only in struct physical_volume.
2011-06-01 19:29:31 +00:00
Alasdair Kergon
6f86b62946 Report sector containing label in verbose message. 2011-06-01 19:26:38 +00:00
Alasdair Kergon
761b6a2189 Permit --available with lvcreate so non-snapshot LVs need not be activated. 2011-06-01 19:21:03 +00:00
Alasdair Kergon
7f77f9d400 Report sector containing label in verbose message. 2011-06-01 15:30:36 +00:00
Peter Rajnoha
ce2c219999 Clarify error message when unable to convert an LV into a snapshot of an LV. 2011-06-01 12:24:15 +00:00
Peter Rajnoha
5771fee535 Use new dev_open_readonly fn to prevent opening devices for read-write when not necessary.
Before, we used vg_write_lock_held call to determnine the way a device is
opened. Unfortunately, this opened many devices in RW mode when it was not
really necessary. With the OPTIONS+="watch" rule used in the udev rules,
this could fire numerous events while closing such devices (and it caused
useless scans from within udev rules in return).

A common bug we hit with this was with the lvremove command which was unable
to remove the LV since it was being opened from within the udev rules. This
patch should minimize such situations (at least with respect to LVM handling
of devices).

Though there's still a possibility someone will open a device 'outside' in
parallel and fire the event based on the watch rule when closing a device
once opened for RW.
2011-05-28 09:48:14 +00:00
Petr Rockai
2c8102f7a1 First draft of a document describing how we will automatically and
incrementally assemble (possibly multi-component, like LVM) storage devices.
2011-05-25 21:43:12 +00:00
Alasdair Kergon
28714df966 test 2011-05-24 14:12:36 +00:00
Alasdair Kergon
7979602398 test 2011-05-24 14:10:55 +00:00
Alasdair Kergon
c807c139bf test 2011-05-24 14:10:55 +00:00
Alasdair Kergon
72ca3644cb test 2011-05-24 14:09:41 +00:00
Alasdair Kergon
329213cc57 test 2011-05-24 14:09:41 +00:00
Alasdair Kergon
dbcb996ba2 test 2011-05-24 14:01:32 +00:00
Alasdair Kergon
52a85391f6 test 2011-05-24 14:00:57 +00:00
Alasdair Kergon
67f4a7cec4 test 2011-05-24 13:59:16 +00:00
Alasdair Kergon
a52b5b8b61 test 2011-05-24 13:54:37 +00:00
Alasdair Kergon
77d331c2bb test 2011-05-24 13:53:26 +00:00
Alasdair Kergon
4834e71812 test 2011-05-24 13:53:26 +00:00
Alasdair Kergon
20fdf0231e Add and use dev_open_readonly and variations. 2011-05-24 13:36:57 +00:00
Petr Rockai
6a7c185e30 Mention code layout in lvmetad DESIGN. 2011-05-23 14:46:48 +00:00
Zdenek Kabelac
be761afa4c Do not log a superfluous stack message when the lv is properly processed 2011-05-19 13:59:22 +00:00
Petr Rockai
7d37e830ca More work on the common daemon framework. Make things compile, too. 2011-05-15 11:02:29 +00:00
Petr Rockai
3777234077 More scavenging of common daemon code, this time the clvmd local socket setup
sequence.
2011-05-13 09:34:12 +00:00
Petr Rockai
2ca65cc602 Start filling in some of the common daemon (server-side) functionality, taking
dmeventd code as a starting point.
2011-05-13 08:45:46 +00:00
Petr Rockai
c2ac1bee92 First go at the lvmetad client-side interface. 2011-05-13 08:17:26 +00:00
Petr Rockai
51fb2c7551 First stab at the prototypes of the daemon-common functionality (to be
eventually shared by dmeventd, lvmetad and clvmd).
2011-05-13 08:07:28 +00:00
Petr Rockai
b904359bac Initial design document for LVMetaD, building on the draft from June of last
year, incorporating the outcomes of today's and yesterday's discussions.
2011-05-12 17:49:46 +00:00
Peter Rajnoha
7c9f1ae834 Do not issue an error message when unable to remove .cache on read-only fs. 2011-05-12 12:42:47 +00:00
Petr Rockai
37bfb6508d Update WHATS_NEW. 2011-05-07 15:52:50 +00:00
Petr Rockai
7b64fccba1 Rewrite vgreduce --removemissing --force, using existing primitives:
lv_remove_with_dependencies and mirror_remove_missing (the latter coming from
lvconvert). Remove substantial amount of legacy code.
2011-05-07 15:52:16 +00:00
Petr Rockai
95986e42a1 Add a new entry point in the mirror lvconvert code, for removing missing mirror
images and/or logs, without attempting any further actions.
2011-05-07 13:56:13 +00:00
Petr Rockai
d792fc925c When glibc needs buffers for line buffering of input and output buffers, it
allocates these buffers in such way it adds memory page for each such buffer
and size of unlock memory check will mismatch by 1 or 2 pages.

This happens when we print or read lines without '\n' so these buffers are
used. To avoid this extra allocation, use setvbuf to set these bufffers ahead.

Signed-off-by: Zdenek Kabelac <zkabelac@redhat.com>
Reviewed-by: Milan Broz <mbroz@redhat.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2011-05-07 13:50:11 +00:00
Petr Rockai
f9e419bad6 Make vg_mark_partial_lvs also clear existing PARTIAL_LV flags, so it can be
issued repeatedly on the same VG, keeping the PARTIAL_LV flags up to date.
2011-05-07 13:32:05 +00:00
Alasdair Kergon
9a3e6e423c test update without WHATS_NEW to check it gives warning now 2011-04-29 19:06:17 +00:00
Alasdair Kergon
efe82852a3 test 2011-04-29 19:05:11 +00:00
Alasdair Kergon
02dd81f4be test 2011-04-29 19:01:59 +00:00
Alasdair Kergon
912c86e526 test 2011-04-29 19:01:59 +00:00
Alasdair Kergon
0285bb3312 test 2011-04-29 18:57:09 +00:00
Alasdair Kergon
935e65c9ae test 2011-04-29 18:56:35 +00:00
Alasdair Kergon
39235f0894 test 2011-04-29 18:48:50 +00:00
Alasdair Kergon
781242009d test 2011-04-29 18:43:31 +00:00
Alasdair Kergon
d9b479b3c1 test 2011-04-29 18:41:46 +00:00
Alasdair Kergon
a5543b5345 test 2011-04-29 18:40:55 +00:00
Alasdair Kergon
f3342c7f89 test 2011-04-29 18:40:14 +00:00
Alasdair Kergon
6f776305ee post-release 2011-04-29 17:05:20 +00:00
Alasdair Kergon
938cc8daa7 wake buildbot 2011-04-29 16:27:09 +00:00
Alasdair Kergon
e63f55fcc7 commands/toolcontext.c:578: warning: ‘udev_dir’ may be used uninitialized in this function
commands/toolcontext.c:576: warning: ‘udev_dir_len’ may be used uninitialized in this function

Bogus - suppress them.
2011-04-29 16:23:39 +00:00
Alasdair Kergon
24e17116d3 pre-release 2011-04-29 16:12:21 +00:00
Alasdair Kergon
d6a05b6fd7 pre-release clean-ups 2011-04-29 00:21:13 +00:00
Zdenek Kabelac
9139a95616 Add missing cleanup for vgimportclone script 2011-04-28 22:17:06 +00:00
Alasdair Kergon
425d920af6 clean up critical section patch 2011-04-28 20:29:59 +00:00
Zdenek Kabelac
3b2eb14eea Missing space in debug message 2011-04-28 19:59:17 +00:00
Alasdair Kergon
7722eea88c Set pv_min_size to 2048KB to exclude floppy drives.
Previously was 512.
2011-04-28 17:33:34 +00:00
Alasdair Kergon
c52cf64cea Add missing pkg_config_init check with --enable-udev_sync. 2011-04-28 16:54:32 +00:00
Alasdair Kergon
d1ca1d9cb0 Adjust pvmove man page wording.
Add missing --alloc anywhere to example and mention that snapshots and
mirrors can't be moved.
2011-04-28 16:22:47 +00:00
Peter Rajnoha
ed244de069 Uncomment obtain_device_list_from_udev setting in lvm.conf.
Just for consistency with other settings we already have in lvm.conf.
2011-04-26 09:09:24 +00:00
Peter Rajnoha
2b78acd736 autoreconf 2011-04-22 12:13:05 +00:00
Peter Rajnoha
0967b9504b Use common udev_get_dev_dir() to get current device directory managed by udev. 2011-04-22 12:07:35 +00:00
Peter Rajnoha
79bed56a52 Obtain device list from udev by default if LVM2 is compiled with udev support.
Also, add a new 'obtain_device_list_from_udev' setting to lvm.conf with which
we can turn this feature on or off if needed.

If set, the cache of block device nodes with all associated symlinks
will be constructed out of the existing udev database content.
This avoids using and opening any inapplicable non-block devices or
subdirectories found in the device directory. This setting is applied
to udev-managed device directory only, other directories will be scanned
fully. LVM2 needs to be compiled with udev support for this setting to
take effect. N.B. Any device node or symlink not managed by udev in
udev directory will be ignored with this setting on.
2011-04-22 12:05:32 +00:00
Peter Rajnoha
87edad13ea Move common libudev code to lvm-wrappers.[ch].
...so we can use it throughout.
2011-04-22 11:59:59 +00:00
Peter Rajnoha
b7c85c8aa6 Require libudev >= 143 when compiling with udev support.
Old versions of libudev < 143 were experimental and unstable. Require recent
and stable versions only (version 143 is old enough anyway).
2011-04-22 11:56:41 +00:00
Dave Wysochanski
207060d03c Disable nightly test for vgimportclone until proper fix is made.
This fails on lenny buildslave for some reason.
For now disable the vgimportclone part of the test until proper fix.

Let the first part of the test still run though, which shows pvs working
with duplicate pvs.
2011-04-21 19:06:00 +00:00
Dave Wysochanski
b641164ece Add vgimportclone symlink for nightly test. 2011-04-21 17:42:58 +00:00
Dave Wysochanski
8a997e48fb Add nightly test for vgimportclone and querying of vgnames with duplicate pvs.
Related to rhbz 697959.

This test fails prior to these two commits:
commit af112eb2c9
Author: Zdenek Kabelac <zkabelac@redhat.com>
Date:   Thu Apr 21 13:15:26 2011 +0000

    Skip check for NULL before dm_free

    dm_free makes this test itself.

commit 91419c3e86
Author: Zdenek Kabelac <zkabelac@redhat.com>
Date:   Thu Apr 21 13:13:40 2011 +0000

    Fix use of released vgname and vgid
2011-04-21 17:03:38 +00:00
Zdenek Kabelac
af112eb2c9 Skip check for NULL before dm_free
dm_free makes this test itself.
2011-04-21 13:15:26 +00:00
Zdenek Kabelac
91419c3e86 Fix use of released vgname and vgid
Avoid using of already released memory when duplicated MDA is found.

As get_pv_from_vg_by_id() may call lvmcache_label_scan() use the local copy
of the vgname and vgid on the stack as vginfo may dissapear and code was
then accessing garbage in memory.

i.e.  pvs  /dev/loop0
(when /dev/loop0 and /dev/loop1 has same MDA content)

Invalid read of size 1
   at 0x523C986: dm_hash_lookup (hash.c:325)
   by 0x440C8C: vginfo_from_vgname (lvmcache.c:399)
   by 0x4605C0: _create_vg_text_instance (format-text.c:1882)
   by 0x46140D: _text_create_text_instance (format-text.c:2243)
   by 0x47EB49: _vg_read (metadata.c:2887)
   by 0x47FBD8: vg_read_internal (metadata.c:3231)
   by 0x477594: get_pv_from_vg_by_id (metadata.c:344)
   by 0x45F07A: _get_pv_if_in_vg (format-text.c:1400)
   by 0x45F0B9: _populate_pv_fields (format-text.c:1414)
   by 0x45F40F: _text_pv_read (format-text.c:1493)
   by 0x480431: _pv_read (metadata.c:3500)
   by 0x4802B2: pv_read (metadata.c:3462)
 Address 0x652ab80 is 0 bytes inside a block of size 4 free'd
   at 0x4C2756E: free (vg_replace_malloc.c:366)
   by 0x442277: _free_vginfo (lvmcache.c:963)
   by 0x44235E: _drop_vginfo (lvmcache.c:992)
   by 0x442B23: _lvmcache_update_vgname (lvmcache.c:1165)
   by 0x443449: lvmcache_update_vgname_and_id (lvmcache.c:1358)
   by 0x443C07: lvmcache_add (lvmcache.c:1492)
   by 0x46588C: _text_read (text_label.c:271)
   by 0x466A65: label_read (label.c:289)
   by 0x4413FC: lvmcache_label_scan (lvmcache.c:635)
   by 0x4605AD: _create_vg_text_instance (format-text.c:1881)
   by 0x46140D: _text_create_text_instance (format-text.c:2243)
   by 0x47EB49: _vg_read (metadata.c:2887)

Add testing script
2011-04-21 13:13:40 +00:00
Petr Rockai
35c174671b Always copy all tests to builddir regardless of T/S, since subsequent runs
(possibly with different T or S) skip that step.
2011-04-19 15:28:17 +00:00
Mike Snitzer
5a2322eb7d Improve the discard documentation. Also improve discard code in
pv_manip.c to properly account for case when pe_start=0 and the first
physical extent is to be released (currently skip the first extent to
avoid discarding the PV label).
2011-04-13 18:26:39 +00:00
Mike Snitzer
63b91e514e Use uint32_t rather than uint64_t. 2011-04-12 22:04:04 +00:00
Mike Snitzer
e65d6e4a11 Add "devices/issue_discards" to lvm.conf.
Issue discards on lvremove if enabled and both storage and kernel have support.
2011-04-12 21:59:01 +00:00
Mike Snitzer
77392833f4 fix s/then/than/ typo in lvm.conf.5.in 2011-04-12 21:21:08 +00:00
Mike Snitzer
5828e7e584 fix s/then/than/ typos in 'pv_min_size' section and fold floppy example
into documentation preamble.
2011-04-12 20:44:41 +00:00
Zdenek Kabelac
efc5d04ec7 Replace dm_snprintf with strncpy
My previous patch fixed incorrect error check for dm_snprintf.
However in this particular case - dm_snprintf has been used differently -
just like strncpy + setting last char with '\0' - so the code had to return
error - because the buffer was to short for whole string.

Patch replaces it with real strncpy.
Also test for alloca() failure is removed - as the program behaviour
is rather undefined in this case - it never returns NULL.
2011-04-12 14:13:17 +00:00
Petr Rockai
ba7d6c9dd0 Make the mirror log checks in t-lvconvert-repair-policy a bit more strict
(i.e. accurate).
2011-04-12 12:39:24 +00:00
Petr Rockai
81095cc6ea This patchset refactors some reporting code and completes the remaining
lvseg properties for lvm2app, 'devices' and 'seg_pe_ranges'.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2011-04-12 12:24:29 +00:00
Zdenek Kabelac
103b5a7538 Fix incorrect tests for dm_snprintf() failure
As the memory is preallocated based on arg size in these cases,
the error would be quite hard to trigger here anyway.
2011-04-09 19:05:23 +00:00
Zdenek Kabelac
1f7f7cb701 Fix some unmatching sign comparation gcc warnings
Simple replacement for unsigned type - usually in for() loops.
2011-04-08 14:40:18 +00:00
Zdenek Kabelac
83a0836f31 Add missing printf attributes
These attributes were missing in previous patch, that was adding
instrumentation for printf formating string parameter.
2011-04-08 14:21:34 +00:00
Zdenek Kabelac
7e3042b541 Use dm_snprintf
and fix differently signed comparation.
2011-04-08 14:18:40 +00:00
Zdenek Kabelac
7e27d8f4db Better const cast logic
(although still gcc gives const violation warning)
2011-04-08 14:14:57 +00:00
Zdenek Kabelac
34b60db59f Fix some forgotten -Wold-style-definition gcc warnings 2011-04-08 14:13:08 +00:00
Zdenek Kabelac
42f177b3e2 Newer gcc doesn't need this trick
In fact it now generates an opposite warning about using undefined variable.
2011-04-08 14:11:40 +00:00
Jonathan Earl Brassow
9f8c06b31c Thanks to Zdenek Kabelac (kabi) for pointing out that I was using
dm_pool_free incorrectly.  This check-in fixes that incorrect usage.

I've also added a WHATS_NEW line to reflect the changes I made to allow
lv_extend to operate on 0 length intrinsically layered LVs (i.e mirrors
and RAID).  I forgot that in the last commit.
2011-04-07 21:49:29 +00:00
Jonathan Earl Brassow
98ac36c049 This patch adds the ability to extend 0 length layered LVs. This
allows us to allocate all images of a mirror (or RAID array) at one
time during create.

The current mirror implementation still requires a separate allocation
for the log, however.
2011-04-06 21:32:20 +00:00
Petr Rockai
69dda2cfa0 Add rudimentary versioning to the dmevend protocol, allowing us to detect the
(protocol) version of the running dmeventd on the client side.

Right now this is only used in dmeventd -R.
2011-04-04 16:11:09 +00:00
Peter Rajnoha
251265821f Cleanup fid finalization code in free_vg and allow exactly the same fid to be set again for a PV/VG.
Actually, we can call vg_set_fid(vg, NULL) instead of calling
destroy_instance for all PV structs and a VG struct - it's the same
code we already have in the vg_set_fid.

Also, allow exactly the same fid to be set again for the same PV/VG
Before, this could end up with the fid destroyed because we destroyed
existing fid first and then we used the new one and we didn't care
whether existing one == new one by chance.
2011-04-01 14:54:20 +00:00
Peter Rajnoha
64f51a3797 Finalize PV format instances properly in lvm_vg_write fn.
lvm_vg_write fn reinitializes the vg->removed_pvs list. We have to finalize
all PV format instances attached to PVs found in the original list.
2011-04-01 13:44:51 +00:00
Zdenek Kabelac
3b0651049b Fix free_vg order
As now the FID management is more complex, the code inside free_vg needs
to access some parts of memory pools which were not needed before.

For this - makes the order of unlock_and_free_vg() unconditional.
Keek using unlock_and_free_vg() API function.

For properly working VG locking mechanism only the alphabeting locking
orderer needs to be preserved.

TODO: there could be few more code parts simplified when we 'officially'
support of referencies between different memory pools.
2011-03-30 14:35:00 +00:00
Zdenek Kabelac
f00b069236 Testing: remove -q from grep
to avoid any problems with pipe breaking.
2011-03-30 13:39:24 +00:00
Zdenek Kabelac
a754694221 Use created hash tables for quick check of LV, PV.
Instead of searching linear list of all LVs, PVs - use created hash tables
also for quick mapping between LV.

(Note - for small number of PVs or LVs the overhead of the hash is bigger).

TODO: Use hash tables in volume_group structure directly.
2011-03-30 13:35:51 +00:00
Zdenek Kabelac
954464e5aa Keep the cache content when the exported vg buffer is matching
Instead of regenerating config tree and parsing same data again,
check whether export_vg_to_buffer does not produce same string as
the one already cached - in this case keep it, otherwise throw cached
content away.

For the code simplicity calling _free_cached_vgmetadata() with
vgmetadata == NULL as the function handles this itself.

Note: sometimes export_vg_to_buffer() generates almost the same data
with just different time stamp, but for the patch simplicity,
data are reparsed in this case.

This patch currently helps for vgrefresh.
2011-03-30 13:14:34 +00:00
Zdenek Kabelac
57f9272a6a Few more files filtered from memory locking
Code located in these files should not be used in critical section.
2011-03-30 13:06:13 +00:00
Zdenek Kabelac
649d3a0fdf Word alignment for strings
Align strdup char* allocation just on 2 bytes.
It looks like wasting space to align strings on 8 bytes.
(Could be even 1byte - but for hashing it might eventually get better
perfomance - but probably hardly measurable).

TODO: check on various architectures it's not making any problems.
2011-03-30 12:57:03 +00:00
Zdenek Kabelac
b74ae69aad Optimise error message write to _lvm_errmsg
Isn't usually perfomance critical - but log_error is used i.e.for debuging,
this code noticable slows down the processing.

Added 512KB limit to avoid memory exhastions in case of some endless loop.

TODO: use _lvm_errmsg buffer only when lvm2api needs it.
2011-03-30 12:53:04 +00:00
Zdenek Kabelac
d951cb0e68 Keep noreturn attribute for lvm_thread_fn
Even thought my gcc seems to not need it, as it's deduced from pthread_exit(),
keep it here for older compiler to avoid getting warnings.
2011-03-30 12:48:16 +00:00
Zdenek Kabelac
1fa8125125 Valgrind updates
Avoid locking sum testing with valgrind compilation.

Make memory unaccessible in the valgrind for dm_pool_abadon_object.

Valgrind hinting should not be needed in _free_chunk for dm_free.
2011-03-30 12:43:32 +00:00
Zdenek Kabelac
f251589b6b Better shutdown for clvmd
'a small step' towards cleaner shutdown sequence.
Normally clvmd doens't care about unreleased memory on exit -
but for valgrind testing it's better to have them cleaned all.

So - few things are left on exit path - this patch starts to remove
just some of them.

1. lvm_thread_fs is made as a thread which could be joined on exit()
2. memory allocated to local_clien_head list is released.
   (this part is somewhat more complex if the proper reaction is
   needed - and as it requires some heavier code moving - it will
   be resolved later.
2011-03-30 12:36:19 +00:00
Zdenek Kabelac
305e8a8a5c Fix reading of unitialized memory
Could be reached via few of our lvm2 test cases:

==11501== Invalid read of size 8
==11501==    at 0x49B2E0: _area_length (import-extents.c:204)
==11501==    by 0x49B40C: _read_linear (import-extents.c:222)
==11501==    by 0x49B952: _build_segments (import-extents.c:323)
==11501==    by 0x49B9A0: _build_all_segments (import-extents.c:334)
==11501==    by 0x49BB4C: import_extents (import-extents.c:364)
==11501==    by 0x497655: _format1_vg_read (format1.c:217)
==11501==    by 0x47E43E: _vg_read (metadata.c:2901)

cut from t-vgcvgbackup-usage.sh
--
pvcreate -M1 $(cat DEVICES)
vgcreate -M1 -c n $vg $(cat DEVICES)
lvcreate -l1 -n $lv1 $vg $dev1
--

Idea of the fix is rather defensive - to allocate one extra element
to 'map' array which is then used in _area_length() - where the
loop checks, whether next map entry is continuous.

By placing there always one extra zero entry -
we fix the read of unallocated memory, and we make sure the data would
not make a continous block.

FIXME: there could be a problem if some special broken lvm1 data would be imported.
As the format1 is currently not really used - leave it for future fix
and use this small hotfix for now.
2011-03-30 12:30:39 +00:00
Zdenek Kabelac
2ebc24197a Simplify pool debug initialization
zalloc pool structure and skip zeroing members.
2011-03-30 12:16:15 +00:00
Zdenek Kabelac
db6446aa2f Fix 2 signess warnings reported by gcc
Replace int with unsigned counter.
Replace snprintf with dm_snprintf.
2011-03-30 12:14:36 +00:00
Zdenek Kabelac
9458a9da09 Use id_equal instead of strncmp()
More consistent and easier to read.
2011-03-29 21:57:56 +00:00
Zdenek Kabelac
8207745583 Add attribute printf 2011-03-29 21:56:53 +00:00
Zdenek Kabelac
b29742d2fa Add attribute printf
gcc suggested to add these attributes to instrumentat
printf arguments. Adding it for internal functions as well.
2011-03-29 21:53:46 +00:00
Zdenek Kabelac
c2759c3644 Const warning fixes
With recent update of dm_report_field_string() API call to accept
completely const objects - we no longer need loose constness here
and keep it forwarding.
2011-03-29 21:49:18 +00:00
Zdenek Kabelac
a26bc49296 Fix access to released memory
Invalid primary_vginfo was supposed to move all its lvmcache_infos to
orphan_vginfo - however it has called _drop_vginfo() inside the loop
that released primary_vginfo itself - thus made the loop using released
memory.

Use _vginfo_detach_info() instead and call _drop_vginfo after
th loop is finished.

Valgrind trace it should fix:

Invalid read of size 8
   at 0x41E960: _lvmcache_update_vgname (lvmcache.c:1229)
   by 0x41EF86: lvmcache_update_vgname_and_id (lvmcache.c:1360)
   by 0x441393: _text_read (text_label.c:329)
   by 0x442221: label_read (label.c:289)
   by 0x41CF92: lvmcache_label_scan (lvmcache.c:635)
   by 0x45B303: _vg_read_by_vgid (metadata.c:3342)
   by 0x45B4A6: lv_from_lvid (metadata.c:3381)
   by 0x41B555: lv_activation_filter (activate.c:1346)
   by 0x415868: do_activate_lv (lvm-functions.c:343)
   by 0x415E8C: do_lock_lv (lvm-functions.c:532)
   by 0x40FD5F: do_command (clvmd-command.c:120)
   by 0x413D7B: process_local_command (clvmd.c:1686)
 Address 0x63eba10 is 16 bytes inside a block of size 160 free'd
   at 0x4C2756E: free (vg_replace_malloc.c:366)
   by 0x41DE70: _free_vginfo (lvmcache.c:980)
   by 0x41DEDA: _drop_vginfo (lvmcache.c:998)
   by 0x41E854: _lvmcache_update_vgname (lvmcache.c:1238)
   by 0x41EF86: lvmcache_update_vgname_and_id (lvmcache.c:1360)
   by 0x441393: _text_read (text_label.c:329)
   by 0x442221: label_read (label.c:289)
   by 0x41CF92: lvmcache_label_scan (lvmcache.c:635)
   by 0x45B303: _vg_read_by_vgid (metadata.c:3342)
   by 0x45B4A6: lv_from_lvid (metadata.c:3381)
   by 0x41B555: lv_activation_filter (activate.c:1346)
   by 0x415868: do_activate_lv (lvm-functions.c:343)

problematic line:
dm_list_iterate_items_safe(info2, info3, &primary_vginfo->infos)
2011-03-29 21:34:18 +00:00
Zdenek Kabelac
999238d4fa Fix sending uninitilised bytes in cluster messages
Fix 2 more functions sending cluster messages to avoid passing uninitilised bytes
and compensate 1 extra byte attached to the message from the clvm_header.args[1]
member variable.
2011-03-29 21:05:39 +00:00
Zdenek Kabelac
9e1eeffe6f Fix -Wold-style-definition gcc warnings 2011-03-29 20:30:05 +00:00
Zdenek Kabelac
fb5983fb8f Remove double braces
Clang gives notice about possible confusion as commonly double bracces are
used when some assignment is done inside them.
2011-03-29 20:19:03 +00:00
Jonathan Earl Brassow
4497409a31 gdbinit update
When doing lv_status_r on a sub_lv, do not climb the tree up past the
starting point.
2011-03-29 12:53:13 +00:00
Jonathan Earl Brassow
01b265e08d s/MIRROR_NOTSYNCED/LV_NOTSYNCED/ - Flag will may refer to more than just mirrors 2011-03-29 12:51:57 +00:00
Zdenek Kabelac
012b164bae Reduce amount of vgremove and vgchange calls
A bit noticable time defference when whole test-suite is run through valgrind.
2011-03-28 11:35:20 +00:00
Alasdair Kergon
2b6028fd13 Rename _check_version 2011-03-27 13:44:08 +00:00
Alasdair Kergon
8420ee864d Use hard-coded /dev/mapper/control details for 2.6.36+ kernels and simplify
associated code.  (Some obscure configurations that happened to work before
are no longer supported.)
2011-03-25 23:50:35 +00:00
Jonathan Earl Brassow
d010b440a3 Fix unhandled condition in _move_lv_segments
If _move_lv_segments is passed a 'lv_from' that does not yet
have any segments, it will screw things up because the code
that does the segment copy assumes there is at least one
segment.  See copy code here:
        lv_to->segments = lv_from->segments;
        lv_to->segments.n->p = &lv_to->segments;
        lv_to->segments.p->n = &lv_to->segments;

If 'segments' is an empty list, the first statement copies over
the values, but the next two reset those values to point to the
other LV's list structure.  'lv_to' now appears to have one
segment, but it is really an ill-set pointer.
2011-03-25 22:02:27 +00:00
Jonathan Earl Brassow
e720e51548 Replace malloc with zalloc when creating segment_type's 2011-03-25 21:59:42 +00:00
Jonathan Earl Brassow
6bc394736e cosmetic change - swapping one macro for another
When I see 'seg_is_mirrored', I expect the argument to be an lv_segment.
In this case, it is lvcreate_params.  Both structures, have a 'segtype'
entry which the macro dereferences.  However, it just seems easier to
understand if we do 'segtype_is_mirrored' instead.
2011-03-25 21:56:28 +00:00
Petr Rockai
be59769771 What's new. 2011-03-24 16:03:32 +00:00
Petr Rockai
07834012c5 In some cases, we could end up with a mirrored LV without a MIRRORED flag. In
other cases, the code could wind up removing wrong number of mirrors. In yet
other cases, we could remove the right number of mirrors, but fail to respect
the removal preferences (i.e. keep an image that was requested to be removed
while removing an image that was requested to be kept). Under some
circumstances, remove_mirror_images could also get stuck in an infinite loop.

This patch should fix all of the above undesirable behaviours.

Signed-off-by: Petr Rockai <prockai@redhat.com>
Reviewed-by: Jonathan Brassow <jbrassow@redhat.com>
2011-03-24 12:28:02 +00:00
Zdenek Kabelac
123c9b8de5 Release allocated memory on closedown
Release allocated buffers before exit() in debug singlenode - so it's not
poping out in valgrind as memory leak.
2011-03-24 10:45:00 +00:00
Alasdair Kergon
1d250e8cb3 Fix last checkin - added error message text wrong. 2011-03-20 02:00:52 +00:00
Zdenek Kabelac
7c626d5c9a Improve debug stack trace
dm_check_version reports log_error() - so use return_NULL.
2011-03-18 13:21:02 +00:00
Milan Broz
a2222288b5 Mitigate some warnings if running as non-root user.
LVM doesn't behave correctly if running as non-root user,
there is warning when it detects it.

Despite this, it produces many error messages, saying nothing.
See https://bugzilla.redhat.com/show_bug.cgi?id=620571

This patch fixes two things:
1) Removes eror message from device_is_usable() which has no
information value anyway (real warning is printed inside it).

2) it fixes device-mapper initialization, if we support
core dm module autoload and device node is present, it should
fail early and not try recreate existing and correct node.
(non-root == permission denied here)

N.B. In future code should support user roles, some more
drastic checks in code are probably contraproductive now.
2011-03-18 12:17:57 +00:00
Jonathan Earl Brassow
cb385da467 Watch out for collisions in GDB global namespace.
Better 'lv_status_r' printing.
2011-03-14 18:05:56 +00:00
Zdenek Kabelac
0084e7b633 Add missing \0 for grown debug object
Attach \0 for proper char* display - otherwise somewhat random message could
be displayed in debug more and read of unpredictable read of uninitilized
memory values could happen.
2011-03-14 17:00:57 +00:00
Zdenek Kabelac
e8dd7c08fe Revert this commit
This buffer allocation must have been problem somewhere else.
(as sizeof() already has the 'extra' '\0' included).
For now reverting this commit.
2011-03-13 23:18:30 +00:00
Zdenek Kabelac
1847de3d68 Fix allocation of system_id
As code uses strncpy(system_id, NAME_LEN) and doesn't set '\0'
Fix it by always allocating NAME_LEN + 1 buffer size and with zalloc
we always get '\0' as the last byte.

This bug may trigger some unexpected behavior of the string operation
code - depends on the pool allocator.

FIXME: refactor this code to alloc_vg.
2011-03-13 23:05:48 +00:00
Zdenek Kabelac
d8d824c6c5 Use proper size of strncpy
Avoid reading extra character if we expect to have there '\0'.
2011-03-13 23:01:08 +00:00
Zdenek Kabelac
77e3ec4e28 Fix buffer allocation size for uuid string
We have 3 components and traling '\0' so allocate proper room for all of them.
Problem was nicely hidden by allocation from pool and allocation aligment
offset - so to trigger real problem with this one is actually hard.
2011-03-13 22:57:51 +00:00
Zdenek Kabelac
0e88dec738 Fix usage of readlink
Return value of readlink limits valid string size.
Characters after returned size present some garbage to printf.
Fix it by placing '\0' on the return size value.
2011-03-13 22:52:16 +00:00
Peter Rajnoha
4d86ffec2e Remove compile warning for lock_id.
%llx --> PRIx64
2011-03-13 18:08:26 +00:00
Jonathan Earl Brassow
4366379b3d lv_status_r now prints out status of attached log LVs
(Print layout could be nicer... :(
2011-03-11 22:25:36 +00:00
Jonathan Earl Brassow
876d50dbe2 lv_status_r has been fixed-up so that it supports infinite LV stacking
(It does not yet follow 'log_lv' or 'origin' links.)
2011-03-11 22:16:38 +00:00
Jonathan Earl Brassow
eae4a9eb03 detect RAID* LV and segment flags. 2011-03-11 18:22:39 +00:00
Peter Rajnoha
3a609bcd5d Use format instance mempool where possible and adequate. 2011-03-11 15:10:16 +00:00
Peter Rajnoha
4428aa0fdf Various cleanups for fid mem and ref_count changes.
Missing free_vg on error_path in lvmcache_get_vg fn. Call destroy_instance
only if the fid is not part of the vg in backup_read_vg fn (otherwise it's
part of the VG we're returning and we definitely don't want to destroy it!).
2011-03-11 15:08:31 +00:00
Peter Rajnoha
fef82df095 Call destroy_instance for any PVs found in VG structure during vg_free call.
This is necessary for proper format instance ref_count support. We iterate
over vg->pvs and vg->removed_pvs list and the ref_count is decremented and
then it is destroyed if not referenced anymore.
2011-03-11 15:06:13 +00:00
Peter Rajnoha
359d2505d3 Add new free_pv_fid fn and use it throughout to free all attached fids.
Since format instances will use own memory pool, it's necessary to properly
deallocate it. For now, only fid is deallocated. The PV structure itself
still uses cmd mempool mostly, but anytime we'd like to add a mempool
in the struct physical_volume, we can just rename this fn to free_pv and
add the code (like we have free_vg fn for VGs).
2011-03-11 14:56:56 +00:00
Peter Rajnoha
d3d6337ecf Use only vg_set_fid and new pv_set_fid fn to assign the format instance.
This is essential for proper format instance ref_count support. We must
use these functions to set the fid everywhere from now on, even the NULL
value!
2011-03-11 14:50:13 +00:00
Peter Rajnoha
aa7198a530 Make create_text_context fn static and move it inside create_instance fn.
We'd like to use the fid mempool for text_context that is stored
in the instance (we used cmd mempool before, so the order of
initialisation was not a matter, but now it is since we need to
create the fid mempool first which happens in create_instance fn).

The text_context initialisation is not needed anywhere outside the
create_instance fn so move it there.
2011-03-11 14:45:17 +00:00
Peter Rajnoha
9e125c5708 Add mem and ref_count fields to struct format_instance for own mempool use.
Format instances can be created anytime on demand and it contains
metadata area information mostly (at least for now, but in the future,
we may store more things here to update/edit in a PV/VG). In case we
have lots of metadata areas, memory consumption will rise. Using cmd
context mempool is not quite optimal here because it is destroyed too
late. So let's use a separate mempool for format instances.

Reference counting is used because fids could be shared, e.g. each PV
has either a PV-based fid or VG-based fid. If it's VG-based, each PV has
a shared fid with the VG - a reference to VG's fid.
2011-03-11 14:38:38 +00:00
Peter Rajnoha
2b62190d4b Use new alloc_fid fn for common format instance initialisation. 2011-03-11 14:30:27 +00:00
Zdenek Kabelac
466c7e43b4 Missed merge fix in vg_validate patch 2011-03-10 22:39:36 +00:00
Zdenek Kabelac
41d9b8a647 Document pv_min_size in lvm.conf manpage 2011-03-10 15:20:10 +00:00
Zdenek Kabelac
685e54f4dd Optimise _eat_space and _get_token
Makes the code more readable and has a smaller number of memory
accesses thus it's small optimisation as well.

For _get_token() optimize number parsing. Check for '.' char only
if it's not a digit. Move pointer incrementation into one place.

For _eat_space() check only p->te for '\0' in skipping of comment line.
Avoid check for '\0' when we know it is space. Also master while loop
doesn't need checking p->tb for '\0'. We just need to check p->tb
isn't already at the end of buffer. This could give 'extra' loop cycle
if we are already there - but safes memory access in every other case.
2011-03-10 14:51:35 +00:00
Zdenek Kabelac
f84700a182 Keep pool name also for pool-fast
It's cheap to keep this name - and it is useful for 'non pool debug code'
compilation.
2011-03-10 14:49:01 +00:00
Zdenek Kabelac
46e72765b6 Indent spaces to tabs 2011-03-10 14:47:22 +00:00
Zdenek Kabelac
20d6bb2d9c Refactor code for _lv_postoder
Add _lv_postorder_vg() - for calling _lv_postorder() for every LV from VG.
We use this in 2 places -  vg_mark_partial_lvs() and vg_validate()
so make it as a one function.

Benefit here is - to use only one cleanup code and avoid
potentially duplicate scans of same LVs.
2011-03-10 14:40:32 +00:00
Jonathan Earl Brassow
51d61a76da gdbinit - A GDB init file to help while debugging LVM.
Copy this file as '.gdbinit' to your home directory or your working
directory.  It adds the following commands to gdb:
 - first_seg
 - lv_status
 - lv_status_r
 - lv_is_mirrored
 - seg_item
 - seg_status
 - segs_using_this_lv

You can get a list of these user-defined commands by typing:
(gdb) help user-defined

You can get more information on each command by typing:
(gdb) help <command>
2011-03-10 13:45:12 +00:00
Zdenek Kabelac
8fb9f86271 Use hash tables for validating names
Accelerate validation loop by using lvname, lvid, pvid hash tables.
Also merge pvl loop into one cycle now - no need to scan the list twice.
List scan is stopped when dm_hash_insert fails.

The error message with loop_counter1 is no longer provided - however
the message has been misleading anyway.
2011-03-10 13:11:59 +00:00
Milan Broz
0c586a7359 Fix dmsetup man page typo (John Bradshaw) 2011-03-10 13:11:45 +00:00
Zdenek Kabelac
938ebb30bf Use void pointer instead of char for binary key
dm_hash binary functions takes void* key - so there is no need to cast
pointers to char* (also the hash key does not have trailing '\0').

This is slight API change, but presents no change for the API user side
it just allows to write code easier as the casting could be removed.
2011-03-10 12:48:40 +00:00
Zdenek Kabelac
b5d7757c3d Refactor vg allocation code
Create new function alloc_vg() to allocate VG structure.

It takes pool_name (for easier debugging).
and also take vg_name to futher simplify code.

Move remainder of _build_vg_from_pds  to _pool_vg_read
and use vg memory pool for import functions.
(it's been using smem -> fid mempool -> cmd mempool)
(FIXME: remove mempool parameter for import functions and use vg).

Move remainder of the _build_vg to _format1_vg_read
2011-03-10 12:43:29 +00:00
Alasdair Kergon
d8601fd4b6 Avoid possible endless loop in _free_vginfo when 4 or more VGs have same name. 2011-03-10 03:03:03 +00:00
Alasdair Kergon
38c3277bf6 Use empty string instead of /dev// for LV path when there's no VG.
Don't allocate unused VG mempool in _pvsegs_sub_single.
2011-03-09 12:44:42 +00:00
Zdenek Kabelac
20c6ce9e13 Do not send random bytes in message
Fixing few issues:

struct clvm_header  contains  'char args[1]' - so adding '+ 1' here
for message length calculation is 1 byte off.
Message with last byte uninitialized is then passed to write function.
Update also related arglen.

Initialise xid and clintid to 0.

Memory allocation is checked for NULL
2011-03-08 22:48:50 +00:00
Zdenek Kabelac
d9785ae4f1 Fix reading byte from char params[-1] position
When the ->params string is empty - memory access is made on the byte
before allocated buffer (catched by valgrind) - in the case it would
constain 0x20 - it would even overwrite this buffer.
So fix by checking len > 0 before doing such access.
Also slightly optimise this loop from repeated strlen call.
2011-03-08 22:43:19 +00:00
Milan Broz
b118538f94 Fix clvmd return code for bad options.
We should return exit code 2 for unknown option.

Patch also adds standard --help option instead.

Fixes https://bugzilla.redhat.com/show_bug.cgi?id=666991
2011-03-08 13:27:39 +00:00
Milan Broz
c2501a7747 Fix test for 8k page (fails on Sparc architecture). 2011-03-06 18:28:09 +00:00
Zdenek Kabelac
f19572f894 Use lvm_getpagesize wrapper 2011-03-06 17:52:07 +00:00
Milan Broz
03a1410b12 Fix hardcoded page size, fixing test fails with 8k page and new kernel. 2011-03-06 16:47:43 +00:00
Milan Broz
40c79aba89 Move secure flag warning to verbose level.
This feature is not yet upstream and becuase cryptsetup in next version
uses this flag, it will cause a lot of noise on old kernels.
2011-03-05 21:17:19 +00:00
Zdenek Kabelac
68a58ad799 Fix reading of released memory
lvseg_segtype_dup used memory pool vg memory pool for strind duplication.
However this one gets released before reporting happens so the command like:

pvs -o segtype

prints data from already released memory pool. Thanks to the fact there
is not much allocation happing after the VG is released, the memory
stays unmodified and correct result is printed.

Fix adds support for mempool passed parameter (like other similar
query commands) and uses dm_report memory pool for string duplication.
2011-03-05 12:14:00 +00:00
Milan Broz
ddb128b039 Make the lock test a little bit better... 2011-03-04 15:04:08 +00:00
Milan Broz
3e881b9726 Wait until dmeventd properly starts in test. 2011-03-04 14:19:18 +00:00
Zdenek Kabelac
7b0f2031c4 Leave some time to scheduler
Looks like scheduler prefers running task before exiting tasks.
So leave some time to finish running flock process.
2011-03-03 16:04:53 +00:00
Zdenek Kabelac
786d8aa089 Avoid using pipes on lv commands
Failure on pipe kills lv command and produces memory stacktrace.
Add 'true' for empty grep/sed to avoid generating stacktrace.

This should help with long time (7min) for this test on Lenny testing maching
(mimage was filler with stacktrace and each word was used as LV name).
2011-03-03 15:46:05 +00:00
Milan Broz
263c008cf5 Fix test, "echo n" is not needed when -y is specified. 2011-03-03 14:16:08 +00:00
Peter Rajnoha
7fdd43860e Lower severity of selabel_lookup and matchpathcon failure to log_debug. 2011-03-03 13:05:40 +00:00
Zdenek Kabelac
06f8f76ca3 Do not echo y to lvconvert
This lvconvert is now able to complete operation automatically without question.
So remove  'echo y |'  - it's breaking pipe on some bash versions.

i.e.:
+ echo y
./t-lvconvert-mirror-basic.sh: line 104: echo: write error: Broken pipe

while command has completed this operation succesfully
2011-03-03 12:30:33 +00:00
Milan Broz
79770354fe Fix syslog initialisation in clvmd to respect lvm.conf setting. 2011-03-02 23:38:22 +00:00
Milan Broz
e717fa3bef Increase check timeout again.
It fails on my s390x Hercules emulator otherwise. :)
2011-03-02 20:14:39 +00:00
Milan Broz
227457e20d PE size overflows, on most architectures it is catch by "PE cannot be 0"
but s390x unfortunately return something usable.

Always use unit64 in inital parameter check.
2011-03-02 20:00:09 +00:00
Milan Broz
9c2c45f192 Use 64bit unsigned value for maximum stripe size test.
While STRIPE_SIZE_LIMIT * 2 is basically UINT_MAX, 32bit integer
value can already overflow durin arg size parsing.

(This really happens in test where --stripesize 4294967291 is used,
in s390x uint overflow and this test is ineffective.)
2011-03-02 16:56:06 +00:00
Petr Rockai
fd30a1c96e Do not run past the end of an array in dmeventd's _handle_request when faced
with an unknown command ID.
2011-03-02 14:20:48 +00:00
Milan Broz
c537579e61 Fix test to use explicit /dev/mapper/ node, /proc/mounts should not change this...
(failed on RHEL5)
2011-03-02 13:49:29 +00:00
Petr Rockai
45ba9f9c2f As requested in BZ 454618:
- dmeventd -R will continue to start up even if no dmeventd is currently
  running + a test for this behaviour
- add -R to dmeventd manpage
2011-03-02 12:49:13 +00:00
Peter Rajnoha
67f411afc0 Use a copy if moving an mda from pv fid to vg fid.
We'll destroy the pv fid (with all mdas in it) after merging all pv mdas to
a vg in _text_pv_setup fn, hence we need to use a copy here!
2011-03-02 10:23:29 +00:00
Peter Rajnoha
38d96413d0 Make add_metadata_area_to_pv/remove_metadata_area_from_pv static.
No need to put these in format-text.h, it's not used anywhere else actually.
2011-03-02 10:19:14 +00:00
Zdenek Kabelac
7d5f270516 Indent case part properly 2011-03-02 08:41:55 +00:00
Zdenek Kabelac
5c9b44a61e Add missing return 2011-03-02 08:40:28 +00:00
Alasdair Kergon
fa61ff55de Accept multiple mapped device names on many dmsetup command lines. 2011-03-02 02:44:56 +00:00
Alasdair Kergon
9a45e3dbbb Fix dm_udev_wait calls in dmsetup to occur before readahead display not after.
Include an implicit dm_task_update_nodes() within dm_udev_wait().
2011-03-02 00:29:57 +00:00
Milan Broz
5993dc5b48 Rather return success if skipping udev wait. 2011-03-01 23:44:07 +00:00
Zdenek Kabelac
c9aeaebd1a Invert condition for READLINE check 2011-03-01 23:29:14 +00:00
Alasdair Kergon
c588dfbdfa Fix _create_and_load_v4 not to lose the --addnodeoncreate setting (1.02.62). 2011-03-01 23:27:06 +00:00
Zdenek Kabelac
bc7397105c Build test.c also without readline library
Allow bulding of this testing tool also without readline library.
Outputs message about build without readline library then.
2011-03-01 23:18:40 +00:00
Milan Broz
6795c361c1 Fix tests if udevadm missing (old udev). 2011-03-01 22:28:27 +00:00
Zdenek Kabelac
81957c524b Quick fix to compile lvm2api tests when possible
Do not build lvm2api tests when lvm2api lib is not enabled.
2011-03-01 21:30:43 +00:00
Milan Broz
070787d579 Fix some compile warnings on RHEL5
- returned char not needed to be explicitly const
- warn if pipe() fails in clvmd (more fixes here needed for error paths...)
- assign (and ignore) read() output in drain buffer
2011-03-01 20:17:56 +00:00
Milan Broz
75029c2d42 Rephrase backup message. 2011-02-28 20:50:01 +00:00
Zdenek Kabelac
dcb8c448d3 Fix void pointer arithmetic warning in pool debug code 2011-02-28 19:54:30 +00:00
Zdenek Kabelac
d55b658e6c Add fall through comments
Add comments to switch case construct.
2011-02-28 19:53:03 +00:00
Zdenek Kabelac
bb9b9c6514 Remove dead code
Remove code which is no longer used.
Code which has been using msg_malloced was removed in 2007.
2011-02-28 19:50:15 +00:00
Zdenek Kabelac
2163383728 Test result of dm_task_set_uuid 2011-02-28 19:47:22 +00:00
Zdenek Kabelac
23b5c9b9e5 Use backward compatible configuration for test
Currently the test are not properly working with new code.
Use settings for the previous behaviour.

FIXME: update tests to properly test new allocation policies.
2011-02-28 19:44:21 +00:00
Alasdair Kergon
179139735c mention vgreduce in pvremove man page 2011-02-28 19:40:04 +00:00
Alasdair Kergon
9938f0c674 Improve pvremove error message when PV belongs to a VG. 2011-02-28 19:35:09 +00:00
Peter Rajnoha
0de865f20b Add a hint for manual revert if there's an error in pv_write, vg_write, vg_commit for pvresize. 2011-02-28 17:08:09 +00:00
Peter Rajnoha
fc04cb0266 Use pv->vg_name directly instead of pv->vg->name in _text_pv_write.
This also prevents a possible segfault during an automatic repair
when the PV does not belong to a VG anymore and we call pv_write_orphan.
2011-02-28 17:05:48 +00:00
Peter Rajnoha
7f250ab142 Allow non-orphan PVs with two metadata areas to be resized.
We allow writing non-orphan PVs only for resize now. The "orphan PV" assert
in pv_write fn uses the "allow_non_orphan" parameter to control this assert.
However, we should find a more elaborate solution so we can remove this
restriction altogether (pv_write together with vg_write is not atomic, we
need to find a safe mechanism so there's an easy revert possible in case of
an error).
2011-02-28 13:19:02 +00:00
Alasdair Kergon
eb9b172ec0 Fix check for log-only allocation in new alloc normal loop. 2011-02-27 01:16:52 +00:00
Alasdair Kergon
a499caabfb Various changes to the allocation algorithms: Expect some fallout.
There is a lot to test.

Two new config settings added that are intended to make the code behave
closely to the way it did before - worth a try if you find problems.
2011-02-27 00:38:31 +00:00
Peter Rajnoha
c4dbd497b2 vgconvert is fixed now to work with the changes in metadata area handling - enable the tests.
Add a small fix that preserves pe_start for lvm1 PVs when being converted.

(this fix needs to be replaced with something more clever, but let's have this working now)
2011-02-25 14:12:14 +00:00
Peter Rajnoha
ed08b81e18 Allow only orphan PVs to be resized even with two metadata areas. 2011-02-25 14:08:54 +00:00
Peter Rajnoha
8a7a401cae Revert the patch for vgconvert to work with recent changes in metadata area handling.
This should work now with the help of the patch from previous commit.
2011-02-25 14:02:53 +00:00
Peter Rajnoha
cb38146acb Read PV metadata information from cache if pv_setup called with pv->fid == vg->fid.
If the PV is already part of the VG (so the pv->fid == vg->fid), it makes no
sense to attach the mdas information from PV to a VG. Instead, we read new
PV metadata information from cache and attach it to the VG fid.
2011-02-25 13:59:47 +00:00
Peter Rajnoha
1d96b805e1 Fix a bug in metadata location calculation, cleanup pv_add_metadata_area fn.
This bug (a missing line) caused the 2nd MDA area location to be calculated
incorrectly and it didn't fit the disk size properly.

(https://www.redhat.com/archives/lvm-devel/2011-February/msg00127.html)
2011-02-25 13:50:02 +00:00
Peter Rajnoha
65a3e6454b Remove a few size_t type warnings. 2011-02-22 10:01:11 +00:00
Mike Snitzer
96171b0c03 Add inactive table query support for kernel driver >= 4.11.6 (RHEL 5.7). 2011-02-21 16:26:23 +00:00
Peter Rajnoha
4dd4e14fd8 %ld -> PRIu64 2011-02-21 13:09:27 +00:00
Peter Rajnoha
01a0facd7e WHATS_NEW 2011-02-21 12:40:46 +00:00
Peter Rajnoha
7ef77b52a8 Temporarily disable 'convert a VG from lvm2 to lvm1 format' test until a known issue is resolved.
Otherwise we get an error:

  PV segment VG free_count mismatch: 32 != 33
  PV segment VG extent_count mismatch: 32 != 33
2011-02-21 12:37:33 +00:00
Peter Rajnoha
249d85882f Fix metadata balance code to work with recent changes in metadata handling
interface (with the changes in format_instance).
2011-02-21 12:33:16 +00:00
Peter Rajnoha
5261cc0402 Add old_uuid field to struct physical_volume so we can still reference a PV
with its old UUID when we're changig it (the cache as well as metadata area
index has the old uuid that we need to use to access the information!)
2011-02-21 12:31:28 +00:00
Peter Rajnoha
5cbb49ed94 Fix vgconvert code to work with changes in metadata area handling and changes
in format_instance. Add new 'vg_convert' function.
2011-02-21 12:29:21 +00:00
Peter Rajnoha
c7c59e41cd Change pvresize code to work with new metadata handling interface and allow
resizing a PV with two metadata areas.
2011-02-21 12:27:26 +00:00
Peter Rajnoha
771b5db044 Change pv_write code to work with the changes in metadata handling interface
and changes in format_instance.
2011-02-21 12:26:27 +00:00
Peter Rajnoha
d0ffde9114 Remove unused _mda_setup fn. This functionality is covered by new pv_add_metadata_area fn. 2011-02-21 12:25:16 +00:00
Peter Rajnoha
12353bda0a Change the code throughout to use new pv_initialise and modified pv_setup fn.
Change pv_create code to work with these changes together with using new
pv_add_metadata_area fn to add metadata areas for a PV being created.
2011-02-21 12:24:15 +00:00
Peter Rajnoha
e1b1f2f961 Separate new pv_initialise function out of the original pv_setup code.
pv_initiliase initialises a new PV
pv_setup sets up an existing PV with a VG
2011-02-21 12:20:18 +00:00
Peter Rajnoha
d58036bc42 Add new pv_remove_metadata_area interface function. 2011-02-21 12:17:54 +00:00
Peter Rajnoha
83f5538979 Add new pv_add_metadata_area interface function. 2011-02-21 12:17:26 +00:00
Peter Rajnoha
cdc08fcc63 Remove useless mdas parameter for pv_read (from now on, we store mdas in a
format instance)
2011-02-21 12:15:59 +00:00
Peter Rajnoha
ec441b4c5b Add format instance support for pv_read code. 2011-02-21 12:13:40 +00:00
Peter Rajnoha
f78aec2b52 Initialise a new PV-based format instance for a PV that is being created. 2011-02-21 12:12:32 +00:00
Peter Rajnoha
afc29a3d69 Add vg_set_fid function to change VG format instance.
This function also sets a reference to a new VG format instance for all PVs
that are part of the VG so the PV-VG interconnection is consistent after the
change.
2011-02-21 12:10:58 +00:00
Peter Rajnoha
251029609d Change the code throughout for recent changes in format_instance handling. 2011-02-21 12:07:03 +00:00
Peter Rajnoha
3a760ad8cd Change create_instance to create PV-based as well as VG-based format instances.
Add supporting functions to work with the format instance and metadata area
structures stored within the format instance. Add support for simple indexing
of metadata areas using PV id and mda order (for on-disk PV only for now, we
can extend the indexing even for other mdas if needed - we only need to define
a proper key for the index).
2011-02-21 12:05:49 +00:00
Peter Rajnoha
37c5724db2 Change and generalise struct format_instance for PV and VG use. 2011-02-21 12:01:22 +00:00
Alasdair Kergon
1ebe15e7ce Handle decimal digits with --units instead of ignoring them silently.
Fix remaining warnings and compile with -Wpointer-arith.
2011-02-18 23:09:55 +00:00
Zdenek Kabelac
aed211833d Fix gcc warnings for unused variables
Put dead assigment code into comment.
2011-02-18 16:17:56 +00:00
Zdenek Kabelac
e382b2b990 Add debug message for open_count failure
Report  open_count problem as debug.

Function using _node_has_closed_parents decides whether
it's error or could be ignored.
2011-02-18 16:13:56 +00:00
Zdenek Kabelac
2045501076 Use return_0 to get bactrace 2011-02-18 15:05:40 +00:00
Zdenek Kabelac
f43f5f7004 Add some backtraces for error paths in process_each_lv 2011-02-18 15:02:25 +00:00
Zdenek Kabelac
16aa248a35 Memory unlock allows 1 page difference
As the kernel seems to be doing weird things during
mlock -> munlock -  allow 1 page locking difference without
warning - and log just debug message for a 1 page difference.

Allocation happens outside critical section probably during
log_warn printing.

Should make tests passing for now.
2011-02-18 14:51:04 +00:00
Zdenek Kabelac
e50d9e2c57 Const fixing
Fixing some const warnings - with API change in:

int vg_extend(struct volume_group *vg, int pv_count, const char *const *pv_names,

Change is needed - as lvm2api expects const behaviour here.
So vg_extend() is doing local strdup for unescaping.

skip_dev_dir return const char* from const char* vg_name.

Rest of the patch is cleanup of related warnings.

Also using dm_report_filed_string() API change to simplify
casting in _string_disp and _lvname_disp.
2011-02-18 14:47:28 +00:00
Zdenek Kabelac
dad7e11288 API change - support more const arg
As dm_report_field_string() doesn't modify content of data pointer,
it can be marked as const.

It's slight API change - but doesn't require any change on the user side
and supports wider range of arguments without const casting.
(i.e. we may use as paramater const lv struct this way:  &lv->name)
2011-02-18 14:38:47 +00:00
Zdenek Kabelac
f09295a4bd Void* arithmetic replaced with char* 2011-02-18 14:34:41 +00:00
Zdenek Kabelac
c0a8ab7699 Fix !DEVMAPPER_SUPPORT build
Fix build when devmapper is disabled.
2011-02-18 14:29:39 +00:00
Zdenek Kabelac
b11dd1f7b3 Remove fs_unlock after failed suspend
Explicit fs_unlock() after failed suspend is not need -
as it will happen automatically with nearest lv_info()
or vg_unlock().
2011-02-18 14:26:31 +00:00
Zdenek Kabelac
6bea349da8 Critical section
New strategy for memory locking to decrease the number of call to
to un/lock memory when processing critical lvm functions.

Introducing functions for critical section.

Inside the critical section - memory is always locked.
When leaving the critical section, the memory stays locked
until memlock_unlock() is called - this happens with
sync_local_dev_names() and sync_dev_names() function call.

memlock_reset() is needed to reset locking numbers after fork
(polldaemon).

The patch itself is mostly rename:

memlock_inc  -> critical_section_inc
memlock_dec  -> critical_section_dec
memlock      -> critical_section

Daemons (clmvd, dmevent) are using memlock_daemon_inc&dec
(mlockall()) thus they will never release or relock memory they've
already locked memory.

Macros sync_local_dev_names() and sync_dev_names() are functions.
It's better for debugging - and also we do not need to add memlock.h
to locking.h header (for memlock_unlock() prototyp).
2011-02-18 14:16:11 +00:00
Zdenek Kabelac
02c2036731 Replace PV_MIN_SIZE with function pv_min_size()
Add configurable option to define minimal size of
of block device usable as a PV.

pv_min_size() is added to lvm-globals and it's being
initialized through _process_config.

Macro PV_MIN_SIZE is unused and removed.

New define DEFAULT_PV_MIN_SIZE_KB is added to lvm-global
and unlike PV_MIN_SIZE it uses KB units.

Should help users with various slow devices attached to the system,
which cannot be easily filtered out (like FDD on /dev/sdX):
https://bugzilla.redhat.com/show_bug.cgi?id=644578
2011-02-18 14:11:22 +00:00
Zdenek Kabelac
42ebbfa703 Support 64bit ints in config 2011-02-18 14:08:22 +00:00
Jonathan Earl Brassow
8f1cc91a84 Fix for bug 677739: removing final exclusive cmirror snapshot,
results in clvmd deadlock

When a logical volume is activated exclusively in a cluster, the
local (non-cluster-aware) target is used.  However, when creating
a snapshot on the exclusive LV, the resulting suspend/resume fails
to load the appropriate device-mapper table - instead loading the
cluster-aware target.

This patch adds an 'exclusive' parameter to the pertinent resume
functions to allow for the right target type to be loaded.
2011-02-18 00:36:04 +00:00
Petr Rockai
9c1b192442 Fix an lv_postorder bug where it failed to clear temporary flags, making it
impossible to use twice with the same LV(s). Discovered by Milan.
2011-02-14 19:27:05 +00:00
Petr Rockai
b6791ca6e4 Avoid flooding syslog with redundant messages when a snapshot is invalidated
(reported by Corey).
2011-02-14 14:26:36 +00:00
Jonathan Earl Brassow
b352da1cc7 bug 659264: more examples for pvmove 2011-02-09 22:24:55 +00:00
Petr Rockai
e8c5b0512d Allow overriding the DM_DEV_DIR used by the test suite (fixes BZ 672796). Use
like this:

# LVM_TEST_DEVDIR=/tmp/dev make check
2011-02-09 17:57:14 +00:00
Petr Rockai
a06a7a618c Fix a typo in pvmove.8 (fixes BZ 673618, spotted by John Bradshaw). 2011-02-09 17:42:15 +00:00
Alasdair Kergon
252d74032a post-release 2011-02-09 12:11:21 +00:00
Alasdair Kergon
6d194f2014 pre-release 2011-02-09 12:04:39 +00:00
Jonathan Earl Brassow
5ff5555418 orginal WHATS_NEW entry was changed to be less in-line with change it
was associated with.  Update entry to be clearer than both.
2011-02-08 22:35:11 +00:00
Zdenek Kabelac
8329e7dc19 Revert DEBUG_MEM cleanup commit
As functions compiled within this define are apparently stil part of the public API,
(though lvm2 code is never using them unless this define is used for compilation),
keep functions available in the code for now -> revert.
2011-02-08 15:52:00 +00:00
Zdenek Kabelac
3a62b826ce Fix CRC32 calculation on big endian CPU
Fix regresion from 2.02.75 speedup - so currently crc32 is a little bit
more complicated on big-endian CPU as the uint32_t needs to be shifted
on here.
2011-02-08 12:41:08 +00:00
Alasdair Kergon
d09c912b81 post-release 2011-02-04 22:46:18 +00:00
Alasdair Kergon
60963d276e pre-release 2011-02-04 22:35:29 +00:00
Zdenek Kabelac
b44de841ff Add configure option --with-device-nodes-on
Make configurable default behaviour how to deal with device node creates.
With udev system natural options should be  'resume'.
For older systems where user expect there is node in /dev/mapper immediately
after  dmsetup create --notable  -  use 'create'

FIXME:
Code needs fixing passing this flag through udev cookie.
2011-02-04 22:17:54 +00:00
Alasdair Kergon
c45e064975 pre-release 2011-02-04 22:07:43 +00:00
Alasdair Kergon
559d2923a7 Warn if secure data flag requested but not supported by kernel. 2011-02-04 21:26:33 +00:00
Zdenek Kabelac
a683b0c993 Place back some common options
Until man pages are generated keep some common options with all commands.
2011-02-04 21:19:03 +00:00
Jonathan Earl Brassow
1ce9f8621b Allow snapshots in a cluster as long as they are exclusively
activated.

In order to achieve this, we need to be able to query whether
the origin is active exclusively (a condition of being able to
add an exclusive snapshot).

Once we are able to query the exclusive activation of an LV, we
can safely create/activate the snapshot.

A change to 'hold_lock' was also made so that a request to aquire
a WRITE lock did not replace an EX lock, which is already a form
of write lock.
2011-02-04 20:30:17 +00:00
Zdenek Kabelac
84f191ce5a Add --addnodeonresume, --addnodeoncreate
Add new function dm_task_set_add_node() to select between 2 types
of node creation in device directory.

DM_ADD_NODE_ON_RESUME is now default and ensures node is created on
resume. Old original behavior is accessible with DM_ADD_NODE_ON_CREATE.
In this case node would be created during dmsetup create --notable.

For the user 2 new options for dmsetup create are added:
[{--addnodeonresume | --addnodeoncreate }]

Properly working node creation on resume is needed for proper operation
stacking and ability to correctly check in which state the device should
after whole udev transation.
2011-02-04 19:33:53 +00:00
Zdenek Kabelac
577c542adc Remove extra sync calls.
Remove temporaly added fs_unlock() calls to fix clmvd usablity.
Now when the message passing is properly working - they are no longer needed.
Simplify no_locking  check for VG unlock - as message is always send
for all targets - clustered & non-clustered.
2011-02-04 19:21:47 +00:00
Zdenek Kabelac
a0866f0990 Use cluster-wide message to request device name sync
Thanks to CLVMD_CMD_SYNC_NAMES propagation fix the message passing started
to work. So starts to send a message before the VG is unlocked.
Removing also implicit sync in VG unlock from clmvd as now the message
is delievered and processed in do_command().
Also add support for this new message into external locking
and mask this event from further processing.
2011-02-04 19:18:16 +00:00
Zdenek Kabelac
29c9aced14 Fix operation node stacking for consecutive dm ops
With the ability to stack many operations in one udev transaction -
in same cases we are adding and removing same device at the same time
(i.e. deactivate followed by activate).

This leads to a problem of checking stacked operations:
i.e. remove /dev/node1 followed by create /dev/node1

If the node creation is handled with udev - there is a problem as
stacked operation gives warning about existing node1 and will try to
remove it - while next operation needs to recreate it.

Current code removes all previous stacked operation if the fs op is
FS_DEL - patch adds similar behavior for FS_ADD - it will try to
remove any 'delete' operation if udev is in use.

For FS_RENAME operation it seems to be more complex. But as we
are always stacking FS_READ_AHEAD after FS_ADD operation -
should be safe to remove all previous operation on the node
when udev is running.

Code does same checking for stacking libdm and liblvm operations.

As a very simple optimization counters were added for each stacked ops
type to avoid unneeded list scans if some operation does not exists in
the list.

Enable skipping of fs_unlock() (udev sync) if only DEL operations are staked.
as we do not use lv_info for already deleted nodes.
2011-02-04 19:14:39 +00:00
Milan Broz
c154d18b11 Suport DM_SECURE_DATA_FLAG.
It will be user for cryptsetup to ensure buffers are properly
wiped when sending sensitive data (key).
2011-02-04 16:08:11 +00:00
Zdenek Kabelac
6ed7b4d790 Increase hash table size to 1024 lv names and 64 pv uuids 2011-02-03 16:03:13 +00:00
Zdenek Kabelac
73300f8977 Remove fs_unlock from lv_resume path
Keep it within clvmd until message for SYNC starts to work.
2011-02-03 01:58:20 +00:00
Zdenek Kabelac
fc585fc2fb Fix wipe size when seting up mda. 2011-02-03 01:41:03 +00:00
Zdenek Kabelac
713b630c66 Do not check for open_count when not needed.
Disable open_count checking in lv_info it it's not used.

Fix previous commit (comment out unsable code for now).
2011-02-03 01:24:46 +00:00
Zdenek Kabelac
3d3f093b8c Synchronize with udev for lv_info
In case the open_count is requested via lv_info - check if there
are any udev operations in-progress - and wait for them
before checking for lv_info
2011-02-03 01:16:35 +00:00
Alasdair Kergon
9e12eb1cf5 a few more comments 2011-02-02 23:57:48 +00:00
Alasdair Kergon
5c3949eac3 Allow CLVMD_CMD_SYNC_NAMES to be propagated around the cluster if requested. 2011-02-02 23:39:39 +00:00
Zdenek Kabelac
d4fd789d78 Better fix for no-locking udev sync and clvmd
This is better way how to fix clustered synchronization with udev.
As the code for message passing needs fixed - put currently
fs_unlock() after every active/deactive command in clvmd to
ensure nodes are properly created in time.
2011-02-02 20:04:39 +00:00
Zdenek Kabelac
197d68a4d0 Revert wrong fix for nolock locking missing fs_unlock
Patch was wrond and introduced recursive lock_vol
Reverting it.
2011-02-02 13:34:00 +00:00
Dave Wysochanski
4ae7c94196 Update WHATS_NEW for lvmdump change. 2011-02-01 21:41:31 +00:00
Dave Wysochanski
248426e1ee Add "dmsetup ls --tree" output to lvmdump.
It would be most useful to add "dmsetup ls --tree" to the commands run.
This command helps in answering the question "which devices are actually
underneath a given LV?"

Although the info is available with other existing dmsetup commands,
adding this command gives a much clearer summary of complex setups.

Here's an example of an LVM mirror, with mirror images on partitions
created on top of multipath devices.  The multipath devices are on
simple block devices.  As you can see, it is easy to see the stacking
from the "dmsetup ls --tree" output:

vgmpathtest-lvmpathmir (253:14)
 ├─vgmpathtest-lvmpathmir_mimage_1 (253:13)
 │  └─mpath5p1 (253:5)
 │     └─mpath5 (253:2)
 │        ├─ (8:16)
 │        └─ (8:0)
 ├─vgmpathtest-lvmpathmir_mimage_0 (253:12)
 │  └─mpath6p1 (253:6)
 │     └─mpath6 (253:3)
 │        ├─ (8:48)
 │        └─ (8:32)
 └─vgmpathtest-lvmpathmir_mlog (253:11)
    └─mpath7 (253:4)
       ├─ (8:80)
       └─ (8:64)
VolGroup00-LogVol01 (253:1)
 └─ (202:2)
vgtest-lvmir (253:10)
 ├─vgtest-lvmir_mimage_1 (253:9)
 │  └─ (7:1)
 ├─vgtest-lvmir_mimage_0 (253:8)
 │  └─ (7:0)
 └─vgtest-lvmir_mlog (253:7)
    └─ (7:3)
VolGroup00-LogVol00 (253:0)
 └─ (202:2)

But it is much harder to see the stacking with only the commands today
("dmsetup info", "dmsetup status", and "dmsetup table").  We could
piece together the stacking from "dmsetup table" but it requires
further processing (take output from "dmsetup info to get
map name to major/minor, then parse "dmsetup table", etc).
2011-02-01 21:39:15 +00:00
Jonathan Earl Brassow
b9d4336960 fix bad 'strcmp's in 'decode_lock_type' - missing !'s
There was no effect from having this wrong yet, because the
tree of callers only ever cared about the answer to the first
condition (!response), which determines whether a lock is
held or not.  Correct responses, however, are needed soon.
2011-02-01 17:31:40 +00:00
Zdenek Kabelac
c60ff20842 Skip sysinit test for cluster
Add #bin/bash to utils.sh
and update fsadm test to current version of aux script
2011-01-31 22:05:30 +00:00
Zdenek Kabelac
e4b7c72b3a Fix udev synchronization for no-locking mode
Instead of implicitly syncing udev operation in clustered and
file locking code -  call synchronization directly in lock_vol() when
the operation unlocks VG

The problem is missing implicit fs_unlock() in the no_locking code.
This is used with --sysinit on read-only filesystem locking dir.
In this case vgchange -ay could exit before all udev nodes are properly
synchronised and may cause problems with accessing such node right after
vgchange --sysinint command is finished.

Add test case for vgchange --sysinit.
2011-01-31 19:52:40 +00:00
Peter Rajnoha
6cbcd683ba Add a debug message when uevent is not generated and we call udev_complete internally. 2011-01-31 11:54:55 +00:00
Zdenek Kabelac
e50edb8410 Lower device size of pvmove test
Lower the size of devices in this test for speedup.
And check only once that  LVs are prepared properly.
2011-01-28 16:15:08 +00:00
Zdenek Kabelac
5b8845974c Lower disk space usage of inconsistent mda test
Smaller size of devices speedups this test.
2011-01-28 16:13:39 +00:00
Zdenek Kabelac
2169ecd063 Replacei sleep wait with udev settle
Use new udev_wait command instead of unpredictable sleep waiting.
As with more devices in the system, udev processing is slower.
2011-01-28 16:12:45 +00:00
Zdenek Kabelac
74c10192aa Query only test related vg name 2011-01-28 16:11:14 +00:00
Zdenek Kabelac
cb3031b7fe Add command to wait for udevadm settle 2011-01-28 16:10:21 +00:00
Zdenek Kabelac
61e665d65b Do not break pipes
As the option 'set -e -o pipefail' is very sensite on pipe breaking
stop using '-q' for grep commands.

Otherwise this command (with large enough table) would fail:

 dmsetup table | egrep -q

with exit code 141 (128 + SIGPIPE)

As Peter suggested, he prefers to keep '-o pipefail' - so make sure all
piped commands will read the whole output and will not exit too early.
2011-01-28 16:08:39 +00:00
Zdenek Kabelac
2d8c68d097 Accelerate mirror tests
Instead of recreation of whole device set for each test round,
just cleanup created LVs to empty VG.
Lower the size of PV devices to 16MB
2011-01-28 16:07:04 +00:00
Zdenek Kabelac
9bcd306c02 Display duration of tests when it passed.
Enhance output with info about the test duration.
Cleanup few declarations in the code.
2011-01-28 16:05:38 +00:00
Zdenek Kabelac
4fa5445562 Updating man pages 2011-01-28 16:03:38 +00:00
Zdenek Kabelac
76b030d82e Compile code for memory debuging only with DEBUG_MEM
When it's not in use - do not compile this code.
Improves lcov code coverage results for this code a lot :)
2011-01-28 16:01:32 +00:00
Peter Rajnoha
2fef22fa85 Set DM_UDEV_DISABLE_OTHER_RULES_FLAG for suspended DM devices in udev rules.
This is to avoid any scanning and processing of DM devices while they are in
suspended state (e.g. a rename while the device is suspended - a CHANGE event
is generated!). Otherwise, any scanning in the rules could end up with locking
the calling process until the device is resumed and so we don't receive a
notification about udev rules completion until then (and that effectively
locks out the process awaiting the notification!).

However, we still keep 'disk' and any 'subsystem' related udev rules running.
We trust these and these should check themselves whether a device is suspended
or not, not trying to run any scanning if it is.
2011-01-28 11:41:51 +00:00
Zdenek Kabelac
8440981845 Use memcpy and add error message
strncpy (which check each byte for \0) is not need as we always copy
the length size - so using memcpy is a bit cheaper.

Add missing log_error message for failed allocation.
2011-01-28 10:19:00 +00:00
Zdenek Kabelac
5ba08f195a Skip NULL check before dm_free
dm_free checks for NULL itself.
2011-01-28 10:16:04 +00:00
Zdenek Kabelac
342e9fee4a Avoid rebuilding of uuid validation table
Small CPU relax...
2011-01-28 10:14:08 +00:00
Mike Snitzer
0db2d39105 Improve lvcreate "insufficient extents" errors to "insufficient free space". 2011-01-28 02:58:00 +00:00
Alasdair Kergon
27e49620ec Use O_DIRECT when reading block devices. 2011-01-27 00:21:37 +00:00
Zdenek Kabelac
f2439f2096 Add missing lvm_quit in test
Fix occasional confusing memory leak report in testing.
2011-01-26 21:21:56 +00:00
Zdenek Kabelac
3289982950 Initialize pool object for each row
Fix assert abort of dmsetup (when compiled with pool debug)
dmsetup splitname --nameprefixes --noheadings --rows gvg-a2

Move pool begin in the inner loop - otherwise it would using
already 'ended' pool object.
2011-01-25 21:51:30 +00:00
Alasdair Kergon
f18ec47d42 post-release 2011-01-24 23:34:46 +00:00
Alasdair Kergon
37f5c45dd5 pre-release 2011-01-24 23:24:06 +00:00
Alasdair Kergon
fdd24fd4e5 Bring lvscan man page up-to-date. 2011-01-24 20:02:07 +00:00
Alasdair Kergon
66a6d5c073 Fix lvchange --test to exit cleanly. 2011-01-24 14:19:05 +00:00
Alasdair Kergon
73b81bcd35 Add change_tag to toollib.
Allow multiple pvchange command line options to be specified together.
2011-01-24 13:38:31 +00:00
Milan Broz
4dff097f66 If other process finishes (or aborts) pvmove operation and
polling function cannot find any lv with PVMOVE flag, return
success and do not print  "aborting" message.

Fixes https://bugzilla.redhat.com/show_bug.cgi?id=602389
2011-01-19 23:11:39 +00:00
Milan Broz
ea0c01a2dd In some versions (RHEL6) dlm_create_lockspace() always
return lockspace reference (even if lockspace already exists)
and thus increases DLM lockspace count. It means that after
clvmd restart the lockspace is still in use.

(The only way to clean environment to enable clean cluster
shutdown is call "dlm_tool leave clvmd" several times.)

Because only one clvmd can run in time, we can use simpler logic,
try to open lockspace with dlm_open_lockspace() and only if it fails
try to create new one. This way the lockspace reference doesn not
increase.

Very easily reproducible with  "clvmd -S" command.

Patch also fixes return code when clvmd_restart fails and fixes
double free if debug option was specified during restart.

Fixes https://bugzilla.redhat.com/show_bug.cgi?id=612862
2011-01-19 23:09:31 +00:00
Jonathan Earl Brassow
e1ad66800a Remove duplicate statement (pasted twice by patch at some point?) 2011-01-19 19:24:07 +00:00
Jonathan Earl Brassow
f0306e63b2 Add test to make sure that a log device is retained when a
mirror image fails and the policies are set to:
 mirror_image_fault_policy = "replace"
 mirror_log_fault_policy = "remove"
2011-01-19 19:22:07 +00:00
Milan Broz
4dca082776 Remove DEBUGLOG from dmeventd. 2011-01-17 23:14:05 +00:00
Milan Broz
4efa0617b9 Add -f (don't fork) option to clvmd and fix clvmd -d<num> description. 2011-01-17 23:13:14 +00:00
Alasdair Kergon
3216fba3ca remove unused definitions 2011-01-17 19:02:44 +00:00
Alasdair Kergon
e954736299 post-release 2011-01-17 18:16:18 +00:00
Alasdair Kergon
c7e561d33d pre-release 2011-01-17 16:46:50 +00:00
Zdenek Kabelac
ce94c7ea73 Remove -f from list of targets for cleanup
rm -f is already in used for this cleanup.
2011-01-17 15:18:10 +00:00
Zdenek Kabelac
6789424259 Do not scan devices unnecessarily for reseting error counter
For reseting error counter use directly btree cached elements and do not
create whole dev_iterator.
2011-01-17 15:16:55 +00:00
Zdenek Kabelac
7a18ae6da4 Set default value to counter_last
Missed in previous commit and could be using some random stack value.
2011-01-13 15:03:28 +00:00
Zdenek Kabelac
5f2829b0a0 Do not log backtrace for correctly processed command 2011-01-13 15:00:29 +00:00
Zdenek Kabelac
49376579bf Include default make.tmpl
Use default clean targets for this Makefile
Use also same build flags for compiling testing tools like harness.
2011-01-13 14:58:59 +00:00
Zdenek Kabelac
8a3e1de5c8 Enable monitoring for cluster tests 2011-01-13 14:57:18 +00:00
Zdenek Kabelac
6470c383b2 Skip unnecessary lock_vol() call after volume deactivation
Improve condition within lock_vol so we are not calling extra unlock
if the volume just has been deactivated.

Patch uses lck_type and replaces negative 'and' condition to more
readable 'or' condition.
Few missing strace traces added.
2011-01-13 14:56:17 +00:00
Zdenek Kabelac
cbfb6504a8 Add exec_cmd paramater sync_needed
As sync_local_dev_names() cannot be called within activation context,
add new parametr which allows to select if the sync call is needed
before executing new command.
2011-01-13 14:51:32 +00:00
Zdenek Kabelac
a4cdc80836 Check for file with clvmd pid
Instead of checking  $LOCAL_CLVMD set during some 'aux' execution which
doesn't seem to be propagated to this shell - check for existance of pid
file of clvmd process - so this test is skipped in singlenode cluster test.
2011-01-13 11:05:27 +00:00
Zdenek Kabelac
b598e9482d Hotfix to stop harness busylooping
Harness seems to be able to busyloop in while cycle and not moving forward
for certain buffer - so check whethere there was some progress.
This fix allows to continue after failed cluster test.

Fix gcc warning for hiding global variable 's' -> sig.
2011-01-13 11:02:55 +00:00
Alasdair Kergon
0c83d60688 Replace fs_unlock by sync_local_dev_names to notify local clvmd. (2.02.80)
Introduce sync_local_dev_names and CLVMD_CMD_SYNC_NAMES to issue fs_unlock.
2011-01-12 20:42:50 +00:00
Petr Rockai
d2a3f3afa1 Kill clvmd properly after locking_type 3 tests (using clvmd/singlenode). 2011-01-12 16:07:55 +00:00
Alasdair Kergon
f1e5f9d8d5 add fio 2011-01-12 15:28:33 +00:00
Jonathan Earl Brassow
92c39fea53 s/log_verbose/log_error/ - Increase log level on error message. 2011-01-11 17:21:01 +00:00
Jonathan Earl Brassow
d67ac7ed5c Add disk to mirrored log type conversion. 2011-01-11 17:05:08 +00:00
Alasdair Kergon
9e397d92f5 post-release 2011-01-10 21:12:54 +00:00
Alasdair Kergon
f40795a119 reissue 2.02.80 2011-01-10 21:12:18 +00:00
Zdenek Kabelac
8d4f93634f Fix missing declaration for fs_unlock 2011-01-10 19:49:42 +00:00
Zdenek Kabelac
c141fe1c44 Avoid cookie sharing between forked processes
Before fork, ensure cookie is reset so it's not shared between processes.
2011-01-10 19:31:02 +00:00
Alasdair Kergon
2e776f6b50 post-release 2011-01-10 14:51:33 +00:00
Alasdair Kergon
437dac07eb pre-release 2011-01-10 14:28:17 +00:00
Zdenek Kabelac
77e3610e1a Speedup consequent activation calls
Stop calling fs_unlock() from lv_de/activate().
Start using internal lvm fs cookie for dm_tree.
Stop directly calling dm_udev_wait() and
dm_tree_set/get_cookie() from activate code -
it's now called through fs_unlock() function.

Add lvm_do_fs_unlock()

Call fs_unlock() when unlocking vg where implicit unlock solves the
problem also for cluster - thus no extra command for clustering
environment is required - only lvm_do_fs_unlock() function is added
to call lvm's fs_unlock() while holding lvm_lock mutex in clvmd.

Add fs_unlock() also to set_lv() so the command waits until devices
are ready for regular open (i.e. wiping its begining).

Move fs_unlock() prototype to activation.h to keep fs.h private
in lib/activate dir and not expose other functions from this header.
2011-01-10 14:02:30 +00:00
Zdenek Kabelac
a23f91ad2f Add internal fs cookie
Add functions for handling internal lvm cookie used for
all dm_tree operations until fs_unlock is called.
2011-01-10 13:44:39 +00:00
Zdenek Kabelac
4fef104bf1 Define DM_COOKIE_AUTO_CREATE
TODO: Use it also for already written code.
2011-01-10 13:42:31 +00:00
Zdenek Kabelac
57f8bc3081 Use strndup as GNU extension
strndup needs _GNU_SOURCE defined as it's GNU extension.
Remove duplicate string.h header added in previous harness commit.
2011-01-10 13:25:22 +00:00
Zdenek Kabelac
40fbdb71bd Cache config_tree
Start to use config_tree for cache just like vgmetadata.
When vgmetadata are erased destroy its cached config tree.
2011-01-10 13:15:57 +00:00
Zdenek Kabelac
7cbf6fa3a6 Change import_vg_from_buffer to use config_tree
Change function import_vg_from_buffer() to import_vg_from_config_tree().
Instead of creating config tree inside the function allow config tree to
be passed as parameter - usable later for caching.
2011-01-10 13:13:42 +00:00
Zdenek Kabelac
019423f769 Add default error path for get_property
Set invalid property value for error path when NULL handler is passed.
Fixes use of uninitialized prop structure as we return 'v' by value.
---
2011-01-10 13:07:58 +00:00
Zdenek Kabelac
4114ac5d22 Improve OCF dir support
Add --with-ocfdir configurable option.
Fix superfluous /usr in ocf_scriptdir instalation path.
2011-01-10 13:00:53 +00:00
Alasdair Kergon
1a2e58b067 Add aclocal.m4 (for pkgconfig). 2011-01-07 16:27:05 +00:00
Petr Rockai
9ce215ed01 Skip the api/percent test on kernels <= 2.6.32. 2011-01-07 15:04:52 +00:00
Petr Rockai
ad7bccb8ea Add a test for the interaction of snapshots of mirrors and lvconvert --repair. 2011-01-07 14:56:52 +00:00
Petr Rockai
3621b75306 Add a test for the interaction of transient errors and dmeventd mirror repair. 2011-01-07 14:56:10 +00:00
Petr Rockai
14908e3a3b Fix a typo in the new relaxed mirror check (test/lib/check.sh). 2011-01-07 14:42:35 +00:00
Alasdair Kergon
7ce032b99d Using Fedora 14's autoreconf. 2011-01-07 14:38:34 +00:00
Petr Rockai
dcaf6313af Remove *.t in test/api's make clean target. 2011-01-07 13:07:10 +00:00
Petr Rockai
b74d30d55a Disable activation/monitoring in testing lvm.conf by default. 2011-01-07 13:04:17 +00:00
Petr Rockai
9c705ce71e Relax the mirror test in test/lib/check.sh. 2011-01-07 13:03:46 +00:00
Petr Rockai
16af1b083b In aux, exit 200 no longer works as skip -- replace with real skip call. 2011-01-07 13:03:04 +00:00
Petr Rockai
f4d63e9e58 Fix up the make clean target in test/. 2011-01-07 12:26:02 +00:00
Zdenek Kabelac
cae4882865 Fix memory leak in filter creation error path
If some allocation for peristent filter fails its memory reference
was lost, fix it by calling filter's destructor.

Fix log_error messages for failing allocation.
2011-01-06 15:29:24 +00:00
Zdenek Kabelac
9d4bb244e3 Intentionaly ignore result from get_config_uint32 2011-01-06 15:25:07 +00:00
Zdenek Kabelac
41b1b9487c Remove dead assignment of 'ret' value
Variable 'ret' assigned from _do_event() was actually not used and replaced with next
assignment without any read of the returned value.

Code is reformated - so the error path is put in the if() branch and normal
code is put after the 'if' together with FIXME comment.


FIXME lowprio: logging needs to be fixed in this code,
 - multiple log_errors are printed, stacks are missing...
2011-01-06 10:45:41 +00:00
Zdenek Kabelac
6c1db4a7d6 Remove unneeded stack prints after log_error 2011-01-06 09:45:05 +00:00
Jonathan Earl Brassow
7b0c0103a9 Reverting recent commit to disallow adding/removing mirror log while
removing/adding mirror images... There was already code in there to
do the job - I just didn't find it in WHATS_NEW (or in the code right
away).
2011-01-05 23:18:46 +00:00
Jonathan Earl Brassow
ef6ec3f737 Prevent the user from simultaneously adding a mirror log while removing
a mirror image (or removing a log while adding a mirror).  Advise the
user to use two separate commands instead.

This issue become especially problematic when PVs are specified, as they
tend to mean different things when adding vs removing.  In a command that
mixes adding and removing, it is impossible to decern exactly what the
user wants.

This change prevents bug 603912.
2011-01-05 20:02:45 +00:00
Zdenek Kabelac
78269c2110 Check result of dm_snprintf for error 2011-01-05 15:10:30 +00:00
Zdenek Kabelac
eb91c54467 Add sys_debug loging for unlink
This unlink intentionally silently ignores any errors.
It's still worth to trace its error status in debug mode.
2011-01-05 15:06:10 +00:00
Zdenek Kabelac
56b6c12e8a Fix gcc warnings for missing headers and prototypes
Add missing header  string.h (strcmp, strndup).
Add 'static' to fix missing prototypes gcc warning.
Remove unused variables verbose_off, TESTDIR, PREFIX.
2011-01-05 15:03:43 +00:00
Petr Rockai
6507ce98df Remove obsolete bits of support code from test/. 2011-01-05 14:15:26 +00:00
Petr Rockai
2fbadde558 Re-enable t-lvconvert-repair-transient, avoiding the deadlock by running mkfs
asynchronously.
2011-01-05 14:15:01 +00:00
Zdenek Kabelac
5a363d74f2 Add missing error path tests 2011-01-05 14:03:36 +00:00
Zdenek Kabelac
b2817688ac Fail deamonization if lvmcache_init fail
FIXME Add proper cleanup
2011-01-05 12:59:46 +00:00
Zdenek Kabelac
659b348a44 Return PERCENT_INVALID for error case
If the percent value could not be determined return PERCENT_INVALID.
Indent function with tabs.
2011-01-05 12:33:51 +00:00
Zdenek Kabelac
6990a8c942 Remove unused variable mirr_state and its assignment 2011-01-05 12:27:56 +00:00
Zdenek Kabelac
98b4ee635d Remove dead assignment to thisfd
Value of 'thisfd' is not read again after its assigment
2011-01-05 12:17:19 +00:00
Petr Rockai
7d048d52f6 Fix another bug in t-pvcreate-operation-md (should pass now). 2011-01-05 01:04:47 +00:00
Petr Rockai
41fb54489f Add a test for RHBZ 640621 (metadata/dirs + no PV MDAs broken). 2011-01-05 01:00:34 +00:00
Petr Rockai
3be9ec308a Fix typo in t-pvcreate-operation-md.sh. 2011-01-05 00:38:41 +00:00
Petr Rockai
f120a17b1d Move the mkdtemp functionality into test/lib/utils.sh. 2011-01-05 00:26:19 +00:00
Petr Rockai
fc7054c150 Fix a couple of bugs in the new (test/lib/)aux.sh. 2011-01-05 00:25:07 +00:00
Petr Rockai
cde70f94db Substantial rework of the functional test support code. Some new features:
- somewhat neater, more consistent and more readable output
- possible to set any lvm.conf value: aux lvmconf "section/key = value"
- LVM_TEST_NODEBUG to suppress the (lengthy) "## DEBUG" output
- back-substitution on test output ($TESTDIR/$PREFIX -> @TESTDIR@/@PREFIX@)
- support code moved from test/ to test/lib/ --> less clutter
2011-01-05 00:16:18 +00:00
Jonathan Earl Brassow
8f0f328dbf Fix bug 635949: lvconvert man page clarification 2011-01-04 21:41:35 +00:00
Jonathan Earl Brassow
10e3bdcce3 Change to correct cmirrord return code is user visible - update WHATS_NEW
file.
2011-01-04 21:33:06 +00:00
Jonathan Earl Brassow
bc036c01cb Fix bug 632681: cmirrord shouldn't fail 'start' if already started
Return 0 from the cmirrord init script if the daemon is already
running.

http://refspecs.freestandards.org/LSB_3.1.1/LSB-Core-generic/LSB-Core-generic/iniscrptact.html
2011-01-04 21:16:54 +00:00
Peter Rajnoha
c14f565d18 Export DM_CONTROL_NODE_UMASK and use it while creating /dev/mapper/control. 2011-01-04 14:43:53 +00:00
Petr Rockai
f56007d95f Use system assert in test/api/percent.c, for now. 2011-01-03 15:07:39 +00:00
Milan Broz
65599cfb58 Fix wrongly paired unlocking of global lock in pvchange. (2.02.66) 2010-12-23 14:23:30 +00:00
Zdenek Kabelac
39b2b95477 Remove check for existance of vg pointer
Checking for vg being != NULL in this place is not needed.
Pointer vg is already dereferced in this function above this code line.
Also this internal function _read_pv is always called with valid 'vg' pointer.
2010-12-22 15:44:09 +00:00
Zdenek Kabelac
e65561feae Add backtraces for backup and backup_remove fail paths 2010-12-22 15:36:41 +00:00
Zdenek Kabelac
2a2170efc0 Hide unused code into if 0
Make it obvious for lcov coverage and static analyzis we
are not interested in this piece of code.
2010-12-22 15:32:15 +00:00
Zdenek Kabelac
adb5fb90b1 Detect errors from dm_task_set calls
Check for errors in dm_task_set calls.
Use  goto_bad macro with stack trace.
Replace  label  failed: with bad:.
2010-12-22 15:28:44 +00:00
Zdenek Kabelac
ad9e16c198 Fix last pthread_join fix commit
Call for pthread_join() does not set errno value even though return values
looks like that. For now assign errno from return value and still use
strerror() to print some error message as this seems to be commonly used.

Add also log_sys_error() message for error close of local pipe.
2010-12-22 14:00:28 +00:00
Zdenek Kabelac
96a20ae9d4 Add backtraces for archive and backup_locally
If archive or back_locally fails - add stack trace.
2010-12-22 13:45:33 +00:00
Zdenek Kabelac
0cadf7ba66 Fix memory leak in debug mode of restart_clvmd() error path 2010-12-22 12:14:11 +00:00
Zdenek Kabelac
bb5ccf8869 Log error state from pthread_join operation
Value jstat is unused - so replace it with logging via log_sys_error().
2010-12-22 12:10:56 +00:00
Zdenek Kabelac
57d19b3f1b Remove dead assignment of lv_total and lv_capasity_total
Variables 'lv_total' and 'lv_capasity_total' are unused.
2010-12-22 12:06:54 +00:00
Alasdair Kergon
e45d947322 post-release 2010-12-21 21:08:51 +00:00
Alasdair Kergon
b44cdfc143 Copyright notices for new files. 2010-12-21 01:14:34 +00:00
Alasdair Kergon
385933819e pre-release 2010-12-21 01:08:29 +00:00
Zdenek Kabelac
58e3ba19a2 Add missing test for reallocation error. 2010-12-20 14:38:22 +00:00
Zdenek Kabelac
0f3ab63a24 Remove dead assignment
Variable 'r' is never read so remove it and just cast result from
_error_device function to (void).
2010-12-20 14:36:12 +00:00
Zdenek Kabelac
2697eddcba Verbose log old_umask value
Use old_umask value and print its content through verbose log.
2010-12-20 14:34:49 +00:00
Zdenek Kabelac
90ba925a19 Add internal error if pointer is uninitialized
Add simple check for existance of 'pl' and printer internal error message
if device is missing instead of plain crash.
2010-12-20 14:20:52 +00:00
Zdenek Kabelac
b5d23f1591 Add check for unlink errors 2010-12-20 14:08:46 +00:00
Zdenek Kabelac
e6f0290d9f Remove unused variable label
Variable 'label' is unused in _format1_pv_write().
2010-12-20 14:06:33 +00:00
Zdenek Kabelac
23a3de988f Remove unused variable dev_dir_provided 2010-12-20 14:05:31 +00:00
Zdenek Kabelac
c4fc6c2381 Remove dead assignment of segh
Variable 'segh' is never read again after this assignment.
2010-12-20 14:04:43 +00:00
Zdenek Kabelac
9bc24aea56 Remove dead store in lvm_run_command
Variable 'ret' is not read before its next assignment.
2010-12-20 13:59:52 +00:00
Zdenek Kabelac
8392d2853e Removed unused pointer
Pointer 'duplicate' is unused.
2010-12-20 13:58:38 +00:00
Zdenek Kabelac
28708849a9 Use dm_free for dm_malloc-ed areas in _clog_ctr/_clog_dtr (cmirrord).
Use dm_zalloc to obtain zeroed memory block.
Use dm_free for dm_ allocated memory blocks.
Test close() for error.
2010-12-20 13:57:19 +00:00
Zdenek Kabelac
9f1538687a Add checks for allocation errors in config node clonning.
Add checks for clonning allocation a fail-out when something is
not allocated correctly.

Also move var declaration to the begining of the function
and fix log_error messages.
2010-12-20 13:53:10 +00:00
Zdenek Kabelac
0ae26564ce Replace multiple fprintf calls with one large string parameter 2010-12-20 13:48:28 +00:00
Zdenek Kabelac
9c146b8a6f Fix error path if regex engine cannot be created in _build_matcher().
Fix only 'stack' printing with full function error exit.
2010-12-20 13:45:39 +00:00
Zdenek Kabelac
2913ac8b4e Use const char * for name and old_name in vg
Switch to use const char pointers to avoid changes of these structure
members and having better control over, were these members could
be modified.
2010-12-20 13:40:46 +00:00
Zdenek Kabelac
6556fcc3fc Use const char* for offset calculation
As 'const' types are also passed to macro dm_list_struct_base -
keep offset calculation with const char pointers.
Fixes several gcc constness warnings.
2010-12-20 13:39:12 +00:00
Zdenek Kabelac
1c6b3062bf Switch void* to char* arithmetic 2010-12-20 13:37:26 +00:00
Zdenek Kabelac
b00600c247 Remove const usage from destroy callbacks
As const segment_type or const format_type are never released
use their non-const version and remove const downcast from dm_free calls.
This change fixes many gcc warnings we were getting from them.
2010-12-20 13:32:49 +00:00
Zdenek Kabelac
97d680c924 Fix wrong cast to char*
As cmd->cmd_line is already const char pointer it's not needed to cast already
const char pointer to char pointer.
2010-12-20 13:28:04 +00:00
Zdenek Kabelac
8f606cf0f6 Use const char* const * for dm_regex_create()
Change API interface to accept even completely const array patterns.

This should present no change for libdm users and allows to pass
pattern arrays without cast to const char **.
2010-12-20 13:23:11 +00:00
Zdenek Kabelac
c05aa13dcf Some const cleanups
Minor const warning fixes and internal API updates.
2010-12-20 13:19:13 +00:00
Zdenek Kabelac
12389f2089 update 2010-12-20 13:17:56 +00:00
Zdenek Kabelac
b3cb194c03 Test return value from read() and close() for an error. 2010-12-20 13:16:30 +00:00
Zdenek Kabelac
1ea4c6d449 Add more strict const pointers around config tree
To have better control were the config tree could be modified use more
const pointers and very carefully downcast them back to non-const
(for config tree merge).
2010-12-20 13:12:55 +00:00
Zdenek Kabelac
e5526cb27e Move var declarations to function begining
As assert macro jumps to 'bad:' label - we need vg initialized.
2010-12-20 12:29:39 +00:00
Zdenek Kabelac
8a8a182679 Fix NULL pointer check for *buf
As ternary operator has lower priority then add operation, this check
was not doing what seemed to be expected.

So enclose the test in braces and check for NULL in *buf.
2010-12-17 12:37:49 +00:00
Alasdair Kergon
2f19cff4df Fix device.c #include to ensure 64-bit fopen64 use. (2.02.51) (robbat2) 2010-12-15 12:49:55 +00:00
Petr Rockai
0bceef8c78 Fix sed substitution in copying tests to builddir (test/Makefile.in). 2010-12-14 23:23:45 +00:00
Petr Rockai
40eee88b06 Add getters for copy_percent and snap_percent to the lvm2app API. 2010-12-14 23:20:58 +00:00
Petr Rockai
0c79543cfe Add further consistency checking to vg_validate, ensuring that all segment
areas point to LVs or PVs that are listed in the respective VG.
2010-12-14 17:51:09 +00:00
Petr Rockai
6ea66f6925 Hack up the RUN_BASE computation in the test Makefile.in a bit more (so that it
actually works... sometimes).
2010-12-14 17:38:42 +00:00
Petr Rockai
8891680304 Add a validation step for pvmoveN internal LVs to vg_validate. 2010-12-14 17:07:35 +00:00
Peter Rajnoha
1f80f8f371 HAVE_SELINUX again 2010-12-13 12:44:09 +00:00
Peter Rajnoha
d2fefb2e80 #ifdef HAVE_SELINUX and #ifdef HAVE_SELINUX_LABEL_H 2010-12-13 12:30:04 +00:00
Peter Rajnoha
b3032f918e Missing #elif HAVE_SELINUX 2010-12-13 12:18:38 +00:00
Milan Broz
bb37f26bbf Update configure. 2010-12-13 11:03:10 +00:00
Peter Rajnoha
e805bfb103 Create /var/run/lvm directory during clvmd initialisation if missing.
We need to be sure that /var/run and /var/lock is always there.
(E.g. these two directories could be using tmpfs which then loose
all the content after reboot.)
2010-12-13 10:49:02 +00:00
Peter Rajnoha
a1cbc61c34 Add new dm_prepare_selinux_context fn to libdevmapper and use it throughout.
Detect existence of new SELinux selabel interface during configure.
Use new dm_prepare_selinux_context instead of dm_set_selinux_context.

We should set the SELinux context before the actual file system object creation.
The new dm_prepare_selinux_context function sets this using the selabel_lookup
fn in conjuction with the setfscreatecon fn. If selinux/label.h interface
(that should be a part of the selinux library) is not found during configure,
we fallback to the original matchpathcon function instead.
2010-12-13 10:43:56 +00:00
Petr Rockai
e3deef3baa Remove a redundant %.o: %.c rule from test/api/Makefile.in. 2010-12-12 22:26:47 +00:00
Petr Rockai
4b81bb549d Fix the sed expression to get "base" names of tests. 2010-12-12 21:17:25 +00:00
Petr Rockai
c0274f4018 Only build the required bits under api/ on make check. 2010-12-12 21:08:00 +00:00
Petr Rockai
46ac061fb3 Do not build vgtest (superseded by vgtest.t). Fix the build line for .t files. 2010-12-12 20:49:38 +00:00
Petr Rockai
5e4a1476b3 First go at a somewhat more comprehensive mechanism to run "unit" tests for the
lvm2app API. Further factoring of the support code needed. RHBZ 654445
2010-12-12 20:36:38 +00:00
Alasdair Kergon
3415f08149 Fix scanning of VGs without in-PV mdas.
Set cmd->independent_metadata_areas if metadata/dirs or disk_areas in use.
- Identify and record this state.

Don't skip full scan when independent mdas are present even if memlock is set.
- Clusters and OOM aren't supported, so no problem doing the proper scans.

Avoid revalidating the label cache immediately after scanning.
- A simple optimisation.

Support scanning for a single VG in independent mdas.
- Not used by the fix but I left it in anyway as later patches might use it.
2010-12-10 22:39:52 +00:00
Milan Broz
c384ad4074 Try to detect fail in clvmd startup in tests. 2010-12-09 11:19:21 +00:00
Alasdair Kergon
8a9dbd13e7 . 2010-12-09 00:10:24 +00:00
Alasdair Kergon
1e2f3763cb . 2010-12-08 23:09:45 +00:00
Alasdair Kergon
bc9b386f80 Rename vg_release to free_vg. 2010-12-08 20:50:48 +00:00
Alasdair Kergon
b4940ed53c Cope better with an undefined target_percent operation in _percent_run. 2010-12-08 19:26:35 +00:00
Zdenek Kabelac
1e9221af45 Remove reset of vg->vgmem pointer as it is access of already release memory
This reset of vgmem pointer causes access of already released memory.
(_vg_make_handle allocates vg from vgmem pool itself - which is a bit tricky)

Interestingly this memory fault was missed by our test suite.
2010-12-08 10:45:37 +00:00
Alasdair Kergon
200675b506 post-release 2010-12-06 22:13:10 +00:00
Alasdair Kergon
6424bd9e6d pre-release 2010-12-06 17:57:14 +00:00
Alasdair Kergon
7ce425114e Fix debug logging of derived flag LCK_CACHE in clvmd. 2010-12-06 17:37:09 +00:00
Zdenek Kabelac
b0e9ba5f4b Check str_list_add() success
Report error if str_list_add fails.
2010-12-01 13:05:06 +00:00
Zdenek Kabelac
7837f37e21 Check lv_info() success
Add log_error message for lv_info failure and exit from futher
processing.

Replace 'leg' occurence in debug message with 'image' which
is used in other messages.
2010-12-01 13:01:36 +00:00
Zdenek Kabelac
966bfcef27 Add backtraces for errors
Add stack;  backtraces when error is reported from dev_set() or
dev_close_immediate().
2010-12-01 12:56:39 +00:00
Zdenek Kabelac
70ebb39097 Log error from unlink failure 2010-12-01 12:41:49 +00:00
Zdenek Kabelac
60d340b387 Test lv_name is not NULL
Patch adds extra check for lv_name not being NULL.
Test avoids unneeded strlen call for this case.
Otherwise there is no functional change as test would fail on
size_t comparation even for NULL lv_name (thus there is no risk
of NULL dereference when taking 'true' if branch.
2010-12-01 12:22:49 +00:00
Zdenek Kabelac
22812bc83c Add logging for pipe write() and close() error
Check values from write() and close() system calls.

FIXME: Missing wrapper around 'write()'.
2010-12-01 10:46:20 +00:00
Zdenek Kabelac
83c37777f1 Check result of vginfo_from_vgname
Check for some potential internal error.
2010-12-01 10:39:28 +00:00
Zdenek Kabelac
a200f50bbe Optimize second call to strchr with same parameters
Small optimalization - reusing already known strchr result.
2010-12-01 10:36:25 +00:00
Zdenek Kabelac
a2f563f84f Fallback to full rescan for missing device
Fix bug when NULL could have been passsed as 'data'
to _add_pv_to_list() if 'dev' is NULL.

Now it fallbacks to complete scan.
2010-12-01 10:33:55 +00:00
Petr Rockai
d5c94a08a7 What's new. 2010-11-30 23:03:35 +00:00
Zdenek Kabelac
9fc47d67df Remove unneeded test for NULL
Remove check for system_id (it is defined as int8_t[], so cannot be NULL).
2010-11-30 22:57:35 +00:00
Zdenek Kabelac
c697497923 Test uuid for NULL
Add test for NULL before passing uuid as src argument to memcpy.
As memcpy function is declared as function not accepting NULL.
Though we pass NULL only with zero length so this patch presents
no functional change to the code.
2010-11-30 22:53:37 +00:00
Zdenek Kabelac
b13837c2c0 Add stack trace for error path
If dm_task_set_cookie() fails print stack trace, but keep going on.
2010-11-30 22:40:19 +00:00
Zdenek Kabelac
5eb6300c77 Add error path stack traces
Check for errors from dm_task_set_name() and dm_task_run().
Add stack traces for error paths.
Return 0 if some error is found.
2010-11-30 22:32:44 +00:00
Zdenek Kabelac
a1b86401a5 Remove check for lv is NULL
'lv' is deferenced in the begining of the function so any check
later is not helpful.

Parameters for dev_manager_transien() are marked as nonnull.
2010-11-30 22:28:06 +00:00
Zdenek Kabelac
5dcb17df67 Add missing test for failed pool allocation
Add test for NULL from dm_poll_create.
Reorder dm_pool_destroy() before file close and add label out:.
Avoid leaking file descriptor if the allocation fails.
2010-11-30 22:23:35 +00:00
Zdenek Kabelac
5b94a87589 Replace snprintf with dm_snprintf
Use dm_snprintf with known error case return code (-1).
2010-11-30 22:16:25 +00:00
Zdenek Kabelac
3698ce35b1 Check reallocated buffer for NULL before use
As *buf is reallocated in case CLVMD_CMD_TEST: test for NULL is needed
before printing status.
(realloc() == NULL and status != 0)
2010-11-30 22:11:26 +00:00
Petr Rockai
e6ce16ae13 Refactor the percent (mirror sync, snapshot usage) handling code to use
fixed-point values instead of a combination of a float value and an enum.
2010-11-30 11:53:31 +00:00
Petr Rockai
7e8110ec50 Fix a failing test (it used a combination of lvconvert parameters that is no
longer permitted.)
2010-11-30 11:35:32 +00:00
Petr Rockai
482cef7121 Avoid the automatic MISSING_PV recovery path in commands with special
MISSING_PV handling (cmd->handles_missing_pvs is set).
2010-11-30 11:15:54 +00:00
Alasdair Kergon
3c3e95a5a8 Fix memory leak when VG allocation policy in metadata is invalid.
Ignore unrecognised allocation policy found in metadata instead of aborting.
Fix another missing vg_release() in _vg_read_by_vgid.
2010-11-29 18:35:37 +00:00
Zdenek Kabelac
e0a4b67e11 Optimize lookup table read
Reread lookup table only when needed.
2010-11-29 14:25:13 +00:00
Zdenek Kabelac
56c7200837 Remove dead assignment in wait_for_child
'pid' is not used anywhere - remove it.
2010-11-29 12:44:52 +00:00
Zdenek Kabelac
0381e82b54 Remove dead assignment in _step_matcher
'ns' is not used after this assignment and it is reassigned with the following
code, so dropping this assignment.
2010-11-29 12:43:49 +00:00
Zdenek Kabelac
28b075f78d Remove dead assignment in _mirror_emit_segment_line
Remove unused 'r' assignment.
2010-11-29 12:42:10 +00:00
Zdenek Kabelac
4f4c217659 Fix memory leak in error path
Nicely hidden memory leak in outf macro error path.
This macro is using out_text() and does automagical return_0.
That would leak tag_buffer allocated memory.

As there was same code for tags output - create _out_tags() function.
2010-11-29 12:19:58 +00:00
Zdenek Kabelac
55d9104463 Use one fprintf call for usage print
Replace multiple fprintf calls with multiline one.
2010-11-29 12:15:41 +00:00
Zdenek Kabelac
be2679a691 Remove dead assignment in dm_tree_node_add_mirror_target_log
'seg' is never used - remove it.
2010-11-29 11:26:00 +00:00
Zdenek Kabelac
69281c1eab Remove dead assignment in 'main'
'ret' is never read anywhere - remove it.
2010-11-29 11:23:14 +00:00
Zdenek Kabelac
ea249ebca7 Remove unused 'i' in _pv_analyze_mda_raw
'i' is unused in the function - remove it.
2010-11-29 11:16:58 +00:00
Zdenek Kabelac
e60fe35e6b Remove dead assignment in lvm2_main
'alias' is not read again in this code path.
Also 'alias' is already equal to 0 in this place.
2010-11-29 11:14:33 +00:00
Zdenek Kabelac
a1945b9f12 Remove dead assignment in _lock_for_cluster
'saved_errno' is not read from this initialization and before its
usage is assigned again before _cluster_free_request() call.
2010-11-29 11:13:12 +00:00
Zdenek Kabelac
d41c8aed57 Reset vg pointer after release
Set vg to NULL after releasing it as the following memlock() test may
lead to goto for the second call of vg_release() with the already
released vg pointer.
2010-11-29 11:08:14 +00:00
Zdenek Kabelac
fbf215ade2 Remove printing of LCK_CACHE
LCK_CACHE is defined as 0x100 so it cannot be passed through
unsigned char parameter - remove it from the sprintf code.

If the LCK_CLUSTER should be printed here - lot of code need
to be reworked - so adding FIXME comment.
2010-11-29 11:05:15 +00:00
Zdenek Kabelac
5b679c060d Fix check for empty system_dir
Fixing check for zero length system_dir string.
2010-11-29 10:58:32 +00:00
Zdenek Kabelac
acb88b5339 Cleanup remove test for NULL
dm_free is testing NULL itself
2010-11-29 10:11:50 +00:00
Mike Snitzer
c3d7df946e Fix "it's" typo to be "its" in lvconvert error message. 2010-11-28 18:37:33 +00:00
Petr Rockai
1dea3b9fab Update WHATS_NEW. 2010-11-25 17:16:41 +00:00
Petr Rockai
3801cb524c Disallow certain lvconvert operations that need to both allocate and free
extents, while physical volumes are specified. Fixes BZ 640051.
2010-11-25 17:15:46 +00:00
Petr Rockai
001b2c264b All 'size' values of lvm2app properties should be in bytes.
Fix 'seg_size' to return bytes.

Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-25 14:39:02 +00:00
Petr Rockai
f1e5a95d89 Add interactive tests for functions to lookup a pv|lv by name|uuid.
Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-25 14:35:46 +00:00
Petr Rockai
8779301c8d This patch adds helpers to allow users to lookup a lv or pv handle by
uuid (given a vg_t of course).

Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-25 14:34:51 +00:00
Petr Rockai
2ac0e8f989 This patch adds helpers to allow users to lookup a lv or pv handle by
name (given a vg_t of course).

Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-25 14:33:44 +00:00
Zdenek Kabelac
f912e2c887 Add missing destrustion of cmd_context
Lvm1 fallback code missed to destroy cmd_context in error path.
2010-11-24 09:53:31 +00:00
Zdenek Kabelac
732528e1c0 Fix memory leak in error path
Release allocated path buffer in error path.
2010-11-24 09:43:18 +00:00
Zdenek Kabelac
90222d0232 Fix resource leak of dlopened pointer
Add missing dlclose in _init_formats() error path.
Use return_0 to print stack trace from the call.
2010-11-24 09:34:34 +00:00
Zdenek Kabelac
85933f336c Add missing fclose
Fixing resource leak in _umount().

CHECKME: mountpoints with spaces need to be checked
2010-11-23 21:19:45 +00:00
Zdenek Kabelac
dc60e49a4d Move arg_vgnames from local scope
As gcc puts probably all vars on stack this bug was not noticed in runtime.
Patch fixes referencing local scope list variable.
2010-11-23 20:39:13 +00:00
Zdenek Kabelac
d1626c2433 Do not call dm_task_destroy with NULL 2010-11-23 18:29:06 +00:00
Zdenek Kabelac
2b3e80e233 Add missing closedir() - fixes resource leak 2010-11-23 15:28:54 +00:00
Zdenek Kabelac
0eb26e4582 Move va_end() so it is also used before error path return 2010-11-23 15:08:57 +00:00
Zdenek Kabelac
79a9cb910a Move va_end(ap) so we do not leave with return -1 without calling it.
Remove unneeded ';'
2010-11-23 15:00:52 +00:00
Alasdair Kergon
5b958643be Suppress 'No PV label' message when removing several PVs without mdas. 2010-11-23 01:55:53 +00:00
Alasdair Kergon
6e17f8809d Fix default /etc/lvm permissions to be 0755. (2.02.66) 2010-11-22 21:39:47 +00:00
Alasdair Kergon
75ab0236a9 post-release 2010-11-22 18:37:56 +00:00
Alasdair Kergon
4928dded55 pre-release 2010-11-22 14:25:22 +00:00
Alasdair Kergon
61325c32b0 Fix _output_field crash from field_id free with DEBUG_MEM. (Phillip Susi) 2010-11-19 13:17:27 +00:00
Petr Rockai
4f2136c654 The _free_vg that is created as a placeholder when reporting segments in pvs
was lacking the (vgmem) pool. We now create that pool. There is at least one
more such VG (_dummy_vg) which is pool-less. I am not sure what is the right
way to go about this, but this is currently necessary to fix a segfault
introduced by using vgmem in the reporter in Dave's lvseg lvm2app patches.

Signed-off-by: Petr Rockai <prockai@redhat.com>
2010-11-17 22:26:42 +00:00
Petr Rockai
9489a09e01 Update interactive tests for lvm2app lvseg and pvseg apis.
Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-17 20:13:51 +00:00
Petr Rockai
b55eb45e6a Add lvm2app function to query pvseg properties.
Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-17 20:12:39 +00:00
Petr Rockai
294097601d Add the macro and specific 'get' functions for pvsegs.
Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-17 20:11:27 +00:00
Petr Rockai
92afd60d92 Add a new type and function to lvm2app to enumerate pvsegs.
Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-17 20:10:42 +00:00
Petr Rockai
6bb64bc0b7 Add lvm2app function to query lvseg properties.
Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-17 20:09:42 +00:00
Petr Rockai
16138ebca3 Add the macro and specific 'get' functions for lvsegs.
Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-17 20:08:14 +00:00
Petr Rockai
bcf4cf83e4 Add a new type and function to lvm2app to enumerate lvsegs.
Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-17 20:07:01 +00:00
Petr Rockai
5d3834485e Make value.string const char *, in properties.h, to fix a warning introduced by
the previous patch set.
2010-11-17 19:50:15 +00:00
Petr Rockai
4bd06a929d Add vg_set_property to the interactive lvm2app test program.
Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-17 19:17:07 +00:00
Petr Rockai
9f8d31c4a2 Implement lvm_vg_set_property() by calling internal 'set' property function.
Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-17 19:16:05 +00:00
Petr Rockai
e01d8e9702 Add generic infrastructure to internal library to 'set' a property.
Similar to 'get' property internal functions.
Add specific 'set' function for vg_mda_copies.

Signed-off-by: Dave Wysochanski <wysochanski@pobox.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-11-17 19:15:10 +00:00
Alasdair Kergon
212f7e0409 Remove tag length restriction and allow / = ! : # & characters. 2010-11-17 10:19:29 +00:00
Peter Rajnoha
676492a634 WHATS_NEW FIXME 2010-11-12 16:04:52 +00:00
Alasdair Kergon
39311f404e Support repetition of --addtag and --deltag arguments.
Add infrastructure for specific cmdline arguments to be repeated in groups.
Split the_args cmdline arguments and values into arg_props and arg_values.
2010-11-11 17:29:05 +00:00
Zdenek Kabelac
60b5c7dab1 Preserve const for char pointer
Keep char pointers 'const'  (introduced with cling commit).
2010-11-11 12:32:33 +00:00
Zdenek Kabelac
fe80296ff0 Add date configurable variable DATE
Follow the rule to run every command through variable dereference.
Add a runtime check of translated date to seconds.
2010-11-11 12:17:15 +00:00
Zdenek Kabelac
e945819706 fsadm fix for downsize of unmounted fs
Fix for the last commit as $MOUNTED is not only used as bool flag,
but also store mounted location for remount - so parsing output
from mount differently then from /proc/mounts.

Prefix calls of 'tunefs' tools with LANG=C to be sure we always do get
some nonlocalized strings.

Avoid using forced 'resize2fs' for cleanly unmounted filesystems and
run regular fsck -f for this case as required by resize2fs.

'fsadm check' uses date difference for extX filesystems between
the last mount and last check of 'fsck -f' execution and if the mount
was later run 'fsck' with -f so resize2fs is happy and user does not
need to pass '-f' flag.
2010-11-10 16:14:02 +00:00
Zdenek Kabelac
caad1b4ff9 Update fsadm regresion 2010-11-10 10:05:27 +00:00
Zdenek Kabelac
2286995abd Scan also 'mount' output for mounted filesystem.
As util-linux package seems to give all the time different names,
try harder to figure out, where is the given lv possible mounted
and scan /proc/mounts and if not found there, test also 'mount' output.

/dev/dm-xxx
/dev/mapper/vg-lv
/dev/vg/lv

All of them could be used different combination in /proc/mount and mount output.

Patch fixes regression for older systems where new detection code failed to
find valid combination.
2010-11-10 10:03:07 +00:00
Alasdair Kergon
3548ba3827 Extend cling allocation policy to recognise PV tags (cling_by_tags).
Add allocation/cling_tag_list to lvm.conf.
2010-11-09 12:34:40 +00:00
Alasdair Kergon
de2c6ce36e Regenerate configure with 'autoreconf' for --enable-ocf. (2.02.76) 2010-11-09 11:15:34 +00:00
Alasdair Kergon
af1144cac0 forgotten to regenerate last time configure.in was updated 2010-11-09 11:14:06 +00:00
Alasdair Kergon
4894259eca post-release 2010-11-09 02:58:06 +00:00
Alasdair Kergon
ab3b542a18 pre-release 2010-11-08 19:37:40 +00:00
Zdenek Kabelac
262ff6e9d3 Fix include commit and switch to use DISTCLEAN_TARGETS
Fixing warning introduced by 'include make.tmpl' commit.
Produced this warning:

Makefile:29: warning: overriding commands for target `distclean'
../make.tmpl:366: warning: ignoring old commands for target `distclean'
2010-11-08 14:19:48 +00:00
Peter Rajnoha
968c382706 Clarify error messages when activation fails due to activation filter use. 2010-11-05 18:18:11 +00:00
Zdenek Kabelac
d99a0313a7 Add OCF support
Updated patch from Florian Haas from Linux-HA project.

User needs to 'configure --enable-ocf' to get file installed
by 'make install' target by default.
User can also use 'make install_ocf' to get only ocf files installed.
With disabled (default) ocf support - no ocf files are installed.

FIXME: ocf installation path needs to be kept in sync with pacemaker.
find better way and possible also better location.
2010-11-05 16:18:38 +00:00
Zdenek Kabelac
ad2a90ccc6 Add given user prefix to make target install_initscripts
Avoid files to be written into the live system if lvm was
configured with different --prefix.

Use initdir for install target path.
2010-11-05 16:13:18 +00:00
Zdenek Kabelac
64013278fb Use include make.tmpl
Makes easier to use recursive targets and simplifies Makefile.
2010-11-05 16:10:08 +00:00
Alasdair Kergon
86a974c3c3 lost line 2010-11-02 20:10:35 +00:00
Alasdair Kergon
f56361e29f Fix regex optimiser not to ignore RHS of OR nodes in _find_leftmost_common. 2010-11-02 19:56:33 +00:00
Zdenek Kabelac
b53badf298 Use new status code from fsadm check
Patch updates exec_cmd() and adds 3rd parameter with pointer for
status value, so caller might examine returned status code.
If the passed pointer is NULL, behavior is unmodified.

Patch allows to confinue with lvresize if the failure from fsadm check is
caused by mounted filesystem as many of filesystem resize tools do support
online filesystem resize. (originally user had to use flag '-n' to bypass
this filesystem check)
2010-11-01 14:17:35 +00:00
Zdenek Kabelac
93bd371b57 Add DIAGNOSTICS section to fsadm man page. 2010-11-01 14:10:46 +00:00
Zdenek Kabelac
3bb9c8e43e Return different status code for fsadm check of mounted filesystem
Return status code 3 for fsadm check of mounted filesystem - used later with
lvresize update patch to better support online filesystem resize.

Also makes a more consistent user interruption and returns status code 2
in this case.
2010-11-01 14:08:51 +00:00
Peter Rajnoha
09c6b90d76 Use dm_strdup/dm_free instead of strdup/free. 2010-11-01 13:50:51 +00:00
Peter Rajnoha
032a206a83 Allocate buffer for reporting functions dynamically to support long outputs.
Fix memory leak of field_id in _output_field function.

There's been a patch added recently to use dynamic allocation for metadata
tags buffer to remove the 4k limit (for writing metadata out). However, when
using reporting commands like vgs and lvs, we still need to fix libdm reporting
functions themselves to support such long outputs. So the buffer used in those
reporting functions is dynamic now.

The patch also includes a fix for field_id memory leak which was found in
the _output_field function.
2010-11-01 13:31:55 +00:00
Alasdair Kergon
735d51276c Update VG metadata only once in vgchange when making multiple changes.
Allow independent vgchange arguments to be used together.
(Still more inconsistencies to iron out here.)
2010-10-29 21:15:23 +00:00
Petr Rockai
bc91078978 Update WHATS_NEW. 2010-10-29 16:44:47 +00:00
Petr Rockai
8179a2dc9c Add code to the dmeventd snapshot plugin to automatically unmount snapshots
that have been invalidated.
2010-10-29 16:43:51 +00:00
Petr Rockai
ab98a1c5dc Alasdair correctly pointed out that if the two closes are concurrent (I haven't
checked, so they *might*), there is still a race possibility with the last
fix. This patch fixes that.
2010-10-27 11:40:14 +00:00
Petr Rockai
d9f958852c Add a missed test for dmeventd -R. 2010-10-27 09:16:31 +00:00
Petr Rockai
27b0895f6d Update WHATS_NEW. 2010-10-27 09:15:48 +00:00
Petr Rockai
fec4084b90 Fix a double close in clvmd.
The management threads (main_loop, the socket thread) could close a single fd
twice in a row sometimes. At least one other thread can be running at the same
time as the threads doing the double close. That one running thread also
happens to do some IO (namely, open /proc/devices, read from it, close it). If
there was enough "demand" for the local socket, this could happen:

- a connection to clvmd is about to finish, let's say the fd is 13 (it often
  happens to be in my test script, don't ask why)
- the local_sock thread calls close(13)
- the lvm thread calls open("/proc/devices"...) and gets 13
- the main_loop thread calls close(13) [OOPS!]
- new connection arrives, and is accept'd by a (new) local_sock thread
- the accept gives an fd of 13 (since it's the lowest free fd at this point)
- the lvm thread gets around to read from it's /proc/devices handle... 13,
  again
- the lvm thread hangs forever trying to read from the socket instead of
  /proc/devices

Signed-off-by: Petr Rockai <prockai@redhat.com>
Reviewed-by: Milan Broz <mbroz@redhat.com>
2010-10-27 09:13:37 +00:00
Petr Rockai
9a9d7c1c55 Use a more generous timeout for dmeventd in t-lvconvert-repair-dmeventd. 2010-10-26 12:53:07 +00:00
Petr Rockai
0f287ed3dd Let's rely on the exit code and do not test for empty output. 2010-10-26 12:52:25 +00:00
Zdenek Kabelac
cf048beade Add missing return for NULL passed buffer
Function pull_stateo() checks for NULL 'buf' - but return for this error
path was missing.  cmirror code never calls this function with NULL 'buf',
so this fix has no effect on current code base, but makes clang happier.
2010-10-26 10:14:41 +00:00
Zdenek Kabelac
558c22a454 Macro uninitialized_var gives warnings in static analysis
Deactivate uninitialized_var() macro for clang static analysis.
2010-10-26 10:04:34 +00:00
Zdenek Kabelac
683f33139e Hotfix usage of __builtin_unreachable()
It's quite new feature which is not supported by older compilers.
So until some better macros are introduced into LVM code - hotfix current
compilation problems and compile this code only for __clang__ defining compilers.
2010-10-26 09:57:03 +00:00
Zdenek Kabelac
810abed9ea Fix NULL pointer dereference for too large MDA error path
Replace dereference of NULL vg with passed vgname to the function
_vg_read_raw_area() in the error path for too large MDA.
2010-10-26 09:13:13 +00:00
Zdenek Kabelac
9bd893bbc7 Instrument compiler about code unreachability
Clang needs some instrumentation help for static code analysis.
It helps gcc and human reader as well.
2010-10-26 09:01:47 +00:00
Zdenek Kabelac
abf79b3678 Use static indentifier for internal functions
Functions _align_chunk() and  _new_chunk() are used only internally inside
pool-fast.c - so keep them static inside this object file.
2010-10-26 08:59:05 +00:00
Zdenek Kabelac
290d1d8c3f Update C declaration () -> (void) 2010-10-26 08:54:37 +00:00
Zdenek Kabelac
f1ec47e131 Remove bufused for calculation
As bufused is assigned 0 in preceding source line
clang Idempotent operation
2010-10-26 08:53:25 +00:00
Mike Snitzer
0e36229da2 Fix vgchange to process -a, --refresh, --monitor and --poll like lvchange.
Simultaneous -a and --refresh is not valid.
poll+monitor are valid together with or without -ay* (but not with -an*)

No longer print polling results summary if no LVs in the VG were polled.
2010-10-26 01:37:59 +00:00
Mike Snitzer
5d5d6e4d4a Explicitly have grep print the matching filenames (needed to
successfully determine scsi_debug device).
2010-10-26 01:25:46 +00:00
Dave Wysochanski
543f8e8cca Update WHATS_NEW for lvm2app property functions 2010-10-25 17:33:51 +00:00
Alasdair Kergon
d5f5a46b33 post-release 2010-10-25 16:38:20 +00:00
Dave Wysochanski
ac146fc759 Add interactive tests for lvm_{pv|vg|lv}_get_property(). 2010-10-25 14:09:19 +00:00
Dave Wysochanski
5b5c981c13 Add lvm_lv_get_property() generic function to obtain value of any lv propert
Add a generic LV property function to lvm2app, similar to VG function.
Return lvm_property_value and require caller to check 'is_valid' flag
and lvm_errno() for API error.
2010-10-25 14:09:08 +00:00
Dave Wysochanski
fbe2b85f22 Add lvm_pv_get_property() generic function to obtain value of any pv property.
Add a generic PV property function to lvm2app, similar to VG function.
Return lvm_property_value and require caller to check 'is_valid' flag
before using the value.  If 'is_valid' is not set, then lvm_errno()
should be used to obtain the specific error.
2010-10-25 14:08:55 +00:00
Dave Wysochanski
377a39fcaf Add lvm_vg_get_property() generic vg property function.
Add a generic VG property function to lvm2app.  Call the internal library
vg_get_property() function.  Strings are dup'd internally.
Rework lvm_vg_get_property to return lvm_property_value and require caller
to check 'is_valid' flag.  If !is_valid, the caller can check lvm_errno()
for the specific error.

Create a 'get_property' function, local to lvm2app, that factors out
most of the common code that copies the components of lvm_property_type
into lvm_property_value.  This allows for a 1-line function for each
of the generic property functions exported by lvm2app.
2010-10-25 14:08:43 +00:00
Dave Wysochanski
6674b9a66f Add 'is_integer' flag into internal lvm_property_type.
Add 'is_integer' flag similar to 'is_string'.
Suggested in review by Petr Rockai.
2010-10-25 14:08:32 +00:00
Alasdair Kergon
73ac1ba150 pre-release 2010-10-25 13:54:29 +00:00
Zdenek Kabelac
fa3bfb017b Use const config node 2010-10-25 13:38:11 +00:00
Zdenek Kabelac
c755772a23 Fix constness warning
Fix usage of const 'data' pointer and also assign void* directly without
uneeded cast for C.
2010-10-25 13:36:57 +00:00
Zdenek Kabelac
441a1d7ee2 Fix constness warning
Keep using const pointers.
2010-10-25 13:36:09 +00:00
Zdenek Kabelac
f6318121f4 Fix constness warning for _vg_read_by_vgid() uuid usage 2010-10-25 13:35:13 +00:00
Zdenek Kabelac
88943533dd Fix constness warnings
After update of device_from_pvid() API fix constness warnings
Also fix info_from_pvid() constness warning for char* usage.
2010-10-25 13:33:42 +00:00
Zdenek Kabelac
fba05d7ba1 Use const pointer for return value of dm_basename
Fix return pointer to const as it is created from passed input const pointer.
2010-10-25 13:13:53 +00:00
Zdenek Kabelac
d302389b9f ok this seems to be never ending story... 2010-10-25 13:10:13 +00:00
Zdenek Kabelac
14ccfd53b5 rom -> from 2010-10-25 13:04:23 +00:00
Zdenek Kabelac
e0256ba49a Use 'const' struct id *pvid for device_from_pvid()
Update interface for device_from_pvid and use const pointer.
2010-10-25 13:02:26 +00:00
Zdenek Kabelac
22f922671e Switch to char* arithmetic from void* 2010-10-25 13:00:35 +00:00
Zdenek Kabelac
589f03e91a Fix missing initilisation to 0
Add missing init value for variable 'found' which is later tested and may
have contained some garbage value.
2010-10-25 12:59:24 +00:00
Zdenek Kabelac
759241e905 Fix potential NULL pointer dereference
Makes clang happier as it covers all code paths and avoids NULL pointer
dereference through the 'com' pointer (which is NULL by default static
initialisation).
2010-10-25 12:57:00 +00:00
Zdenek Kabelac
b68cb6026e Reuse result of previous strchr
Reported by clang as: Argument with 'nonnull' attribute passed null

Reuse the result of the last strchr() call - make sure, 'st' point is not
null for the next strchr() call.
2010-10-25 12:08:15 +00:00
Zdenek Kabelac
b1b18e183c Ensure we always have origin defined
Reported by clang as: Logic error Dereference of null pointer
Make sure the code path could not use NULL origin because of some internal
code error.
2010-10-25 12:05:46 +00:00
Alasdair Kergon
64cdefc540 fix header #defines 2010-10-25 12:01:59 +00:00
Zdenek Kabelac
52aa8cabe3 Print vg_name and do not to access vg->name
Reported by clang as: Logic error Dereference of null pointer
Replace pointer dereference with vg_name.
2010-10-25 12:01:38 +00:00
Zdenek Kabelac
e420410f9a Fix clang warning for ntohl(*((uint32_t *)buf))
We cast (char*) to (uint32_t*) that changes alignment requierements.
For our case the code has been correct as alloca() returns properly
aligned buffer, however this patch make it cleaner and more readable
and avoids warning generation.
2010-10-25 11:57:06 +00:00
Alasdair Kergon
45f69c7d5f Use a more-generic name for the new kernel flag so list_devices can share it. 2010-10-25 11:44:20 +00:00
Alasdair Kergon
612b90ba52 Add global/metadata_read_only to use unrepaired metadata in read-only cmds. 2010-10-25 11:20:54 +00:00
Alasdair Kergon
ae0ac24fdd Don't take write lock in vgchange --refresh, --poll or --monitor. 2010-10-25 10:40:13 +00:00
Alasdair Kergon
4e51f802a0 restrict last checkin to devs consisting entirely of error target 2010-10-25 10:37:34 +00:00
Mike Snitzer
e4c3b136f5 Never scan a device which is using the error target
A merged snapshot's DM device is made to use the "error" target as part
of lvm's transaction to merge a snapshot.  This snapshot merge use-case
aside, any device using the error target shouldn't be scanned.
2010-10-24 17:36:58 +00:00
Dave Wysochanski
f1329d4511 Add lv_get_property() internal lvm function. 2010-10-21 18:51:16 +00:00
Dave Wysochanski
ee5f8f12bf Rename fields in lvm_property_type.
Based on review comments, rename a few fields in lvm_property_type.
In particular, change 'is_writeable' to 'is_settable', which is
more intuitive to the intent of the bitfield (a 'set' function
exists for this field/property).  Also, remove the char array
for 'id' - unnecessary as we can just use the string passed in
to do the strcmp.  Finally rename the union members from n_val
to 'integer' and 's_val' to 'string'.
2010-10-21 14:49:43 +00:00
Dave Wysochanski
cb41445647 Add lv_read_ahead and lv_kernel_read_ahead 'get' functions. 2010-10-21 14:49:31 +00:00
Dave Wysochanski
6d518948a9 Refactor and add code for (lv) 'lv_origin' get function. 2010-10-21 14:49:20 +00:00
Dave Wysochanski
45628f9813 Refactor and add code for (lv) 'lv_name' get function. 2010-10-21 14:49:10 +00:00
Petr Rockai
affe488e93 Reduce a (huge) timeout in t-mirror-names. 2010-10-20 15:35:00 +00:00
Petr Rockai
c294dbb4c8 Add a test for BZ 509182 (vgchange --monitor y breaks with max_lv). 2010-10-20 15:32:15 +00:00
Zdenek Kabelac
a60e991ee7 Add coverage test for partition table scanning 2010-10-20 15:14:13 +00:00
Petr Rockai
3cfab15e8a Implement dmeventd -R, allowing dmeventd to be restarted without losing
monitoring state.
2010-10-20 15:12:12 +00:00
Zdenek Kabelac
eb8af4be25 Fix strict-aliasing compile warning in partition table scanning 2010-10-20 15:07:30 +00:00
Petr Rockai
014546ad62 Revert some debug statements that slipped in with last checkin. 2010-10-20 14:51:18 +00:00
Petr Rockai
e2240edafc Fix a deadlock in clvmd.
The signalling code (pthread_cond_signal/pthread_cond_wait) in the
pre_and_post_thread was using the wait mutex (see man pthread_cond_wait)
incorrectly, and this could cause clvmd to deadlock when timing was
right. Detailed explanation of the problem follows.

There is a single mutex around (L for Lock, U for Unlock), a signal (S) and a
wait (W). C for pthread_create. Time flows from left to right, each arrow is a
thread.

So first the "naive" scenario, with no mutex (PPT = pre_and_post_thread, MCT =
main clvmd thread; well actually the thread that does read_from_local_sock). I
will also use X, for a moment when MCT actually waits for something to happen
that PPT was supposed to do.


MCT -----C ------S--X-----S----X----------------------S------XXXXXXXXX
         |                everything OK up to this --> <-- point...
PPT       -----WWW-----WWWW------------------------------WWWWWWWWWWWWW

Ok, so pthread API actually does not let you use W/S like that. It goes out of
its way to tell you that you need a mutex to protect the W so that the above
cannot happen. *But* if you are creative and just lock around the W's and S's,
this happens:

MCT ----C-----LSU----X-----LSU----X------------LSU------XXXXXXX
        |
PPT      ---LWWWU-------LWWWWU-----------------------LWWWWWWWWW

Ooops. Nothing changed (the above is what actually was done by clvmd before
this satch). So let's do it differently, holding L locked *all* the time in
PPT, unless we are actually in W (this is something that the pthread API does
itself, see the man page).

MCT ----C-----LSU------X---LSU---X-----LLLLLLLSU----X----
        |                             (and they live happily ever after)
PPT     L---WWWWW---------WWWW----------------W----------

So W actually ensures that L is unlocked *atomically* together with entering
the wait. That means that unless PPT is actually waiting, it cannot be
signalled by MCT. So if MCT happens to signal it too soon (it wasn't waiting
yet), it (MCT) will be blocked on the mutex (L), until PPT is actually ready to
do something.
2010-10-20 14:46:45 +00:00
Petr Rockai
e4d6cfc013 Avoid an extraneous orphans unlock in vgextend --restoremissing. 2010-10-18 17:27:10 +00:00
Petr Rockai
79e3827207 Update WHATS_NEW. 2010-10-15 17:04:02 +00:00
Petr Rockai
ba29d593b9 Implement automatic snapshot extension with dmeventd, and add two new options
to lvm.conf in the activation section: 'snapshot_autoextend_threshold' and
'snapshot_autoextend_percent', that define how to handle automatic snapshot
extension. The former defines when the snapshot should be extended: when its
space usage exceeds this many percent. The latter defines how much extra space
should be allocated for the snapshot, in percent of its current size.
2010-10-15 16:28:14 +00:00
Petr Rockai
b53f7f9993 Fixes for the devices/disable_after_error_count documentation. 2010-10-15 16:24:00 +00:00
Zdenek Kabelac
e6e871afbe Update 2010-10-15 09:55:37 +00:00
Zdenek Kabelac
45edc2ae3f Speedup memory un/locking
Move the call of find_config_tree_node() from inner loop to outer
section of maps scanning.
2010-10-15 09:48:23 +00:00
Zdenek Kabelac
d54db5013b Fix linking order for liblvm2cmd
Reorder linked libraries so we better support --as-needed linker flag used
by some distributions (i.e. Gentoo).

Patch suggested by Diego Elio Pettenò <flameeyes <at> gmail.com>
2010-10-15 09:41:21 +00:00
Alasdair Kergon
5426e85284 Add --setuuid to dmsetup rename.
Add dm_task_set_newuuid to set uuid of mapped device post-creation. (pjones)
2010-10-15 01:10:27 +00:00
Jonathan Earl Brassow
81631b1986 Fix for bug 637936: killing both redundant logs causes deadlock
Problem:
When both legs of a mirrored log fail, neither the log nor the parent
mirror can proceed.  The repair code must be careful to replace the
log with an error target before operating on the parent - otherwise,
the parent can get stuck trying to suspend because it can't push through
any writes.  The steps to replace the log device with an error target
were incomplete and resulted in the replacement not happening at all!

The code originally had all the necessary logic to complete the
replacement task, but was pulled out in a effort to clean-up that
section of code, while fixing another bug:
<offending commit msg>
In addition, I added following three changes.

- Removed tmp_orphan_lvs handling procedure
  It seems that _delete_lv() can handle detached_log_lv properly
  without adding mirror legs in mirrored log to tmp_orphan_lvs.
  Therefore, I removed the procedure.

- Removed vg_write()/vg_commit()
  Metadata is saved by vg_write()/vg_commit() just after detached_log_lv
  is handled. Therefore, I removed vg_write()/vg_commit().
</offending commit msg>

http://sources.redhat.com/cgi-bin/cvsweb.cgi/LVM2/lib/metadata/mirror.c?cvsroot=lvm2&f=h#rev1.130

I've reverted the "clean-up" changes associated with that fix, but not what
that commit was actually fixing.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-10-14 20:03:12 +00:00
Petr Rockai
df1b244fda Add a VERBOSE option to make check (make check VERBOSE=1). 2010-10-14 14:36:26 +00:00
Mike Snitzer
004cda1b64 Convey need for snapshot-merge target in lvconvert error message and man
page.

Add ->target_name to segtype_handler to allow a more specific target
name to be returned based on the state of the segment.

Result of trying to merge a snapshot using a kernel that doesn't have
the snapshot-merge target:

Before:
# lvconvert --merge vg/snap
  Can't expand LV lv: snapshot target support missing from kernel?
  Failed to suspend origin lv

After:
# lvconvert --merge vg/snap
  Can't process LV lv: snapshot-merge target support missing from kernel?
  Failed to suspend origin lv
  Unable to merge LV "snap" into it's origin.
2010-10-13 21:26:37 +00:00
Petr Rockai
97c6592073 Tweak WHATS_NEW for devices/disable_after_error_count. 2010-10-13 15:50:34 +00:00
Petr Rockai
949b8c519b Tweak the lvm.conf comment about devices/disable_after_error_count. 2010-10-13 15:45:18 +00:00
Petr Rockai
e59624d6e9 Update WHATS_NEW. 2010-10-13 15:41:52 +00:00
Petr Rockai
74b228ee94 Limit repeated accesses to broken devices.
Signed-off-by: Takahiro Yasui <takahiro.yasui@hds.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-10-13 15:40:38 +00:00
Petr Rockai
1565532bc7 Update WHATS_NEW. 2010-10-13 13:53:25 +00:00
Petr Rockai
21a17f291c Give correct error message when creating a too-small snapshot (BZ 587063) 2010-10-13 13:52:53 +00:00
Zdenek Kabelac
919568a223 Don't use floor() in _bitset_with_random_bits
Use _even_rand() function instead of floor() in _bitset_with_random_bits().
floor() function is missing in dietlibc (on architectures other than x86).
Moreover using floor() to clip rand results does not assure even result
distribution. _even_rand() uses integer arithmetic only and is designed to
return evenly distributed results.

> Looks OK to me. It took a while to decipher what is the exact meaning of
> the loop in _even_rand (to a non-pseudorandomness-expert) but I am
> fairly comfortable with it now. If I understand this correctly, it
> rejects numbers that come from an "incomplete" slice of the RAND_MAX
> space (considering the number space [0, RAND_MAX] is divided into some
> "max"-sized slices and at most a single smaller slice, between [n*max,
> RAND_MAX] for suitable n -- numbers from this last slice are discarded
> because they could distort the distribution in favour of smaller
> numbers).

Signed-off-by: Przemyslaw Iskra <sparky <at> pld-linux.org>
Reviewed-by: Petr Rockai <prockai <at> redhat.com>
2010-10-13 12:18:53 +00:00
Petr Rockai
ec81f7151d Update WHATS_NEW. 2010-10-13 10:37:27 +00:00
Petr Rockai
46eae0cdd8 Add a test for vgextend --restoremissing. 2010-10-13 10:36:26 +00:00
Petr Rockai
e5ecec8140 Implement vgextend --restoremissing (BZ 537913), which makes it possible to
re-add a physical volume that has gone missing previously, due to a transient
device failure, without re-initialising it.

Signed-off-by: Petr Rockai <prockai@redhat.com>
Reviewed-by: Alasdair Kergon <agk@redhat.com>
2010-10-13 10:34:31 +00:00
Zdenek Kabelac
401f9e0eaf Skip fsadm testing until proper installation rules are commited in.
For now testing of fsadm is broken - as it's executing fsadm from system
and not from the package thus we get wrong results.
2010-10-13 09:44:52 +00:00
Dave Wysochanski
06769c9ccd Fix lv_modules_dup segfault. 2010-10-12 17:09:23 +00:00
Petr Rockai
009e7950bf Make lvconvert respect --yes/--force in the inactive log conversion
prompt. Fixes BZs 642055, 621281. Patch by Taka.

Signed-off-by: Takahiro Yasui <tyasui@redhat.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-10-12 16:41:17 +00:00
Dave Wysochanski
66ad893ca4 update WHATS_NEW 2010-10-12 16:17:50 +00:00
Petr Rockai
15f4b08f94 Add an option to skip some tests in make check (regex based, like T=): eg.
$ sudo make check S='mirror-basic|fsadm'
2010-10-12 16:17:25 +00:00
Petr Rockai
9b4b94edc0 test-utils: Fix a slight bug in the dmeventd-already-running error message. 2010-10-12 16:13:43 +00:00
Dave Wysochanski
476099efc1 Refactor and add code for (lv) 'modules' get function. 2010-10-12 16:13:06 +00:00
Dave Wysochanski
cab4c903d0 Refactor and add code for (lv) 'mirror_log' get function.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Reviewed-By: Petr Rockai <prockai@redhat.com>
2010-10-12 16:12:50 +00:00
Dave Wysochanski
d9da96c8ad Refactor and add code for (lv) 'lv_kernel_{major|minor}' get functions.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Reviewed-By: Petr Rockai <prockai@redhat.com>
2010-10-12 16:12:33 +00:00
Dave Wysochanski
d9ea736248 Refactor and add code for (lv) 'convert_lv' get function.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Reviewed-By: Petr Rockai <prockai@redhat.com>
2010-10-12 16:12:18 +00:00
Dave Wysochanski
b4dc68ba50 Refactor and add code for (lv) 'move_pv' get function.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Reviewed-By: Petr Rockai <prockai@redhat.com>
2010-10-12 16:12:02 +00:00
Dave Wysochanski
786f8576e3 Refactor and add code for (lv) 'origin_size' get function.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Reviewed-By: Petr Rockai <prockai@redhat.com>
2010-10-12 16:11:48 +00:00
Dave Wysochanski
ec739277a1 Refactor and add code for (lv) 'lv_path' get function. 2010-10-12 16:11:34 +00:00
Dave Wysochanski
8376fd198f Add some lv 'get' functions that require no refactoring.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Reviewed-By: Petr Rockai <prockai@redhat.com>
2010-10-12 16:11:20 +00:00
Zdenek Kabelac
39429d3905 Update script for fsadm testing 2010-10-08 15:03:21 +00:00
Zdenek Kabelac
4fb84eedfe Add support for noninterctive shell execution
Try to distinguish between the case of  using interactive shell and non
interactive running - different combinations of  '-y' and '-p' option
needs to be used for fsck.
2010-10-08 15:02:05 +00:00
Zdenek Kabelac
1a64b3c812 Fix usage of --yes flag for ReiserFS resize
Put 'dry' before resize command - using dry for echo had no use.
2010-10-08 15:00:06 +00:00
Zdenek Kabelac
66c590c89c Fix detection of mounted filesystem.
Update the way how fsadm detects mounted filesystem.
With udev /dev/dm-XXX paths are now returned - but mount or /proc/mounts
prints names in form of /dev/mapper/vg-lv - so the match was not found.
Fixex RHBZ #638050.

Current solution uses same trick as mount and detects vg-lv name through
/sys where available - this should be reasonable safe.

Instead of calling mount without parameter to get actual mount table,
switch to use /proc/mounts directly.
2010-10-08 14:55:19 +00:00
Zdenek Kabelac
821c266f1d Fix assignment of default LVM variable
Fix a typo which does not work in case LVM_BINARY is empty string.
Using proper :-  syntax.
2010-10-08 14:49:25 +00:00
Zdenek Kabelac
4e63287d75 Support for passing --yes flag recursively. 2010-10-08 13:49:20 +00:00
Zdenek Kabelac
7f5ac28c78 Correctly respect --dry-run option for fsadm
Fix missing 'dry' execution of lvresize - fixing problem where resize
command were 'dry-run' executed - but lvresize has been executed for real.

Also adapt code slightly to support better recursive execution of fsadm
through lvresize call.
2010-10-08 13:47:10 +00:00
Zdenek Kabelac
52cd4a5893 Fix a serious bug in the behavior of fasdm tool when breaked.
Under certain conditions it was possible to break (^C) fsadm before actually
resizing filesystem, but lvresize which executed fsadm will think resize
was succesful and shrinks partitions with unresized filesystem on it.

Fix by returning error (1) for this case - this stops lvresize from futher
proceding in resize operation.
2010-10-08 12:35:56 +00:00
Zdenek Kabelac
04c5af2130 Replace "can not" with "cannot" 2010-10-08 11:18:29 +00:00
Alasdair Kergon
52893778f8 Allow CC to be overridden at build time (for 'scan-build make'). 2010-10-07 16:33:34 +00:00
Dave Wysochanski
600e67b17a Rename 'flags' to 'status' for struct metadata_area.
In other LVM memory structures such as volume_group, the field
used to store flags is called "status", and on-disk fields are called
'flags', so rename the one inside metadata_area to be consistent.
Not only is it more consistent with existing code but is cleaner
to say "the status of this mda is ignored".

Background for this patch - prajnoha pinged me on IRC this morning
about a fix he was working on related to metadataignore when
metadata/dirs was set.  I was reviewing my patches from this year
and realized the 'flags' field was probably not the best choice
when I originally did the metadataignore patches.
2010-10-05 17:34:05 +00:00
Milan Broz
2da49b93bd Restrict lvm1 partial mode.
Current lvm1 allocation code seems to not properly
map segments on missing PVs.

For now disable this functionality.
(It never worked and previous commit just introduced segfault here.)

So the partial mode in lvm1 can only process missing PVs
with no LV segments only.

Also do not use random PV UUID for missing part but use fixed
string derived from VG UUID (to not confuse clvmd tests).
2010-10-04 18:59:01 +00:00
Alasdair Kergon
a65df0e598 Add dm_zalloc and use it and dm_pool_zalloc throughout. 2010-09-30 21:06:50 +00:00
Dave Wysochanski
54d4416ef8 Fix copyright dates on new files lib/metadata/{lv|vg|pv}.[ch]. 2010-09-30 20:47:18 +00:00
Dave Wysochanski
42a7af1632 Update WHATS_NEW for commits related to generic property functions 2010-09-30 14:26:48 +00:00
Peter Rajnoha
cfbbf34d6a Fix memory leak of vg_read while using live copies of metadata in directories. 2010-09-30 14:12:14 +00:00
Dave Wysochanski
5b35bbdf1b Add pv_get_property and create generic internal _get_property function.
We need to use a similar function for pv and lv properties, so just make
a generic _get_property() function that contains most of the required
functionality.  Also, add a check to ensure the field name matches the
object passed in by re-using report_type_t enum.  For pv properties,
the report_type might be either PVS or LABEL.

In addition, add 'const' to 'get' functions object parameter, but not
'set' functions.  Add _not_implemented_set() and _not_implemented_get()
functions.
2010-09-30 14:09:45 +00:00
Dave Wysochanski
0c46d48e40 Add pv 'get' functions for all pv properties.
Add 'get' functions for all pv properties.
Multiply by SECTOR_SIZE for pv properties pv_mda_free, pv_mda_size,
pe_start, pv_size, pv_free, pv_used.
2010-09-30 14:09:33 +00:00
Dave Wysochanski
e78ad5f175 Add pv_name_dup() and pv_fmt_dup() helper functions. 2010-09-30 14:09:22 +00:00
Dave Wysochanski
09c2296a9a Add pv_mda_size, pv_mda_free, and pv_used functions, call from 'disp' functions. 2010-09-30 14:09:10 +00:00
Dave Wysochanski
55a3cf7ecd Add 'get' functions for vg fields.
Add 'get' functions based on generic macros for VG, PV, and LV.
Add 'get' functions for vg string fields, vg_name, vg_fmt, vg_sysid,
vg_uuid, vg_attr, and vg_tags, and all numeric fields.
2010-09-30 14:08:58 +00:00
Dave Wysochanski
180705e9eb Make generic GET_*_PROPERTY_FN macros and define secondary macro for vg, pv, lv.
Will need similar macros for VG, PV and LV, so define a generic one, and just
pass in the struct name and variable name for the specific macro.
2010-09-30 14:08:46 +00:00
Dave Wysochanski
f5cf560a3c Add supporting functions vg_name_dup, vg_fmt_dup, vg_system_id_dup.
Add supporting functions for vg_name, vg_fmt, vg_system_id.
Append "_dup" to end of supporting functions to make clear the strings
are dup'd and to avoid namespace conflict with vg_name.
2010-09-30 14:08:33 +00:00
Dave Wysochanski
1e11b5dd7c Add pv_tags_dup, vg_tags_dup, lv_tags_dup functions that call tags_format_and_copy. 2010-09-30 14:08:19 +00:00
Dave Wysochanski
f201c75a24 Add tags_format_and_copy() common function and call from _tags_disp.
Add a common function to allocate memory and format a string of
tags.
Call tags_format_and_copy() from _tags_disp().
2010-09-30 14:08:07 +00:00
Dave Wysochanski
41219737ad Add pv_uuid_dup, vg_uuid_dup, and lv_uuid_dup, and call id_format_and_copy.
Add supporting functions for pv_uuid, vg_uuid, and lv_uuid.
Call new function id_format_and_copy.  Use 'const' where appropriate.
Add "_dup" suffix to indicate memory is being allocated.
Call {pv|vg|lv}_uuid_dup from lvm2app uuid functions.
2010-09-30 14:07:47 +00:00
Dave Wysochanski
35d1b6665c Add id_format_and_copy() common function and call from _uuid_disp.
Add supporting uuid function to allocate memory and call id_write_format.
Call id_format_and_copy from _uuid_disp.
2010-09-30 14:07:33 +00:00
Dave Wysochanski
0543201db1 Simplify logic to create 'attr' strings.
This patch addresses code review request to simplify creation of 'attr'
strings.  The simplification is done in this separate patch to more
easily review and ensure the simplification is done without error.
2010-09-30 14:07:19 +00:00
Dave Wysochanski
8e4da1d397 Add {pv|vg|lv}_attr_dup() functions and refactor 'disp' functions.
Move the creating of the 'attr' strings into a common function so
they can be called from the 'disp' functions as well as the new
'get' property functions.
Add "_dup" suffix to indicate memory is allocated.
Refactor pvstatus_disp to take pv argument and call pv_attr_dup().
2010-09-30 13:52:55 +00:00
Dave Wysochanski
2977bfa721 Add lib/metadata/vg.[ch] and lib/metadata/lv.[ch].
These got missed when git cvsexportcommit was used.
2010-09-30 13:16:55 +00:00
Dave Wysochanski
5fa92bf619 Add lib/metadata/pv.[ch] new files.
Apparently git cvsexportcommit does not properly add new files
from a git commit.
2010-09-30 13:15:42 +00:00
Dave Wysochanski
3e37822415 Refactor metadata.[ch] into lv.[ch] for lv functions.
This patch is similar to the other patches for pv and vg
functionality, and separates lv functionality into separate
files, concentrating on reporting fields and simple functions.
2010-09-30 13:05:45 +00:00
Dave Wysochanski
4d65f59824 Refactor metadata.[ch] into pv.[ch] for pv functions.
The metadata.[ch] files are very large.  This patch makes a first
attempt at separating out pv functions and data, particularly
related to the reporting fields calculations.

More code could be moved here but for now I'm stopping at reporting
functions 'get' / 'set' functions.
2010-09-30 13:05:20 +00:00
Dave Wysochanski
cd5829fc42 Refactor metadata.[ch] into vg.[ch] for vg functions.
The metadata.[ch] files are very large.  This patch makes a first
attempt at separating out vg functions and data, particularly
related to the reporting fields calculations.
2010-09-30 13:04:55 +00:00
Zdenek Kabelac
86b056872d Fix memory leak of config_tree
Adding missing destroy_config_tree() for cft_override if it has been allocated.
2010-09-30 11:44:54 +00:00
Zdenek Kabelac
b1007b02c0 Fix leaked pool report
Swap pool destruction order in dmeventd_lvm2_exit() to fix leak report.
2010-09-30 11:40:14 +00:00
Zdenek Kabelac
d3126d15cf Maps fix
Read complete content of /proc/self/maps into one buffer without
realocation in the middle of reading and before doing any m/unlock
operation with these lines - as some of them gets change.
With previous implementation we've read some mappings twice ([stack])
2010-09-30 11:32:40 +00:00
Zdenek Kabelac
d61ed147ac Add missing cleanup rule for generated .exported_symbols_generated 2010-09-30 10:08:58 +00:00
Alasdair Kergon
b1f3aa2920 Speed up unquoting of quoted double quotes and backslashes. 2010-09-28 01:29:06 +00:00
Alasdair Kergon
8ad3bb1462 drop an unnecessary 'stack' 2010-09-27 19:15:13 +00:00
Alasdair Kergon
b91a077d23 was renamed 2010-09-27 19:10:46 +00:00
Alasdair Kergon
3df29c1667 Speed up CRC32 calculations by using a larger lookup table.
Use -DDEBUG_CRC32 to revert to old function and check new one gives same result.
2010-09-27 19:09:34 +00:00
Alasdair Kergon
abf6c8eb4d pre-release 2010-09-24 16:24:57 +00:00
Peter Rajnoha
cffd634681 Add escape sequence for ':' and '@' found in device names used as PVs. 2010-09-23 12:02:33 +00:00
Alasdair Kergon
e72e8cc71f Replace alloca with dm_malloc in _aligned_io.
(This section of code dates from 2.4 and could be written more efficiently nowadays.)
2010-09-22 22:31:45 +00:00
Milan Broz
8eaef39b33 Fix handling of partial VG for lvm1 format metadata
If some lvm1 device is missing, lvm fails on all operations
# vgcfgbackup -f bck -P vg_test
  Partial mode. Incomplete volume groups will be activated read-only.
  3 PV(s) found for VG vg_test: expected 4
  PV segment VG free_count mismatch: 152599 != 228909
  PV segment VG extent_count mismatch: 152600 != 228910
  Internal error: PV segments corrupted in vg_test.
  Volume group "vg_test" not found

Allow loading of lvm1 partial VG by allocating "new" missing PV,
which covers lost space. Also this fake mising PV inform code
that it is partial VG.

https://bugzilla.redhat.com/show_bug.cgi?id=501390
2010-09-22 13:45:21 +00:00
Alasdair Kergon
1588334093 Fix name in msg in last checkin.
(The problem the last checkin addressed was a segfault in 'pvs -a' if .cache
didn't contain every PV in a VG.)
2010-09-22 01:50:38 +00:00
Alasdair Kergon
6f4fb574ec Track recursive filter iteration to avoid refreshing while in use. (2.02.56) 2010-09-22 01:36:13 +00:00
Peter Rajnoha
64cc7834a8 "goto_bad" should be used in alloc_printed_tags function, not "goto bad". 2010-09-21 10:42:02 +00:00
Peter Rajnoha
11e2edcf5f Revert to old glibc behaviour for vsnprintf used in emit_to_buffer function.
Revert to old glibc behaviour for vsnprintf used in emit_to_buffer fn.
Otherwise, the check that follows would be wrong for new glibc versions.
This caused the rh bug #633033 to be undetected and pass throught the check,
corrupting the metadata!
2010-09-20 14:25:27 +00:00
Peter Rajnoha
f900ddb544 Use dynamic allocation for metadata's tag buffer (removes 4096 char. limit). 2010-09-20 14:23:20 +00:00
Dave Wysochanski
4f60a6ea02 Update vg_mda_free 'get' function to multiply by SECTOR_SIZE. 2010-09-09 19:38:03 +00:00
Peter Rajnoha
dc1bc980d0 Add random suffix to archive file names to prevent races when being created.
In certain configurations, we're not under a VG rw lock while trying to write
a new archive file with VG metadata. A common example is using "vgs" while
having the content of backup and archive directories empty. The code scans the
content of these directories and tries to determine the final index that should
be used in archive name. Since we're not under a lock, we can get into a race
while choosing the index which could end up showing errors about not being able
to rename to final archive name. Let's add random number suffix to these archive
file names so we can avoid the race.
2010-09-09 13:13:12 +00:00
Peter Rajnoha
3d3ae02e62 Reinitialize archive and backup handling on toolcontext refresh.
For example, when using '--config "backup { ... }"' line, the values from
lvm.conf (or default values) should be overridden. This patch adds
reinitialisation of archive and backup handling on toolcontext refresh
which makes these settings to be applied.
2010-09-09 13:07:13 +00:00
Jonathan Earl Brassow
a70a110f31 This patch fixes an issue where cluster mirror write I/O
can be opprobriously slow if created with '--nosync'.

One of the ways cluster mirrors coordinate I/O and recovery
amoung the different machines is by the use of the log
function 'is_remote_recovering()' which lets nodes know if
a region they wish to perform a write on is currently being
recovered on another node.  If the region is being recovered,
the I/O is delayed.

The 'is_remote_recovering' routine has been optimized to
avoid the deluge of requests that would be issued to the
userspace log server by maintaining a marker of how far
the recovery has gotten.  It can then immediately return
'not recovering' if the region being inquired about is
less than this mark.  Additionally, if the region of
concern is greater than the mark, the function will
limit the number of transmissions to userspace by assuming
the region /is/ being recovered when skipping the
transmission.  This limits the amount of processing
and updates the mark in 1/4 sec time steps.

This patch fixes a problem where 'the mark' is not being
updated because of faulty logic in the userspace log
daemon.  When '--nosync' is used to create a cluster
mirror, the userspace log daemon never has a chance
to update the mark in the normal way.  The fix is to set
the mark to "complete" if the mirror was created with
the --nosync flag.
2010-08-30 18:37:42 +00:00
Milan Broz
05d3869acf Do not run singlenode clvmd tests if support for singlenode not compiled in. 2010-08-27 08:52:54 +00:00
Milan Broz
72489fb458 Currently tests require SI units enabled and data alignment to 1MiB.
For now force it in lvm.conf (otherwise it fails on older systems like RHEL5
where are these options disabled by default).

FIXME: it should test and detect both versions.
2010-08-27 07:59:40 +00:00
Jonathan Earl Brassow
fe8b1db933 This patch fixes a problem where the mirror polling process
may never complete.

If you convert from a linear to a mirror and then convert that
mirror back to linear /while/ the previous (up)convert is
taking place, the mirror polling process will never complete.
This is because the function that polls the mirror for
completion doesn't check if it is still polling a mirror and
the copy_percent that it gets back from the linear device is
certainly never 100%.

The fix is simply to check if the daemon is still looking at
a mirror device - if not, return PROGRESS_CHECK_FAILED.

The user sees the following output from the first (up)convert
if someone else sneaks in and does a down-convert shortly
after their convert:
[root@bp-01 ~]# lvconvert -m1 vg/lv
  vg/lv: Converted: 43.4%
  ABORTING: Mirror percentage check failed.
2010-08-26 16:29:12 +00:00
Jonathan Earl Brassow
8fdd90eb40 This patch fixes a potential for I/O to hang and LVM commands
to block when a mirror under a snapshot suffers a failure.

The problem has to do with label scanning.  When a mirror suffers
a failure, the kernel blocks I/O to prevent corruption.  When
LVM attempts to repair the mirror, it scans the devices on the
system for LVM labels.  While mirrors are skipped during this
scanning process, snapshot-origins are not.  When the origin is
scanned, it kicks up I/O to the mirror (which is blocked)
underneath - causing the label scan (an thus the repair operation)
to hang.

This patch simply bypasses snapshot-origin devices when doing
labels scans (while ignore_suspended_devices() is set).  This
fixes the issue.
2010-08-26 14:21:50 +00:00
Milan Broz
564a356f09 Fix previous const removal. 2010-08-26 12:22:05 +00:00
Milan Broz
3927137203 Fix return type qualifier to avoid compiler warning.
introduced in commit b16b4d92a7
"Improve various log messages."

fixes a lot of
../include/metadata.h:148: warning: type qualifiers ignored on function return type
2010-08-26 12:08:19 +00:00
Mike Snitzer
f826ad5c89 Make example.conf description for 'default_data_alignment' more generic. 2010-08-25 13:06:03 +00:00
Milan Broz
517ee7f6c3 Fix timestamp.
Not only in Wonderland time can run backwards.
2010-08-25 11:25:02 +00:00
Milan Broz
99cc41356c Update configure after last change. 2010-08-23 13:44:31 +00:00
Fabio M. Di Nitto
e76736cf55 Based on auto-detection or user requested cluster managers for clvmd,
set appropriate Required-Start and Required-Stop at configure time.

Reorder the checks for user selected cluster managers to match auto
detected ones, to be consistent in the output.

Add special case for qdiskd that´s started after cman/lock_gulmd for
RHEL-4/RHEL-5.
2010-08-23 11:37:02 +00:00
Milan Broz
2a7bbc7e46 Fix pvmove --abort <dev> return code
It prints error code even if abort operation succeeds:

pvmove --abort /dev/sdb
  Command failed with status code 5.
2010-08-23 11:34:40 +00:00
Milan Broz
bda6449118 Fix pvmove --abort to work even for empty pvmove LV
If pvmove crashed and metadata contains pvmove LV
but without miorrored segments, pvmove --abort
will not repair the situation (and finish wth success!).

Fix it by allowing metadata update if aborting
(thus removing pvmove LV) even if no moved LVs detected.

(Tested on real metadata provided by an lvm user:-)
2010-08-23 11:34:10 +00:00
Mike Snitzer
e6166e4498 Verify that pvcreate --dataalignment really does override the topology
detected alignment.

NOTE: lvm2 doesn't detect MD 1.2 metadata (now the default on RHEL6) so
for now I'm forcing 1.0 metadata.  This was needed to be able to reuse
the existing loop devices but recreate the md device with different
raid0 striping.
2010-08-21 15:43:45 +00:00
Alasdair Kergon
80a878ade5 . 2010-08-21 00:18:05 +00:00
Alasdair Kergon
bbf2fcf17a autoreconf also updates configure.h.in 2010-08-21 00:16:37 +00:00
Dave Wysochanski
c1d266bd9a Update configure for snitm changes 2010-08-20 22:32:18 +00:00
Mike Snitzer
7a7f3f2f5b Switch to using configure --with-default-data-alignment=<NUM> to
establish DEFAULT_DATA_ALIGNMENT.  Again, 0=64KiB, 1=1MiB, 2=2MiB

Default is 1.
2010-08-20 22:24:58 +00:00
Mike Snitzer
0371941707 Update heuristic used for default and detected data alignment.
Add "devices/default_data_alignment" to lvm.conf to control the internal
default that LVM2 uses: 0==64k, 1==1MB, 2==2MB, etc.

If --dataalignment (or lvm.conf's "devices/data_alignment") is specified
then it is always used to align the start of the data area.  This means
the md_chunk_alignment and data_alignment_detection are disabled if set.

(Same now applies to pvcreate --dataalignmentoffset, the specified value
will be used instead of the result from data_alignment_offset_detection)

set_pe_align() still looks to use the determined default alignment
(based on lvm.conf's default_data_alignment) if the default is a
multiple of the MD or topology detected values.
2010-08-20 20:59:05 +00:00
Dave Wysochanski
6f449e65a4 Update WHATS_NEW 2010-08-20 20:35:55 +00:00
Dave Wysochanski
5491f09bab Define GET_NUM_PROPERTY_FN macro to simplify numeric property 'get' functions. 2010-08-20 13:02:39 +00:00
Dave Wysochanski
6949ec443e Add implmentation for simple numeric 'get' property functions.
Add 'get' functions based on the simple macro function definition for a
numeric property.

Add 'get' functions for the following: _vg_extent_count_get,
_vg_free_count_get, _max_lv_get, _max_pv_get, _pv_count_get,
_lv_count_get, _snap_count_get, _vg_seqno_get, _vg_size_get,
_vg_free_get, vg_mda_*.

For size functions, multiply by SECTOR_SIZE to return the value in bytes.
2010-08-20 12:45:09 +00:00
Dave Wysochanski
47ca8f9c12 Define GET_NUM_PROPERTY_FN macro to simplify numeric property 'get' functions. 2010-08-20 12:44:58 +00:00
Dave Wysochanski
ba303a615b Add properties.[ch] to lib/report, defined based on columns.h.
Extend the existing reporting infrastructure definitions and structures
to include a 'get' and 'set' function for each field.  We will provide
a 'get' and 'set' function for each of these fields, which will be utilized
by exported lvm2app functions.

Define a default _not_implemented 'get' and 'set' function that just sets
an errno and returns 0.  Future patches will actually implement the
specific 'get' and 'set' functions for each property.  For read-only
properties, only the 'get' function will be implemented.

Define vg_get_property() function to query a property.  We will call
this from a lvm2app function.
2010-08-20 12:44:47 +00:00
Dave Wysochanski
350582bc7b Add macro definitions to report infrastructure for character array length.
Rather than hard code the size of the field, use a #define, so we can re-use.
The #define will be needed in a future patch when we extend the reporting
infrastructure to have 'get' and 'set' functions for each field, allowing
lvm2app functions which query any report field.  In order to provide a
generic lookup based on the field id, we will define a type containing this
field id, and thus, we will need to re-use the length of this string as
it's defined inside libdevmapper.h.
2010-08-20 12:44:30 +00:00
Dave Wysochanski
b9836e714a Remove explicit double quotes from columns.h 'id' entries.
The 'id' entries in columns.h are the report field names.  Since these are
unique, we'd like to use them in generation of 'get' / 'set' functions.
As a step towards using them for this purpose, remove the explicit double
quotes and use the macro '#' character to add the double quotes back when
placing them into the '_fields' array 'id' member.
2010-08-20 12:44:17 +00:00
Dave Wysochanski
aae30e6db5 Add 'flags' field to columns.h and define FIELD_MODIFIABLE.
Add a 'flags' field to columns.h, and set it to 0 by default.
Define FIELD_MODIFIABLE flag to indicate whether a 'set' function exists
to change the field's value.
2010-08-20 12:44:03 +00:00
Dave Wysochanski
86e46f17de Add vg_mda_size and vg_mda_free functions.
Add supporting functions to get vg_mda_size and vg_mda_free fields.
Should be no functional change.
2010-08-20 12:43:49 +00:00
Milan Broz
fbc263659b Fix wrong use of LCK_WRITE
In all top vg read functions only LCK_VG_READ/WRITE can be used.
All other vg lock definitions are low-level backend machinery.

Moreover, LCK_WRITE cannot be tested through bitmask.
This patch fixes these mistakes.

For _recover_vg() we do not need lock_flags, it can be only
two of above and we always upgrading to LCK_VG_WRITE lock there.
(N.B. that code is racy)

There is no functional change in code (despite wrong masking
it produces correct bits:-)
2010-08-19 23:26:31 +00:00
Milan Broz
8c8a7cbba4 Detect LUKS signature in pvcreate
One shiny day we should use libblkid here. But now using LUKS is
very common together with LVM and pvcreate destroys LUKS completely.

So for user's convenience, try to detect LUKS signature and allow abort.
2010-08-19 23:08:18 +00:00
Milan Broz
f5e3cd0dbf Fix file descriptor leak in swap signature detection 2010-08-19 23:05:45 +00:00
Milan Broz
24c1a0954b Remove assumption that --yes must be used only in --force mode
This is not only undocumented but is is also in violation with --help
documentation.

Using --yes without --force is useful in pvcreate when it detects
old signature.
2010-08-19 23:04:37 +00:00
Milan Broz
f05cf23b89 Change the pvcreate swap/md logic
pvcreate detects MD and swap signature.

The logic hidden there is not only documented but it is also
user unfriendly. Who invented this logic should run pvcreate
on its own critical MD device to see why;-)

This patch
 - creates one function instead of duplication code
 - asks if user want to overwrite signature
 - allows aborting (!)
 (Please note that writing LVM signatute without wiping old
 is wrong, it confuses blkid, MD will not work anyway and
 swap and LUKS is broken too.)
2010-08-19 23:03:34 +00:00
Alasdair Kergon
90c883c201 post-release 2010-08-19 22:33:14 +00:00
Alasdair Kergon
2d0b95cd49 pre-release 2010-08-18 20:57:10 +00:00
Peter Rajnoha
e2bc7a277e Fix dm-mod autoloading logic to not assume control node is set correctly.
We can't rely on the fact that udev should prepare the node with right major
and minor number to trigger the module autoloading. We have to take into
account that the node could be missing or it could exist with improper
major and minor number assigned (e.g. from previous kernel versions in
an environment with static nodes and without udev). Make any corrections
if needed!
2010-08-18 13:11:56 +00:00
Jonathan Earl Brassow
adbd3e478b Fix for bug 596453: multiple mirror image failures cause lvm repair...
The lvm repair issues I believe are the superficial symptoms of this
bug - there are worse issues that are not as clearly seen.  From my
inline comments:
* If the mirror was successfully recovered, we want to always
* force every machine to write to all devices - otherwise,
* corruption will occur.  Here's how:
*    Node1 suffers a failure and marks a region out-of-sync
*    Node2 attempts a write, gets by is_remote_recovering,
*          and queries the sync status of the region - finding
*          it out-of-sync.
*    Node2 thinks the write should be a nosync write, but it
*          hasn't suffered the drive failure that Node1 has yet.
*          It then issues a generic_make_request directly to
*          the primary image only - which is exactly the device
*          that has suffered the failure.
*    Node2 suffers a lost write - which completely bypasses the
*          mirror layer because it had gone through generic_m_r.
*    The file system will likely explode at this point due to
*    I/O errors.  If it wasn't the primary that failed, it is
*    easily possible in this case to issue writes to just one
*    of the remaining images - also leaving the mirror inconsistent.
*
* We let in_sync() return 1 in a cluster regardless of what is
* in the bitmap once recovery has successfully completed on a
* mirror.  This ensures the mirroring code will continue to
* attempt to write to all mirror images.  The worst that can
* happen for reads is that additional read attempts may be
* taken.
2010-08-17 23:56:23 +00:00
Alasdair Kergon
7b0804c22f Attempt to fix buildbot failure in t-lvconvert-mirror.sh due to failing to
wait for mirror to get into sync before running subsequent command.
2010-08-17 22:01:41 +00:00
Alasdair Kergon
47dfe904ab Use 'SINGLENODE' instead of 'dead' in clvmd singlenode messages.
Ignore snapshots when performing mirror recovery beneath an origin.
Pass LCK_ORIGIN_ONLY flag around cluster.
Add suspend_lv_origin and resume_lv_origin using LCK_ORIGIN_ONLY.
2010-08-17 19:25:05 +00:00
Alasdair Kergon
7f5b44b423 Allow internal suspend and resume of origin without its snapshots. 2010-08-17 16:25:32 +00:00
Alasdair Kergon
c16ca69070 Fix dev_manager_transient to access -real device not snapshot-origin. (brassow)
Another reminder why cloning functions impedes maintenance.
2010-08-17 01:51:12 +00:00
Alasdair Kergon
394c628931 Monitor origin -real device below snapshot instead of overlay device. (brassow) 2010-08-17 01:16:41 +00:00
Alasdair Kergon
43695024af Don't really change monitoring status when in test mode. 2010-08-16 23:29:09 +00:00
Mike Snitzer
1678de7f51 Add some v1 to v2 metadata upgrade testing. 2010-08-16 23:21:20 +00:00
Alasdair Kergon
063436c82b Various small cleanups and fixes related to monitoring. 2010-08-16 22:54:35 +00:00
Alasdair Kergon
59585550a7 Remove superfluous NULL pointer tests before dm_free from dmeventd. 2010-08-16 18:19:46 +00:00
Jonathan Earl Brassow
51e294c501 Fix for bug 612291: dm devices of split off mirror images are not removed
DM devices were not handled properly on nodes in a cluster that were not
where the splitmirrors command was issued.  This was happening because
suspend_lv/resume_lv were being used in a place where activate_lv should
have been used.

When the suspend/resume are issued on (effectively) new LVs, their
'resource' (UUID) is not located in the lv_hash.  Thus, both operations
turn into no-ops.  You can see this from the output of clvmd from one
of the remote nodes:
<snip>
do_suspend_lv, lock not already held
<snip>
do_resume_lv, lock not already held

'activate_lv' enjoins the other nodes in the cluster to process the lock
and activate the new LV.  clvmd output from remote node as follows:
do_lock_lv: resource 'zMseY7CBuO3Ty09vXlplPAHzD0Y0CovjrTdv0R1VcwggMwPdYhutHErRcwm5Nd2S', cmd = 0x19 LCK_LV_ACTIVATE (READ|LV|NONBLOCK), flags = 0x84 (DMEVENTD_MONITOR ), memlock = 1
sync_lock: 'zMseY7CBuO3Ty09vXlplPAHzD0Y0CovjrTdv0R1VcwggMwPdYhutHErRcwm5Nd2S' mode:1 flags=1
sync_lock: returning lkid 27b0001

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-08-16 18:02:14 +00:00
Alasdair Kergon
22ffec640d set DEFAULT_RUN_DIR (missed from earlier checkin?) 2010-08-16 17:49:26 +00:00
Peter Rajnoha
7ab3c4c091 WHATS_NEW_DM 2010-08-16 11:22:08 +00:00
Peter Rajnoha
19934a59b2 dm-mod autoloading support is in kernel 2.6.36 actually. 2010-08-16 11:13:18 +00:00
Peter Rajnoha
f3645e3f4b Fix udev rules to support udev database content generated by older rules.
This can happen with older rules (without support for synthesized events)
that are still part of initrd while using new udev rules in the system itself.

The consequence was that new udev rules incorrectly assumed that not having
DM_UDEV_PRIMARY_SOURCE_FLAG set always means the uevent is synthesized and
inappropriate (device is still not properly activated) and so it should be
ignored. However, initrd is not updated automatically while updating the
libdevmapper/udev rules in the system and so we end up with the rules not
detecting and setting crucial parts in the initrd environment and the rules
in the system that rely on the information that should have been stored in
udev db (which is incorrect in this configuration, of course).

The overall consequence is that the update of libdevmapper/lvm2 without
regenerating the initrd could end up with a boot failure! Ignoring the event
means removing any existing symlinks in /dev!

To fix this, increase udev rules version to make a difference. So from now on,
mark rules without proper support for synthesized events as
DM_UDEV_RULES_VSN="1" and 2 (or higher) if that support is included.
2010-08-12 13:41:18 +00:00
Peter Rajnoha
d818035d3a Reinstate detection of inappropriate uevent with DISK_RO set and suppress it.
We still need to detect this one! We're not so strict with CHANGE events as
with the ADD events while applying filters in the rules so this one would
pass and it would process the rules prematurely (because it appears *before*
the actual CHANGE event used when resuming a DM device while setting read-only
state at the same time).
2010-08-12 13:07:08 +00:00
Fabio M. Di Nitto
0cce8cfbaa Fix clvmd init script return code when executed as non-root user.
clvmd daemon itself does the right thing when invoked as non-root, by
returning 4.

The patch removes the use daemon function from
/etc/rc.d/init.d/functions that´s unnecessary and has th bad habit to
mask the return codes from the real daemon.

Add a simple and generic check to see if clvmd is executed by root or not.

Our stop/reload/restart paths in the init script are complex and not all
the tools involved in the process are guaranteed to return 4 if executed
by non-root against a process that´s running as root (for example kill
-TERM will return -1 and parsing the output to catch the error is
suboptimal at best).

https://bugzilla.redhat.com/show_bug.cgi?id=553381
2010-08-12 09:14:59 +00:00
Mike Snitzer
3e132d71f5 fix t-pvcreate-operation-md.sh to require kernel.org Linux >= 2.6.33 for
the final alignment_offset check.  In the future, might look to check
for the RHEL6 kernel too.
2010-08-12 04:56:05 +00:00
Mike Snitzer
2b3a4adff8 Change default alignment of pe_start to 1MB.
The new standard in the storage industry is to default alignment of data
areas to 1MB.  fdisk, parted, and mdadm have all been updated to this
default.

Update LVM to align the PV's data area start (pe_start) to 1MB.  This
provides a more useful default than the previous default of 64K (which
generally ended up being a 192K pe_start once the first metadata area
was created).

Before this patch:
# pvs -o name,vg_mda_size,pe_start
  PV         VMdaSize  1st PE
  /dev/sdd     188.00k 192.00k

After this patch:
# pvs -o name,vg_mda_size,pe_start
  PV         VMdaSize  1st PE
  /dev/sdd    1020.00k   1.00m

The heuristic for setting the default alignment for LVM data areas is:
- If the default value (1MB) is a multiple of the detected alignment
  then just use the default.
- Otherwise, use the detected value.

In practice this means we'll almost always use 1MB -- that is unless:
- the alignment was explicitly specified with --dataalignment
- or MD's full stripe width, or the {minimum,optimal}_io_size exceeds
  1MB
- or the specified/detected value is not a power-of-2
2010-08-12 04:11:48 +00:00
Mike Snitzer
de49d45568 Require --restorefile when using pvcreate --uuid.
Introduce --norestorefile to allow user to override the new requirement.

This can also be overridden with "devices/require_restorefile_with_uuid"
in lvm.conf -- however the default is 1.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2010-08-12 04:08:59 +00:00
Peter Rajnoha
de5337b187 WHATS_NEW_DM 2010-08-11 13:12:31 +00:00
Peter Rajnoha
f0e82e4473 Recognise and give preference to md device partitions (blkext major).
We can already detect MD devices internally. But when using MD partitions,
these have "block extended major" (blkext) assigned (259). Blkext major
is also used in general, so we need to check whether the original device
is an MD device actually.
2010-08-11 12:14:23 +00:00
Petr Rockai
5a00c29f8a Update WHATS_NEW. 2010-08-09 14:06:03 +00:00
Petr Rockai
dd5d9aa6bc Never scan internal LVM devices. 2010-08-09 14:05:16 +00:00
Joe Thornber
40f03c4c2c [REGEX] fix a long standing off-by-one error (found by valgrind-pool) 2010-08-09 10:58:27 +00:00
Joe Thornber
1aaf8d589f [MM] Make valgrind aware of the pool allocators
./configure with --enable-valgrind-pool to enable this.
2010-08-09 10:56:01 +00:00
Joe Thornber
1b87c6ade7 [REGEX] fix bug in matcher that was causing segfault with chars of 0x80 and over. 2010-08-09 10:30:52 +00:00
Joe Thornber
d52e9c7704 [REGEX] Parse regexes that contain chars with value > 0x80
This is a long standing issue.  Fixed by casting a char value to
unsigned char before using it as an index into a bitset.
2010-08-09 10:29:42 +00:00
Joe Thornber
9c7425eee4 [REGEX] add a unit test for regexes containing chars with value over x80 2010-08-09 10:27:31 +00:00
Joe Thornber
7cd8b4410e [REGEX] matcher_t unit test now takes a flag to turn on fingerprinting 2010-08-09 10:23:54 +00:00
Jonathan Earl Brassow
c63e78714a Fix for bug 619221 - log device splitting regression
An incorrect fix on July 13, 2010 for an annoyance has caused a regression.
The offending check-in was part of the 2.02.71 release of LVM.  That
check-in caused any PVs specified on the command line to be ignored when
performing a mirror split.

This patch reverses the aforementioned check-in (solving the regressions)
and posits a new solution to the list reversal problem.  The original
problem was that we would always take the lowest mimage LVs from a mirror
when performing a split, but what we really want is to take the highest
mimage LVs.  This patch accomplishes that by working through the list in
reverse order - choosing the higher numbered mimages first.  (This also
reduces the amount of processing necessary.)

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Reviewed-by: Takahiro Yasui <takahiro.yasui@hds.com>
2010-08-06 15:38:32 +00:00
Jonathan Earl Brassow
2b34836c61 A misunderstanding of the return value of 'dm_bit' has been causing a data
corruption bug in cmirror.  'dm_bit' is only ever used as a boolean operation
within LVM, but it can return a range of values.  If the bit is set, a power of
2 is returned.  If the bit is unset, 0 is returned.

'log_test_bit' (a function in the cluster mirror log daemon code) has switched
to using the dm bit operations in rhel6.  There are two places in the daemon
code where 'log_test_bit' is not used merely as a boolean, but rather the
return value is used as the return value for the log functions 'is_clean' and
'in_sync' - having assumed that 'dm_bit' was returning 0 or 1 only.

One place the 'in_sync' function is utilized is in 'dm_rh_get_state' - a
function that informs the mirroring code how to treat I/O and which devices to
read/write from.  'dm_rh_get_state' was checking if the return value of
'in_sync' was 1 to determine if the region was DM_RH_CLEAN.  Since 'dm_bit'
(and by extension 'log_test_bit' and 'in_sync') was returning powers of 2,
DM_RH_CLEAN was rarely being reported as it should have been.  Thinking the
region was out-of-sync, the mirroring code would write only to the primary
device.  When the primary device was failed, all of those writes were lost -
leaving the entire mirror corrupted.
2010-08-04 18:18:18 +00:00
Petr Rockai
32d1d7b23f Reduce severity of the "mirror transient status" log message (this was never
intended to be a log_error).
2010-08-04 15:55:03 +00:00
Mike Snitzer
e7773faa8d Require logical volume(s) to be explicitly named for lvconvert --merge. 2010-08-03 20:22:31 +00:00
Mike Snitzer
b7c2a2b709 Avoid changing aligned pe_start as a side-effect of very verbose logging. 2010-08-03 18:19:42 +00:00
Peter Rajnoha
67ef389a69 Use built-in rules for device aliases: block/ < dm- < disk/ < mapper/ < other. 2010-08-03 13:39:27 +00:00
Zdenek Kabelac
7c1903a0c3 Fix 'void*' arithmetic warnings in dbg_malloc.c.
Use more readable char[idx] access instead of *char+idx access.
2010-08-03 13:24:07 +00:00
Zdenek Kabelac
c8563cac0a Fix 'void*' arithmetic warning in some functions from libdm-iface.c. 2010-08-03 13:16:21 +00:00
Zdenek Kabelac
42c4a7757d Fix const warning in dev_manager_info() and _dev_manager_lv_rmnodes(). 2010-08-03 13:13:01 +00:00
Zdenek Kabelac
8b007b358e Fix constness warning in archive_file structure from archive.c. 2010-08-03 13:09:21 +00:00
Zdenek Kabelac
5cd2cfc203 Use void parameter for function definition. 2010-08-03 13:06:35 +00:00
Zdenek Kabelac
a7ca42cb22 Wait for node creation before displaying debug info in dmsetup.
Readahead check needs to see created node - so wait till udev gets in sync.
2010-08-03 13:04:32 +00:00
Zdenek Kabelac
f562bcedef Clean generated files .exported_symbols_generated, example.conf for distclean. 2010-08-03 13:00:45 +00:00
Zdenek Kabelac
a8dc5260b7 Fix return status 0 for "dmsetup info -c -o help".
Solution returns success for _report_init when help is passed,
and caller needs to check for _report existance.
2010-08-03 12:56:00 +00:00
Peter Rajnoha
d33d0b01bf Add check for kernel semaphore support and disable udev_sync if not available.
udev_sync feature requires semaphores (part of System V IPC) to be configured
in kernel (CONFIG_SYSVIPC). Check whether it is supported and if not, give
a warning message and disable udev synchronisation code automatically to
avoid any further error states and associated problems.

One should use the kernel with System V IPC support enabled or libdevmapper
with udev_sync feature disabled.
2010-08-03 07:56:03 +00:00
Jonathan Earl Brassow
4a5195c38a Taka's fix for handling failure of all mirrored log devices and
all but one mirror leg.

<patch header>
To handle a double failure of a mirrored log, Jon's two patches are
commited, however, lvconvert command can't still handle an error
when mirror leg and mirrored log got failure at the same time.

  [Patch]: Handle both devices of a mirrored log failing (bug 607347)
  posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00009.html
  commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00027.html

  [Patch]: Handle both devices of a mirrored log failing (bug 607347) -
           additional fix
  posted: https://www.redhat.com/archives/lvm-devel/2010-July/msg00093.html
  commit: https://www.redhat.com/archives/lvm-devel/2010-July/msg00101.html

In the second patch, the target type of mirrored log is replaced with
error target when remove_log is set to 1, but this procedure should be
also used in other cases such as the number of mirror leg is 1. This
patch relocates the procedure to the main path.

In addition, I added following three changes.

- Removed tmp_orphan_lvs handling procedure
  It seems that _delete_lv() can handle detached_log_lv properly
  without adding mirror legs in mirrored log to tmp_orphan_lvs.
  Therefore, I removed the procedure.

- Removed vg_write()/vg_commit()
  Metadata is saved by vg_write()/vg_commit() just after detached_log_lv
  is handled. Therefore, I removed vg_write()/vg_commit().

- With Jon's second patch, we think that we don't have to call
  remove_mirror_log() in _lv_update_mirrored_log() because will be
  handled remove_mirror_images() in _lvconvert_mirrors_repaire().
</patch header>

Signed-off-by: Takahiro Yasui <takahiro.yasui@hds.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
2010-08-02 21:07:40 +00:00
Jonathan Earl Brassow
0864378250 Disallow mirrored logs in cluster mirrors.
The cluster log daemon (cmirrord) is not multi-threaded and
can handle only one request at a time.  When a log is stacked
on top of a mirror (which itself contains a 'core' log), it
creates a situation that cannot be solved without threading.

When the top level mirror issues a "resume", the log daemon
attempts to read from the log device to retrieve the log
state.  However, the log is a mirror which, before issuing
the read, attempts to determine the 'sync' status of the
region of the mirror which is to be read.  This sync status
request cannot be completed by the daemon because it is
blocked on a read I/O to the very mirror requesting the
sync status.
2010-08-02 19:03:45 +00:00
Alasdair Kergon
855e4aac41 Fix lib.device-mapper to wait for include too 2010-08-02 13:56:34 +00:00
Zdenek Kabelac
5f8a3896dc Minor speedup of lock test -
Instead of waiting for flock process finish kill the flock process.
2010-08-02 13:23:01 +00:00
Zdenek Kabelac
903bfcaf4b Add shell function to trim spaces.
Test values as "$val" to avoid weird results when spaces are in output.
2010-08-02 13:20:50 +00:00
Zdenek Kabelac
fcf46a9a42 Using count=0 is sufficient for creation of zeroed files. 2010-08-02 13:18:42 +00:00
Zdenek Kabelac
1d1db98a2e Visually better align lines which are executed as a result of true
or false result of previous command.
2010-08-02 13:18:01 +00:00
Zdenek Kabelac
51c8e01575 Do not use VPATH in include/Makefile 2010-08-02 13:17:03 +00:00
Alasdair Kergon
b87c774102 revert the 'Base' change - that wasn't the cause of the problem 2010-08-02 12:57:04 +00:00
Alasdair Kergon
6aa05bdc37 Fix exported_symbols generation to use standard compiler arguments. 2010-08-02 12:44:21 +00:00
Alasdair Kergon
4abee1da76 Use #include <> not "" in lvm2app.h which gets installed on the system. 2010-08-02 12:23:01 +00:00
Alasdair Kergon
2106e8f031 Make liblvm.device-mapper wait for include file generation. 2010-08-02 12:10:35 +00:00
Alasdair Kergon
4befd355b3 Drop explicit 'Base' version from exported symbols. 2010-07-31 14:13:59 +00:00
Alasdair Kergon
a544f403a2 Fix configure to supply DEFAULT_RUN_DIR to Makefiles. 2010-07-31 00:43:41 +00:00
Takahiro Yasui
8f1afa1766 Fix wrong number of mirror log at allocate policy
With mirror_log_fault_policy of 'remove' and mirror_image_fault_policy
of 'allocate', the log type of the mirror volume is converted from
'disk' or 'mirrored' to 'core' when all mirror legs but one in a mirror
volume broke.

Keep new_log_count as a number of valid log devices by using log_count
variable for a temporary usage in the first phase of error recovery
in _lvconvert_mirrors_repair().

Signed-off-by: Takahiro Yasui <takahiro.yasui@hds.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
2010-07-30 17:50:15 +00:00
Dave Wysochanski
c0789ae1b4 Remove irrelevant comments relating to vg_mda_copies. 2010-07-30 16:47:27 +00:00
Alasdair Kergon
9934a167a2 post-release 2010-07-28 21:58:08 +00:00
Alasdair Kergon
798ffad9a9 Never use clvmd singlenode unless explicitly requested with -Isinglenode. 2010-07-28 14:01:40 +00:00
Alasdair Kergon
fb3b7f2274 Change clvmd to communicate with lvm via a socket in /var/run/lvm. (mbroz)
https://bugzilla.redhat.com/show_bug.cgi?id=614248 [CVE-2010-2526]
2010-07-28 13:55:42 +00:00
Alasdair Kergon
23dd62ec54 add copyright notices to new files 2010-07-28 12:20:38 +00:00
Alasdair Kergon
a1affa1dc3 day+1 2010-07-28 11:49:42 +00:00
Alasdair Kergon
23633cf632 . 2010-07-28 10:44:29 +00:00
Peter Rajnoha
472bd55757 Revert unsuccessful table load preparation in combined "create, load and resume" scenario.
There was missing "revert" call in _create_and_load_v4 fn while the preparation
for table load ends up with failure in create/load/resume sequence. Otherwise
we could end up with a device being created, but not table-loaded nor resumed.

Even though the table is not loaded and the device is not resumed at this
stage, we still need to synchronize with udev when calling the revert
"remove" ioctl - there's still a remove uevent generated! The "revert"
code does exactly that.
2010-07-28 10:30:28 +00:00
Alasdair Kergon
2de4f65474 pre-release 2010-07-27 22:52:19 +00:00
Alasdair Kergon
38befc5ded . 2010-07-27 21:57:37 +00:00
Alasdair Kergon
5f9d855d3e Fix dm_create_lockfile error paths - incorrectly unlinked in-use lockfile.
(Jan Friesse)
2010-07-27 21:56:14 +00:00
Petr Rockai
3c5d941945 Do not create a clustered volume group in t-nomda-missing. 2010-07-27 21:08:32 +00:00
Petr Rockai
2f54dbd28c Make vgck warn about missing PVs. 2010-07-27 20:05:29 +00:00
Jonathan Earl Brassow
b2b6c18db3 Initial import of document describing LVM's policies
surrounding device faults/failures.
2010-07-26 20:31:53 +00:00
Dave Wysochanski
33e7126ba5 Remove unneeded "active" variable in vgchange_monitoring(). 2010-07-26 19:03:29 +00:00
Dave Wysochanski
2ab98aa919 Clarify help text for vg_mda_count. 2010-07-21 19:44:25 +00:00
Jonathan Earl Brassow
9ba875e502 Building without the '--enable-cmirrord' option means that
CMIRRORD_PIDFILE is not defined.  This makes the build fail.
Therefore, we need to conditionalize the check for cmirrord
based on if CMIRRORD_PIDFILE is defined.
2010-07-21 15:21:24 +00:00
Petr Rockai
2dcdb807f8 Don't fail t-pvcreate-operation-md if mdadm is broken. 2010-07-21 14:12:47 +00:00
Jonathan Earl Brassow
56fe3c6176 It's not enough to check for the kernel module in the case of cluster
mirrors, we must also check that the log daemon (cmirrord) is running.
The log module can be auto-loaded, but the daemon cannot be
"auto-started".  Failing to check for the daemon produces cryptic
messages that customers have a hard time deciphering.  (The system
messages do report that the log daemon is not running, but people
don't seem to find this message easily.)

Here are examples of what is printed when the module is available,
but the log daemon has not been started.

[root@bp-01 LVM2]# lvcreate -m1 -l1 -n lv vg
  Shared cluster mirrors are not available.

[root@bp-01 LVM2]# lvcreate -m1 -l1 -n lv vg -v
    Setting logging type to disk
    Finding volume group "vg"
    Archiving volume group "vg" metadata (seqno 3).
    Creating logical volume lv
    Executing: /sbin/modprobe dm-log-userspace
    Cluster mirror log daemon is not running
  Shared cluster mirrors are not available.
    Creating volume group backup "/etc/lvm/backup/vg" (seqno 4).
2010-07-21 13:40:21 +00:00
Alasdair Kergon
321b26f3d4 update configure 2010-07-21 12:54:21 +00:00
Joe Thornber
ad37ab272a [REGEX] calculate dfa states on demand 2010-07-21 12:09:12 +00:00
Joe Thornber
d5feddb0a0 [REGEX] remove the state_queue structure.
Instead we just have a 'next' field in the dfa_state.
2010-07-21 12:02:51 +00:00
Joe Thornber
88ae26178c [REGEX] factor _calc_state() out of _calc_states() 2010-07-21 12:00:53 +00:00
Joe Thornber
e06357084e [REGEX] reduce the number of charset nodes that are produced 2010-07-21 11:58:49 +00:00
Joe Thornber
2e52c46165 [REGEX] another matcher_t test with a larger set of regexes, only interested in the fingerprint 2010-07-21 11:52:46 +00:00
Joe Thornber
a82cf6080e [REPORT-GENERATORS] cut down stylsheet.css to what we actually use 2010-07-21 10:00:38 +00:00
Joe Thornber
78f321b32c [REGEX] add a fingerprinting facility to allow test code to compare dfas 2010-07-20 15:32:07 +00:00
Joe Thornber
3113200309 [UNIT-TEST] test for recent dm_bit_and() function 2010-07-20 15:28:22 +00:00
Joe Thornber
4a854b2927 [UNIT-TESTS] add test for the recent dm_bitset_equal() function 2010-07-20 15:26:43 +00:00
Joe Thornber
a5e61545e4 Add a unit test for the recent changes to dm_bit_get_next() 2010-07-20 15:25:39 +00:00
Joe Thornber
17a259e894 Wire the regex tests up to the reports 2010-07-20 15:21:32 +00:00
Joe Thornber
f5fb224254 Move tests from old-tests/regex to unit-tests/regex. unit-tests will
contain tests that currently work.
2010-07-20 15:18:57 +00:00
Joe Thornber
fe07d17374 Report generators for unit tests and memory checks. Configure with
--enable-testing.
2010-07-20 14:38:44 +00:00
Jonathan Earl Brassow
8d983d6f2d Fix for bug 614164: No check for existing name when splitting mirror
The user could use the same name as an existing LV when specifying a
name for an LV split off from a mirror.  This causes all sorts of
issues.
2010-07-13 22:24:39 +00:00
Jonathan Earl Brassow
2c028990b4 Fix reversal of LV list before performing a split mirror.
When splitting off mirror images from a mirror, we always take
LVs from the end of a list.  For example, if the mirror sub-devices
are lv_mimage_[012], we should select lv_mimage_2 if splitting off
one image.  However, lv_mimage_0 was being selected instead.

The problem came from calling '_move_removable_mimages_to_end'
when it was unnecessary to do so.  When the user /does/ specify
specific devices to be removed, this function properly moved the
appropriate LVs to the end of the list for extraction.  However,
if the user /doesn't/ give any specific PVs, the function should
do nothing.  '_move_removable_mimages_to_end' was keying off of
whether 'removable_pvs' was NULL or not and this value was
improperly being populated with the set of all available PVs.
This was causing '_move_removable_mimages_to_end' to completely
reverse the list, which in turn caused us to extract the
hithertofore front-of-the-list LVs.
2010-07-13 22:04:36 +00:00
Jonathan Earl Brassow
35af86cef2 Fix for bug 612311: Split of linear provides no error msg
An unhandled condition allowed the command to terminate
cleanly without a warning.  Added a check for the
'--splitmirrors' argument to allow execution to the lower
level function that has the check to see if the user is
trying to split a linear device.  You should now see a
message if you try to use --splitmirrors on a linear device.
2010-07-13 21:53:07 +00:00
Jonathan Earl Brassow
659f47f76a Fix for bugs: 612248 & 612291 Split mirror issues
The main problem with these bugs was that the newly split
off LV was not being suspended properly.  This meant that
the memlock count was not being balanced, the DM devices
were not being renamed, and some DM devices which should
have been removed were not.

I've also renamed some of the variables and added comments
to make things clearer as to what is going on.  (I can break
this patch in two if it means easier review.)
2010-07-13 21:48:16 +00:00
Dave Wysochanski
b489acb578 Minor man page updates related to metadataignore and vgmetadatacopies.
pvchange: Add --metadataignore description
vgchange: Fix minor formatting
pvcreate: Update metadataignore description to refer to pvchange
lvm.conf: Refer to pvcreate and pvchange for metadata options.
2010-07-13 15:04:23 +00:00
Fabio M. Di Nitto
e34342c1f3 Add dm_create_lockfile to libdm to handle pidfiles for all daemons.
Switch dmeventd to use dm_create_lockfile and drop duplicate code.
Allow clvmd pidfile to be configurable.
Switch cmirrord and clvmd to use dm_create_lockfile.
2010-07-13 13:51:01 +00:00
Dave Wysochanski
1f6952ba81 More comment updates in lvm2app.h. 2010-07-12 18:29:31 +00:00
Dave Wysochanski
d2f48d07bb Update comments about memory handling in lvm2app.h. 2010-07-12 18:12:23 +00:00
Peter Rajnoha
e9306bead5 Addendum for previous patch - show VG/LV name everywhere so the messages
are consistent.
2010-07-12 12:38:35 +00:00
Peter Rajnoha
a3bdff883b Add more verbose messages while checking volume_list and hosttags settings.
This should bring less confusion when there are some settings left and
people just forgot about it and then they run into problems. These messages
should give them a hint of what's really going on.
2010-07-12 11:37:49 +00:00
Jonathan Earl Brassow
ddedf42d21 Failed to test for the case where a log was requested to be removed
even though there was no log.  A simple run through the in-tree test
suite would have caught this.  :(

-               if (lv_is_mirrored(detached_log_lv) &&
+               if (detached_log_lv && lv_is_mirrored(detached_log_lv) &&

Also, made some cosmetic changes suggested by kabi after my last check-in
(e.g. s/return 0/return_0/ and adding an error message).
2010-07-09 17:57:51 +00:00
Dave Wysochanski
b3e684d1fe Update WHATS_NEW 2010-07-09 17:01:11 +00:00
Dave Wysochanski
b3a13b17cb Add log_error when strdup fails in {vg|lv}_change_tag().
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-07-09 16:57:44 +00:00
Dave Wysochanski
90b4fce199 Remove unnecessary list of includes in liblvm files.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-07-09 16:57:34 +00:00
Alasdair Kergon
2d3164a59f Use __attribute__ consistently throughout. 2010-07-09 15:34:40 +00:00
Alasdair Kergon
37ee12b2eb Fix redundant declarations and always compile with -Wredundant-decls. 2010-07-09 15:26:41 +00:00
Alasdair Kergon
d8d4a1d694 Remove superfluous fn prototypes. 2010-07-09 15:21:10 +00:00
Jonathan Earl Brassow
0b18937cbe Finish fix for bug 607347: failing both redundant mirror log legs...
A previous check-in added logic to handle the case where both images
of a mirrored log failed.  It solved the problem by simply removing
the log entirely - leaving the parent mirror with a 'core' log.  This
worked for most cases.  However, if there was a small delay between
the failures of the two mirrored log devices, the mirror would hang,
LVM would hang, and no additional LVM commands could be issued.

When the first leg of the log fails, it signals the need for repair.
Before 'lvconvert --repair' is run by dmeventd, the second leg fails.
'lvconvert' would see both devices as failed and try to remove the
log entirely.  When it came time to suspend the parent mirror to
update the configuration, the suspend would hang because it couldn't
get any I/O through the mirrored log, which was plugged waiting for
corrective action.  The solution is to replace the log with an error
target to clear any pending writes before removing it.  This allows
the parent mirror to suspend and make the proper changes.
2010-07-09 15:08:12 +00:00
Dave Wysochanski
264091a239 Pass metadataignore to pv_create, pv_setup, _mda_setup, and add_mda.
Pass metadataignore through PV creation / setup paths.
As a result of this cleanup, we can remove the unnecessary setting
of mda_ignore bits inside pvcreate_single(), after call to pv_create.
For now, just set metadataignore to '0' in some places.  This is
equivalent to the prior functionality, although the 0 is given
by the caller not hardcoded in _mda_setup() call.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-07-08 18:24:29 +00:00
Dave Wysochanski
d324fd94df Init mda->list in mda_copy.
This patch should be no functional change as all callers initialize
mda->list.
2010-07-08 17:41:46 +00:00
Zdenek Kabelac
15352cd86e Fix format string from patch apply mistake 2010-07-08 14:47:46 +00:00
Zdenek Kabelac
69ca87abad Revert previous commit as it would return also for incorrect syntax. 2010-07-08 14:29:26 +00:00
Zdenek Kabelac
e0c8e7c740 Update for dmsetup 2010-07-08 13:35:55 +00:00
Zdenek Kabelac
8a9f96b21c Set return value 0 for 'dmsetup -c -o help' 2010-07-08 13:31:03 +00:00
Zdenek Kabelac
bfb05107e3 Small update of memlock debug messages.
Gives slightly better alligned lines for reading.
2010-07-08 13:05:27 +00:00
Zdenek Kabelac
1e699db631 Do not log backtrace in valid _lv_resume() code path 2010-07-08 12:24:04 +00:00
Zdenek Kabelac
f3953f9a15 Minor optimalization of _test_word.
Skip ffs() if  (test >> bit) is 0.
2010-07-08 12:16:16 +00:00
Zdenek Kabelac
7c7f889b1e Cleanups for configure:
Indent updates.
 Use AC_HELP_STRING for help string.
 Start help string with lower letter.
 Add [] around some default values i.e. [TYPE=internal].
 Skip some "" around shell assigment when not needed.
 Fix typo --with-device-gid=UID string.
2010-07-08 12:02:48 +00:00
Dave Wysochanski
f437c92b00 Shorten prompt for pvchange and vgextend. 2010-07-07 21:30:07 +00:00
Dave Wysochanski
44dd5c38e2 Add --force to pvchange, and allow override of prompt involving metadataignore.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-07-07 19:14:57 +00:00
Dave Wysochanski
e0132ef4cb Add prompt if using --metadataignore argument with vgmetadatacopies.
When using vgmetadatacopies value other than "umanaged" (0), prompt
the user if the usage of --metadataignore would change the value of
vgmetadatacopies.  The main 2 cases are:
1) pvchange --metadataignore
2) vgextend --metadataignore

We leave the prompt check in the tools, and do not change anything
if the user says 'n'.

Examples:
vgextend --metadataignore y vgtest /dev/loop0
Setting metadataignore will override preferred number of copies of VG vgtest metadata.
Are you sure? [y/n]: y
  No physical volume label read from /dev/loop0
  Physical volume "/dev/loop0" successfully created
  Volume group "vgtest" successfully extended

pvchange --metadataignore y /dev/loop3
Setting metadataignore on /dev/loop3 will override preferred number of copies of VG vgtest metadata.
Are you sure? [y/n]: y
  WARNING: Changing preferred number of copies of VG vgtest metadata from 3 to 2
  Physical volume "/dev/loop3" changed
  1 physical volume changed / 0 physical volumes not changed

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-07-07 19:02:50 +00:00
Dave Wysochanski
94cb2db723 Add warning to vgextend and pvchange if metadataignore given on cmdline.
Warn the user then change the value of vg_mda_copies.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-07-07 18:59:45 +00:00
Dave Wysochanski
52cf11338e Test failed commit of mda on new pv - failed vgextend.
Test the auto-repair capability when we fail committing to an mda
on a new pv adding to a vg.  This test should fail until we fix
the auto-repair in this case.
2010-07-07 14:43:57 +00:00
Peter Rajnoha
c4cce91dce Use "nowatch" udev rule for known inappropriate devices.
For now, this is just a precaution. Normally, all the other (non-dm) rules
should check DM_UDEV_DISABLE_OTHER_RULES_FLAG and therefore avoid setting
any inotify watches as well. But let's make sure.

Support for final assignment of the "nowatch" rule (the use of ":=") will
appear in next udev release, v160. This should also work in previous udev
versions but the setting won't be sealed so any further OPTIONS="watch" will
always prevail there.

We may want to add more specific "nowatch" rules later if needed.
2010-07-07 11:22:46 +00:00
Alasdair Kergon
23aa1c6524 Adjust auto-metadata repair and caching logic to try to cope with empty mdas.
- If a PV contained empty mdas, the auto-recovery code was not kicking in.
- The 'inconsistent' state was getting lost when metadata was cached so
  recovery didn't kick in.  But leave the behaviour alone when using
  precommitted metadata because of a warning in a confusing FIXME.

In my testing, pvs and vgs didn't repair inconsistent metadata like they
used to do.  (How many other tools fail similarly now?)

And there should be no need to cache inconsistent metadata because it is
supposed to get repaired under the protection of a write lock immediately it is
discovered.

This code is in need of a redesign based on first principles.
I still see bugs in this code and this commit is risky.
2010-07-07 02:53:16 +00:00
Alasdair Kergon
9de129d5b2 post-release 2010-07-07 02:37:28 +00:00
Alasdair Kergon
9d4cb8e97a . 2010-07-07 02:24:05 +00:00
Alasdair Kergon
c1589415f0 fix code in 2nd mda unignore loop to match 1st loop 2010-07-06 20:09:38 +00:00
Alasdair Kergon
389cbc3686 s/flags/mda/ 2010-07-06 17:29:50 +00:00
Alasdair Kergon
2508b4000b shorten mesg 2010-07-06 17:27:32 +00:00
Alasdair Kergon
e5164acc9c fix jumbled args in 'Adjusting' message 2010-07-06 17:26:08 +00:00
Jonathan Earl Brassow
71ee58b6bb Fix for bug 607347: failing both redundant mirror log legs...
Rather than attempting to remove all the images of a mirrored
log volume via remove_mirror_images, simply remove the log
if all its devices have failed.

Taka was the first to report that there is still an outstanding
issue with handling this case.  I've managed to reproduce it
only very rarely, and am still working on identifying the problem.
Failing to handle the problem rarely is better than not handling
the scenario at all, so I'm checking this in.
2010-07-06 17:02:03 +00:00
Alasdair Kergon
3512a6ad0b pre-release 2010-07-06 16:49:38 +00:00
Alasdair Kergon
b02de599a0 Fix dmlosetup snprintf %llu compiler warning. 2010-07-05 22:56:31 +00:00
Alasdair Kergon
38c6e8faf6 Randomly select which mdas to use or ignore.
Add some missing standard configure.in checks.
2010-07-05 22:23:15 +00:00
Alasdair Kergon
431ca070a4 Add parentheses to some libdevmapper.h macro arguments. 2010-07-05 22:22:43 +00:00
Alasdair Kergon
ed2630dce5 Add printf format attributes to yes_no_prompt & dm_{sn,as}printf and fix a calle 2010-07-02 21:16:50 +00:00
Petr Rockai
5a2c9c9cee . 2010-07-02 17:44:17 +00:00
Dave Wysochanski
a7184430f1 Minor changes to man pages for --metadataignore.
Move the definition from pvchange to pvcreate - the location of
other metadata options.
2010-07-02 17:05:22 +00:00
Alasdair Kergon
2e83fee74c remove unneeded header 2010-07-02 10:25:16 +00:00
Alasdair Kergon
bbe29582cb Always pass unsuspended dm devices through persistent filter to other filters.
Move test for suspended dm devices ahead of other filters.
2010-07-02 02:09:57 +00:00
Milan Broz
76470d9608 Fix another segfault in clvmd -R if no response from daemon received.
Missed the same problem in another function...
2010-07-01 21:46:09 +00:00
Milan Broz
715c4ca4cd Remove superfluous suspended device counter from clvmd.
Moreover, in current mirror handling, when it calls activate
on removed but suspended detached log this counter drops below zero
and confuses debug log.
2010-07-01 21:23:47 +00:00
Alasdair Kergon
c097423d77 Fix lvm shell crash when input is entirely whitespace. (Xinwei Hu) 2010-07-01 11:04:58 +00:00
Alasdair Kergon
54637fa44d Move dmeventd man page from install_lvm2 to install_device-mapper. (1.02.50) 2010-07-01 10:57:03 +00:00
Petr Rockai
c192e88bcd Restore the "removemissing" behaviour of lvconvert --repair --use-policies. 2010-07-01 10:10:52 +00:00
Dave Wysochanski
19a12b28bc Update metadata-balance test for --vgmetadatacopies 0.
Should be equivalent to "unmanaged".
2010-06-30 22:22:00 +00:00
Petr Rockai
d55a7f18b0 . 2010-06-30 21:58:13 +00:00
Petr Rockai
4008a5fbf9 Maintain memlock balance in clvmd.
When a mirror is being downconverted in a cluster, a series of suspends and
resumes is executed.

With the change to using UUIDs in dev_manager instead of names, the behaviour
has changed with regards to including an _mlog in the deptree of a logical
volume. In the old (pre-UUID-enabled) code, the _mlog would appear in a deptree
of any volume purely based on a name match: a linear volume foo would include
foo_mlog in its dependencies if that happened to exist. This behaviour was
fixed and the mlog is now only included for mirrors.

By a coincidence, this mlog bug had been hiding a different bug in clvmd. When
a mirror is being dismantled (and converted to a linear volume), it is first
suspended as a whole, then later resumed in parts. Nevertheless, the overall
memlock balance is maintained in this operation. The problem kicks in, because
even though the mirror log was suspended as part of the mirror, when the
dismantled mirror is resumed again, it is no longer a mirror and therefore the
mirror log stays suspended. This would not be a problem in itself, since
_delete_lv (from metadata/mirror.c) is called on it subsequently, which does an
activate/deactivate cycle and removes the LV. The activate/deactivate cycle
correctly prompts clvmd to resume the device: however, in doing this, it will
issue an unpaired resume operation (the suspend that caused the mirror log to
be suspended is paired with resuming the dismantled mirror later). We have
concluded that the path in clvmd should never affect memlock_count, since there
should never be an unmatched explicit suspend preceding this resume.
2010-06-30 21:40:27 +00:00
Alasdair Kergon
e260f729da Fix --[vg]metadatacopies arg processing 2010-06-30 20:21:03 +00:00
Alasdair Kergon
17cffe2bf9 improve vgmetadatacopies unmanaged message 2010-06-30 20:03:52 +00:00
Dave Wysochanski
d1b068a68a Check for missing_pv in vg_remove loop.
If a pv is missing, we should just skip it rather than checking the
device size and failing the vgremove.
2010-06-30 19:55:43 +00:00
Alasdair Kergon
a5053f6ada more mda ignore cleanups 2010-06-30 19:28:35 +00:00
Dave Wysochanski
261a3e7e78 Refactor vg_remove_check to place pv removal into separate function. 2010-06-30 18:03:52 +00:00
Alasdair Kergon
36e28d4b4b more metadataignore message/code cleanup 2010-06-30 17:13:05 +00:00
Alasdair Kergon
cbc562a636 Update partial mode warning message. 2010-06-30 16:43:09 +00:00
Alasdair Kergon
51c4e1ea4b revert that 2010-06-30 14:54:29 +00:00
Alasdair Kergon
c8f03c38fe suppress useless compiler warning 2010-06-30 14:52:29 +00:00
Alasdair Kergon
4107d4d24d post-release 2010-06-30 14:50:32 +00:00
Dave Wysochanski
f0c79d95f5 Only attempt to guarantee 1 mda ignored if there's at least one mda in the vg. 2010-06-30 14:48:07 +00:00
Alasdair Kergon
d13b495217 Only attempt to guarantee 1 mda ignored if there's at least one mda in the vg. 2010-06-30 14:27:40 +00:00
Alasdair Kergon
549e1dfb0d pre-release 2010-06-30 14:04:15 +00:00
Milan Broz
e260ecef0b Fix vgremove to allow removal of VG with missing PVs. (2.02.52) 2010-06-30 14:01:39 +00:00
Alasdair Kergon
b16b4d92a7 Improve various log messages. 2010-06-30 13:51:11 +00:00
Dave Wysochanski
c2584edbdb Update nightly tests for vgextend --metadataignore. 2010-06-30 13:11:12 +00:00
Dave Wysochanski
aa00196b6f Update nightly tests for vgextend --metadataignore. 2010-06-30 13:04:59 +00:00
Dave Wysochanski
75e3996d36 Add --metadataignore to vgextend and man pages. 2010-06-30 13:03:48 +00:00
Dave Wysochanski
d50f13b90d Add pvmetadatacopies to lvm.conf and pvcreate man pages. 2010-06-30 12:49:28 +00:00
Dave Wysochanski
652e9c00c5 Update pvcreate tests for --metadataignore. 2010-06-30 12:17:55 +00:00
Dave Wysochanski
fdcf749779 Add --metadataignore to pvcreate.
Allow metadataignore flag to be passed in to pvcreate.
Ideally, more refactoring of the mda allocation / initialization
is warranted, but for now, we just add another parameter to 'add_mda'
to take an existing mda ignored flag.  We need to do this or pv_write
loses the state of the mda 'ignored' flag before copying and writing
to disk.
2010-06-30 12:17:24 +00:00
Dave Wysochanski
ed38f47f23 Improve logging for setting --vgmetadatacopies.
Example of logging:
metadata/metadata.c:1127     Setting mda_copies = 3 on vg vgtest
metadata/pv_manip.c:296         /dev/loop2 0:      0     25: NULL(0:0)
metadata/pv_manip.c:296         /dev/loop3 0:      0     25: NULL(0:0)
metadata/pv_manip.c:296         /dev/loop4 0:      0     25: NULL(0:0)
metadata/metadata.c:1072     Adjusting ignored mdas on vg vgtest, vg_mda_used_count=5, vg_mda_copies=3
metadata/metadata.c:1015     Setting ignore flag for 2 mdas on vg vgtest
metadata/metadata.c:4151     Setting mda ignored flag for metadata_locn /dev/loop2.
metadata/metadata.c:4151     Setting mda ignored flag for metadata_locn /dev/loop3.
2010-06-29 22:41:28 +00:00
Dave Wysochanski
3b0585be3b Improve logging for metadata ignore by printing device name.
Print device name when setting or clearing metadata ignore bit.
Example:
label/label.c:160       /dev/loop2: lvm2 label detected
cache/lvmcache.c:1136         lvmcache: /dev/loop2: now in VG #orphans_lvm2 (#orphans_lvm2)
metadata/metadata.c:4142     Setting mda ignored flag for metadata_locn /dev/loop2.
format_text/text_label.c:318     Skipping mda with ignored flag on device /dev/loop2 at offset 4096
2010-06-29 22:37:32 +00:00
Dave Wysochanski
ebe1a8080c Add some log_verbose debug statements related to metadataignore.
Logging isn't ideal, especially for mda_set_ignore.  Ideally we'd
like to display the device name and offset in this case but this
requires a bit more work and a per-format 'mda_description' function
pointer definition (we don't have access to mda_context in
metadata.c).
2010-06-29 22:25:58 +00:00
Dave Wysochanski
593bb6470b Move code into pv_change_metadataignore library function.
In preparation to call this from both pvcreate as well as pvchange,
move the guts of metadataignore into a library function.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-29 21:32:44 +00:00
Dave Wysochanski
6ce45042ef Add error message if backup_to_file fails because of empty in_use mdas list. 2010-06-29 15:03:59 +00:00
Dave Wysochanski
f615e84381 Add more initializations of 'mda->flags' field.
Mda allocation needs refactored into a single function but as an
interim step, ensure mda->flags is initialized properly.
2010-06-29 14:52:56 +00:00
Dave Wysochanski
bfaf1a4ecc Attempt to fix intermittent failure with non-debug configured vgcfgbackup.
There's an intermittent failure with vgcfgbackup that seems to have been
introduced with the metadataignore / vgmetadatacopies patchset.
Intermittent failures are often the result of uninitialized data,
so this patch calls zalloc in a few places it might matter.
2010-06-29 13:29:53 +00:00
Dave Wysochanski
bc1a30f542 Update WHATS_NEW for --metadataignore and --vgmetadatacopies changes. 2010-06-29 12:06:14 +00:00
Dave Wysochanski
28e0d5dd64 Update t-covercmd pvck to take proper device argument. 2010-06-28 21:49:31 +00:00
Dave Wysochanski
ffdd12841e Fix compile warning in vgchange.c regarding mda_copies initialization. 2010-06-28 21:35:00 +00:00
Dave Wysochanski
4b660a89d9 Update tests to handle phase 2 (vg based) metadata balance.
Test vgcreate/vgchange --vgmetadatacopies, vgextend, vgreduce,
vgsplit, vgmerge.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:40:27 +00:00
Dave Wysochanski
48386caec5 Update example.conf.in to describe vgmetadatacopies.
Update example.conf to describe vgmetadatacopies.  Provide an
explanation for the '0' ("unmanaged") value.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:40:15 +00:00
Dave Wysochanski
996360df6b Allow 'all' and 'unmanaged' values for --vgmetadatacopies.
Allowing an 'all' and 'unmanaged' value is more intuitive, and
provides a simple way for users to get back to original LVM behavior
of metadata written to all PVs in the volume group.

If the user requests "--vgmetadatacopies unmanaged", this instructs
LVM not to manage the ignore bits to achieve a specific number of
metadata copies in the volume group.  The user is free to use
"pvchange --metadataignore" to control the mdas on a per-PV basis.
If the user requests "--vgmetadatacopies all", this instructs LVM
to do 2 things: 1) clear all ignore bits, and 2) set the "unmanaged"
policy going forward.

Internally, we use the special MAX_UINT32 value to indicate 'all'.
This 'just' works since it's the largest value possible for the
field and so all 'ignore' bits on all mdas in the VG will get
cleared inside _vg_metadata_balance().  However, after we've
called the _vg_metadata_balance function, we check for the special
'all' value, and if set, we write the "unmanaged" value into the
metadata.  As such, the 'all' value is never written to disk.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:40:01 +00:00
Dave Wysochanski
fe260a26ed Ensure --metadatacopies parameter gets interpreted based on command.
Now that we have both --pvmetadatacopies and --vgmetadatacopies,
we need to make sure --metadatacopies gets interpreted correctly.

For pv commands, --metadatacopies should imply --pvmetadatacopies,
and for vg commands, --vgmetadatacopies.

Note: this will change the behavior of vgcreate with --metadatacopies
to be a synonym for --vgmetadatacopies.  Previously, --metadatacopies
would apply to any PVs given with vgcreate that needed an implicit
pvcreate.  As a result, one small change is needed to one of the nightly
tests - t-vgcreate-usage.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:39:39 +00:00
Dave Wysochanski
457bc4fb63 Add --vgmetadatacopies to vgsplit man page and command.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:39:24 +00:00
Dave Wysochanski
eeb7ce60ae Update _vgmerge_single() to move fid->metadata_areas_ignored.
When vgmerge is called we move the mdas from the source to the
destination.  With metadata balancing we now have another mda
list, fid->metadata_areas_ignored, so move the mdas on this list
as well.

This patch should not matter as the code is written today.  However
we include it for completeness in the case that _vgmerge_single()
is refactored and/or moved into a library function.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:39:08 +00:00
Dave Wysochanski
e52586db1c Update check in vg_split_mdas to account for ignored mdas list.
The check in vg_split_mdas will trigger an error if the 'from' vg
list is empty.  However, this might be ok in some instances now
that we have ignored mdas.  Relax this check so an error is triggered
only in the case where there's truly no more mdas in the 'from'
vg.

One example of where this makes a difference is with vgreduce.
If we try to vgreduce a PV with un-ignored mdas, this should trigger
the balancing function to un-ignore mdas on another PV in the VG.
However, we don't get to vg_write() before we fail because this
list size check fails, and we see an error message indicating:
"Cannot remove final metadata area ..."

Another example is with vgsplit into a new VG, where the PVs
being moved contain all ignored mdas.  We must move the mdas on
fid->metadata_areas_ignored from 'vg_from' to 'vg_to'.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:38:56 +00:00
Dave Wysochanski
6cb9642200 Ensure fid mda lists are populated correctly during vgextend.
The vgextend path calls add_pv_to_vg().  Inside add_pv_to_vg(),
we must ensure we pass the correct mdas list into pv_setup(), as
copies of mdas are placed on the vg->fid list.  If we don't place
the mdas on the correct vg->fid list, the various counts may be
incorrect and the metadata balance algorithm will not work when
called from vg_write() path.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:38:39 +00:00
Dave Wysochanski
7ebd46ee53 Add --vgmetadatacopies to vgcreate man page, command, and lvm.conf.
Allow parsing of --vgmetadatacopies for vgcreate.  Accept
--metadatacopies as a synonym for --vgmetadatacopies.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:38:23 +00:00
Dave Wysochanski
d575dfe440 Set vg_mda_copies when pvchange --metadataignore is given.
When a user explicitly sets a new mda ignore value for a PV, we
should update vg_mda_copies accordingly.  When the VG is written
out, the user would not want the new ignore state to get lost as
a result of the vg_mda_copies value and logic in the vg_write
path.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:38:06 +00:00
Dave Wysochanski
074c1880ff Implement _vg_adjust_ignored_mdas and call from vg_write() path.
Compare the value of the newly added vg_mda_copies field
(--vgmetadatacopies parameter) with the current count of
in-use mdas and ignoring or unignoring mdas as necessary to
get to the target count.  Also, as a safety check before
returning, ensure we have at least one mda enabled.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:37:54 +00:00
Dave Wysochanski
b53c1011fc Update vgchange tool to accept --vgmetadatacopies.
Update logic in vgchange to handle --vgmetadatacopies, allow
--metadatacopies as a synonym to --vgmetadatacopies,
and add these parameters to args.h and commands.h
Forbit both --vgmetadatacopies and --metadatacopies as only
one allowed.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:37:37 +00:00
Dave Wysochanski
11f35d31d9 Add vg_mda_copies display field to 'vgs' command.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:37:23 +00:00
Dave Wysochanski
36d07a5cea Make vg->mda_copies persistent in on disk vg metadata.
This patch adds the ability to read/write the vg->mda_copies values
from/to the vg metadata.

If we read the VG metadata and this field does not exist, we set
mda_copies to the default value of 0.  Later in the code, we use
this special '0' value to indicate a disable of metadata balancing.
This should preserve existing LVM behavior and ensure metadata balancing
can be turned off should the need arise.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:37:10 +00:00
Dave Wysochanski
12910a5a29 Add vg get/set methods for VG metadata copies.
This patch adds the get and partially implemented set function.
The 'set' function should probably ignore or un-ignore metadata areas
based on new values.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:36:56 +00:00
Dave Wysochanski
eb93c134a7 Add mda_copies to VG structures and initialization.
Add a field to struct volume_group to later implement metadata
balancing:
- mda_copies: target # of non-ignored mdas in the VG; default 0 (do
not control pv 'ignore mdas' bit.

This patch just adds the parameter to the structures with the default
values but does not modify any commands.  Should be no functional change.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:36:37 +00:00
Dave Wysochanski
493ad74331 Define vgmetadatacopies in vgchange man page.
This patch adds a vgmetadatacopies parameter for metadata balancing.
This parameter provides a simple way for users to create a policy for
placing metadata on PVs automatically by LVM.  The behavior is implemented
inside LVM by managing the 'ignore' mda bits.  We chose the name
'vgmetadatacopies' as this is a natural extension to the existing parameter
'pvmetadatacopies' / 'metadatacopies' in pvcreate.

This is a first step at VG parameter based metadata balancing.  Most users
will probably want to state that they want a certain number of PVs to contain
metadata, and they may be less concerned about a specific number of metadata
copies in the volume group.  However, for default values (pvmetadatacopies
is 1 by default), the number of metadatacopies in the volume group, and the
number of PVs with metadata are the same.  In the future we could add
vgmetadatacopiespvs to define more specifically the number of pvs in the
VG that contain metadata, but for now we start with this parameter.

Another possible future extension would be to define a specific pv tag
to mark the set of PVs that should be used for metadata balancing.  This
tag based approach could be used in conjunction with 'vgmetadatacopies'.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:36:18 +00:00
Dave Wysochanski
a563816d3a Add tests for phase 1 of metadata balance - manage per-PV ignore bit.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:36:06 +00:00
Dave Wysochanski
43526902ae Before committing each mda, arrange mdas so ignored mdas get committed first.
Arrange mdas so mdas that are to be ignored come first.  This is an
optimization that ensures consistency on disk for the longest period of time.
This was noted by agk in review of the v4 patchset of pvchange-based mda
balance.

Note the following example for an explanation of the background:
Assume the initial state on disk is as follows:
PV0 (v1, non-ignored)
PV1 (v1, non-ignored)
PV2 (v1, non-ignored)
PV3 (v1, non-ignored)

If we did not sort the list, we would have a commit sequence something like
this:
PV0 (v2, non-ignored)
PV1 (v2, ignored)
PV2 (v2, ignored)
PV3 (v2, non-ignored)

After the commit of PV0's mdas, we'd have an on-disk state like this:
PV0 (v2, non-ignored)
PV1 (v1, non-ignored)
PV2 (v1, non-ignored)
PV3 (v1, non-ignored)

This is an inconsistent state of the disk. If the machine fails, the next
time it was brought back up, the auto-correct mechanism in vg_read would
update the metadata on PV1-PV3.  However, if possible we try to avoid
inconsistent on-disk states.  Clearly, because we did not sort, we have
a greater chance of on-disk inconsistency - from the time the commit of
PV0 is complete until the time PV3 is complete.

We could improve the amount of time the on-disk state is consistent by simply
sorting the commit order as follows:
PV1 (v2, ignored)
PV2 (v2, ignored)
PV0 (v2, non-ignored)
PV3 (v2, non-ignored)

Thus, after the first PV is committed (in this case PV1), on-disk we would
have:
PV0 (v1, non-ignored)
PV1 (v2, ignored)
PV2 (v1, non-ignored)
PV3 (v1, non-ignored)

This is clearly a consistent state.  PV1 will be read but the mda will be
ignored.  All other PVs contain v1 metadata, and no auto-correct will be
required.  In fact, if we commit all PVs with ignored mdas first, we'll
only have an inconsistent state when we start writing non-ignored PVs,
and thus the chances we'll get an inconsistent state on disk is much
less with the sorted method.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:35:49 +00:00
Dave Wysochanski
b6ec0f9b92 Refactor vg_commit() to add _vg_commit_mdas().
Factor out calling mda->ops->vg_commit() for each mda.
No functional change.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:35:33 +00:00
Dave Wysochanski
4e7f1a3a9d Update _vg_read and _text_create_text_instance to use fid_add_mda[s].
When we are constructing the vg, we may need to adjust the list of
metadata_areas if there are ignored mdas.  At label read time, we
do not read the metadata of ignored mdas, and as a result, they do
not get placed on vg->fid->metadata_areas inside _text_create_text_instance
since lvmcache does not have these areas attached to vginfo->infos.
However, when we're checking the pvids inside _vg_read, after having
read another metadata area from another PV, we do have the opportunity
to update the metadata_area and metadata_areas_ignored lists based
on the read metadata_area.  We need accurate mda lists for the reporting
functions that count the ignored mdas, as well as general correctness
of mda balancing.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:35:17 +00:00
Dave Wysochanski
50e879ef56 Use mdas_empty_or_ignored() in place of checks for empty mda list.
With the addition of ignored mdas, we replace all checks for an empty
mda list with a new function to look for either an empty mda list or
ignored mdas.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:34:58 +00:00
Dave Wysochanski
06468b9049 Add mdas_empty_or_ignored() helper function.
Add a helper function to consolidate checking for an empty mdas list
or ignored mdas.  Ignored mdas should behave almost identically to
an empty mda list - the metadata areas should not be read or written
to.  This function will make it easier to implement metadata balancing
and easier to track pvs with an empty mda list or ignored mdas.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:34:40 +00:00
Dave Wysochanski
2bdbceca91 Implement ignore of mda if bit set by skipping r/w of metadata.
We implement ignore of an mda at label_read time by checking for
the ignore bit, and then skipping the reading of the vgname and
other information in the metadata.  This will have an effect similar
to a PV found with no mdas.  Thus, it will look like an orphan in the
cache until we scan the rest of the system and find a PV with
metadata, and the mda will not be on the vg->fid->metadata_areas
list so no read/writes will be done to the metadata area.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:34:24 +00:00
Dave Wysochanski
d34d85efdd Update pvchange, pvs and vgs man pages for metadata ignore.
Explain --metadataignore argument to pvchange, add new fields to pvs / vgs.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:34:12 +00:00
Dave Wysochanski
7e3278ae8f Add --metadataignore to pvchange, allowing for ignoring of metadata areas.
This patch just modifies pvchange to call the underlying ignore
functions for mdas.  Ensure special cases do not reflect changes
in metadata (PVs with 0 mdas, setting ignored when already ignored,
clearing ignored when not ignored).

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:33:58 +00:00
Dave Wysochanski
bca7c09e6c Define new functions and vgs/pvs fields related to mda ignore.
Define a new pvs field, pv_mda_used_count, and a new vgs field,
vg_mda_used_count to match the existing pv_mda_count and vg_mda_count.
These new fields count the number of mdas that have the 'ignored' bit
clear (they are in use on the PV / VG).  Also define various supporting
functions to implement the counting as well as setting the ignored
flag and determining if an mda is ignored.  These high level functions
call into the lower level location independent mda ignore functions
defined by earlier patches.

Note that counting ignored mdas in a vg requires traversing both lists
and checking for the ignored bit on the mda.  The count of 'ignored'
mdas then is defined by having the bit set, not by which list the mda
is on.  The list does determine whether LVM actually does read/write to
the mda, though we must count the bits in order to return accurate numbers
for the various counts.  Also, pv_mda_set_ignored must search both vg
lists for ignored mda.  If the state changes and needs to be committed
to disk, the ignored mda will be on the non-ignored list.

Note also in pv_mda_set_ignored(), we must properly manage the mda lists.
If we change the ignored state of an mda, we must change any mdas on
vg->fid->metadata_areas that correspond to this pv.  Also, we may
need to allocate a copy of the mda, as is done when fid->metadata_areas
is populated from _vg_read(), if we are un-ignoring an ignored mda.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:33:44 +00:00
Dave Wysochanski
2e163858fc Add metadata_areas_ignored list and functions to manage ignored mdas.
Add a second mda list, metadata_areas_ignored to fid, and a couple
functions, fid_add_mda() and fid_add_mdas() to help manage the lists.

These functions are needed to properly count the ignored mdas and
manage the lists attached to the 'fid' and ultimately the 'vg'.

Ensure metadata_areas_ignored is initialized in other formats, even
if the list is never used.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:33:22 +00:00
Dave Wysochanski
0ad2a4c72f Rename fid->metadata_areas to fid->metadata_areas_in_use.
Rename the metadata_areas list to an 'in_use' list to prepare for
future 'ignored' list.
2010-06-28 20:32:44 +00:00
Dave Wysochanski
b69ca133af Use vg_mda_count() in vgdisplay.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:32:21 +00:00
Dave Wysochanski
d4f0f090c0 Add mda location specific mda_copy constructor.
Because of the way mdas are handled internally, where a PV in a VG
has mdas on both info->mdas and vg->fid->metadata_areas list, we
need a location independent copy constructor for struct
metadata_area.  Break up the existing format-text specific copy
constructor into a format independent piece and a format dependent
piece.

This function is necessary to properly implement pv_set_mda_ignored().

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Reviewed-by: Alasdair G Kergon <agk@redhat.com>
2010-06-28 20:31:59 +00:00
Dave Wysochanski
e56f421490 Add mda_locns_match() internal library function for mapping pv/device to VG mda.
A metadata_area is defined independent of the location.  One downside
is that there is no obvious mapping from a pv to an mda.  For a PV in
a VG, we need a way to start with a PV and end up with an MDA, if we
are to manage mdas starting with a device/pv.  This function provides
us a way to go down the list of PVs on a VG, and identify which ones
match a particular PV.

I'm not entirely happy with this approach, but it does fit into the
existing structures in a reasonable way.

An alternative solution might be to refactor the VG - PV interface such
that mdas are a list tied to a PV.  However, this seemed a bit tricky since
a PV does not come into existence until after the list of mdas is
constructed (see _vg_read() - we create a 'fid' and attach mdas to it,
then we go through them and attach pvs).

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Reviewed-by: Alasdair G Kergon <agk@redhat.com>
2010-06-28 20:31:38 +00:00
Dave Wysochanski
c9b93bf65a Ensure in-memory state matches on-disk state of mda ignore bit.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:31:18 +00:00
Dave Wysochanski
9b8ce15725 Allow raw_read_mda_header to be called from text_label.c.
We'd like to pass in mda_header to vgname_from_mda().  In order to
do this, we need to call raw_read_mda_header() from text_label.c,
_text_read(), which gets called from the label_read() path, and
peers into the metadata and update vginfo cache.  We should check
the disable bit here, and if set, not peer into the vg metadata,
thus reducing the I/O to disk.

In the process, move vgname_from_mda() to layout.h, since the fn
only gets called from format_text code, and we need the mda_header
definition from the private layout.h.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:31:01 +00:00
Dave Wysochanski
014e7daa36 Move dev_open/dev_close outside vgname_from_mda().
Refactor vgname_from_mda() so caller must open/close the device.
Should be no functional change.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:30:46 +00:00
Dave Wysochanski
306e3b86f1 Move dev_open / dev_close outside _vg_read_raw_area().
This refactoring moves the device open/close up one level to the caller of
_vg_read_raw_area().  Should be no functional change and facilitate future
changes related to metadata balancing.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:30:30 +00:00
Dave Wysochanski
ce7cf5f46b Add location independent flag and functions to ignore mdas.
First we add a 'flags' field to the location independent
metadata_area structure, and a MDA_IGNORE flag.  The
mda_is_ignored and mda_set_ignored functions are added to
manage the flag.  Adding the flag and functions gives a
library interface to ignore metadata areas independent of
the underlying location (disk, file, etc).  The location
specific read/write functions must then handle the specifics
of what this flag means to the location.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Reviewed-by: Alasdair G Kergon <agk@redhat.com>
2010-06-28 20:30:14 +00:00
Dave Wysochanski
3c3b5e0dbf Add text format specific 'rlocn' ignore flag and access functions.
Adding a flag to the 'rlocn' structure in the mda header of the
text format allows us to flip a bit to ignore an area on disk that
stores the metadata via the text format specific mda_header.
This patch defines the flag and access functions to manage the flag.
Other patches will manage the ignore on a format-independent basis,
by using a flag in the metadata_area structure.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:29:57 +00:00
Dave Wysochanski
0fc136fcc7 Change 'filler' to 'flags' in on-disk 'raw_locn' structure.
Future patches will make use of a specific flag in the on-disk 'raw_locn'
structure to enable/disable metadata areas, and facilitate metadata
balancing.

Note that 'filler' is always set to '0' (see add_mda() - memset),
so use of this area as a non-zero flags field is a safe way to
provide future code features.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-28 20:29:42 +00:00
Petr Rockai
80328439be Minor shell style cleanup. 2010-06-28 19:13:33 +00:00
Petr Rockai
3c649de483 Refactor the handles_missing_pv logic in lvchange. 2010-06-28 19:10:16 +00:00
Jonathan Earl Brassow
184ca578a0 Fix for bz608048 from Taka...
The same region size is used for both mirror volume and mirrored
log volume, but when the physical extent size is bigger than region size,
the size of mirror leg for mirrored log is smaller than the region size
and lvcreate command fails.

This patch adjusts a region size of mirrored log to a smaller value of
region size or physical extent size.

[This patch ensures that the region_size of the mirrored log does not
exceed the size of the mirrored log itself, which would violate the
kernel constraint: (region_size <= ti->len).]

Signed-off-by: Takahiro Yasui <takahiro.yasui@hds.com>
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
2010-06-28 14:19:41 +00:00
Alasdair Kergon
3643acadac generate liblvm2cmd exported symbols too 2010-06-25 18:23:10 +00:00
Alasdair Kergon
1c78aa19ff Generate liblvm2app and libdevmapper exported symbols from header files.
Detection is simply by prefix - dm_ or lvm_ - and any additional symbols needed
but not detected this way are placed in .exported_symbols.
2010-06-25 18:17:38 +00:00
Alasdair Kergon
a64fec0722 actually, let's keep these in same order as in header 2010-06-25 12:21:47 +00:00
Alasdair Kergon
962811198f Update liblvm2app exported symbols.
Add Makefile target to generate current list of lvm2app.h functions.
2010-06-25 12:19:52 +00:00
Zdenek Kabelac
378c3b1641 Fix typo reported in Debian bugzilla #586043 2010-06-24 08:36:57 +00:00
Zdenek Kabelac
34c937510d Preload libc locale messages.
Preload libc.mo file for localized lvm before taking memory lock - this way
we prevent disk access for some error paths in libdm, that prints localized
errno messages while they are still in memory locked state.
2010-06-24 08:29:30 +00:00
Zdenek Kabelac
e924e9cfb2 Add few missing information about what is this script doing.
(based upon Debian bugzilla suggestion)
2010-06-24 08:18:54 +00:00
Petr Rockai
8f0fa65379 Add a test of wait_for_locks behaviour (adapted from an original by Dave). 2010-06-24 07:57:54 +00:00
Jonathan Earl Brassow
00268c1b82 update WHATS_NEW file with entry for simultaneous mirror image and
mirrored log image fault-handling fix.
2010-06-23 21:01:42 +00:00
Jonathan Earl Brassow
7ab37c0ffc Committing Taka's patch... He found a problem during
the failure of a device that contained both a image of
a mirror and an image of the mirrored log.  The order
of the handling of those faults was important (and
wrong), this patch corrects that.

Patch-From: Takahiro Yasui <tyasui@redhat.com>
2010-06-23 20:32:29 +00:00
Alasdair Kergon
4cae37c340 post-release 2010-06-23 19:35:11 +00:00
Alasdair Kergon
63f8ad01b2 pre-release 2010-06-23 17:48:41 +00:00
Alasdair Kergon
1a6159aca5 In some C++ standards, typeof is not reserved. 2010-06-23 17:03:14 +00:00
Peter Rajnoha
60e9c014a2 Fix udev rules to handle spurious events properly.
We can use DM_UDEV_PRIMARY_SOURCE_FLAG to identify the spurious events
and use it as an indication that the device has already been activated before
(and hence we can find this property in udev database).

WARNING: This change requires udev startup script to preserve udev database
from initrd. All the information stored there during activation of devices
is important for the initial "udevadm trigger --action=add" call that is
used in udev startup script. If not done this way, udev startup script needs
to define DM_UDEV_PRIMARY_SOURCE_FLAG=1 property for any ADD events it uses.
2010-06-23 17:00:32 +00:00
Milan Broz
8f3295456d Fix clvmd init script status
- s/Active clustred VG/clustered VG/ (only LV can be active)

- print only active LVs (not all) in status command
(In the lvdisplay form /dev/vg/lv.)

For now, still use awk (already used in clustered_vgs).

https://bugzilla.redhat.com/show_bug.cgi?id=598495
2010-06-23 16:24:13 +00:00
Mike Snitzer
78df2efac2 Use more standard naming for PVs and VG in vgimportclone example. 2010-06-23 16:12:30 +00:00
Mike Snitzer
546357eaeb Cleanup sentences of the example provided in the vgimportclone man page
(motivated by a patch that Debian was carrying).
2010-06-23 14:15:55 +00:00
Jonathan Earl Brassow
50552f3fd5 The function that runs to compress a stacked mirror after
converting from 2-way to 3-way mirror (collapse_mirrored_lv)
was calling '_remove_mirror_images' with the 'remove_log'
parameter set.  When the code was put in to fix 599898 to
honor log parameters during conversion, this argument was
suddenly being honored.  Thus, when someone would convert from
a 2-way to 3-way mirror, the log would get removed.

'collapse_mirrored_lv' should not be calling '_remove_mirror_images'
with 'remove_log' set.
2010-06-23 13:57:26 +00:00
Zdenek Kabelac
7d62ac4b34 Fix typo: "INTERNAL ERROR" -> "INTERNAL_ERROR"
Author: Xinwei Hu
        xwhu at novell dot com
2010-06-23 12:54:46 +00:00
Alasdair Kergon
beb14c3fa7 Add lv_path to reports to offer full /dev pathname. 2010-06-23 12:32:08 +00:00
Zdenek Kabelac
a15d3d8dcc Fix typo: premitted -> permitted
Signed-off-by: Takahiro Yasui<tyasui@redhat.com>
2010-06-23 10:22:59 +00:00
Zdenek Kabelac
9c7755c862 Better WHATS_NEW message - as we still have not fixed 'make exec_prefix' build 2010-06-23 10:21:00 +00:00
Milan Broz
866553e76f Fix "allocated" warning typo. 2010-06-22 21:10:53 +00:00
Dave Wysochanski
f8f2be527f Add device name to output of error messages in raw_read_mda_header().
It would be helpful if we had the device name when something like
a mda_header checksum error occurs.

Before:
./tools/lvm pvs -opv_name,vg_name,uuid,mda_count,pv_mda_count_ignored,vg_mda_count,vg_mda_count_ignored,vg_mda_copies
  Incorrect metadata area header checksum
  PV         VG      PV UUID                                #PMda #PMdaIgn #VMda #VMdaIgn #VMdaCps
  /dev/loop0 vgtest2 sVv26t-gjpb-Rcau-uBDO-Cx04-GbRR-6Ssq7e     2        0     4        0        4
  /dev/loop1 vgtest2 zXWStT-qE8F-mbkc-RfgH-aytv-mptF-Y5Ce09     2        0     4        0        4
  /dev/loop2         riCpK9-9G8r-LlIp-i2oh-mb3N-CUzk-u5YpuR     1        0     0        0        0
  /dev/loop3 vgtest  tQCUjm-rmyd-i92d-4eeE-UYBW-v1vQ-kRaA17     2        0     4        2        0
  /dev/loop4 vgtest  ZRvpeI-p8F1-ccVW-BBac-xhl1-aGXU-CbP0oo     2        2     4        2        0

After:
./tools/lvm pvs -opv_name,vg_name,uuid,mda_count,pv_mda_count_ignored,vg_mda_count,vg_mda_count_ignored,vg_mda_copies
  Incorrect metadata area header checksum on /dev/loop2 at offset 4096
  PV         VG      PV UUID                                #PMda #PMdaIgn #VMda #VMdaIgn #VMdaCps
  /dev/loop0 vgtest2 sVv26t-gjpb-Rcau-uBDO-Cx04-GbRR-6Ssq7e     2        0     4        0        4
  /dev/loop1 vgtest2 zXWStT-qE8F-mbkc-RfgH-aytv-mptF-Y5Ce09     2        0     4        0        4
  /dev/loop2         riCpK9-9G8r-LlIp-i2oh-mb3N-CUzk-u5YpuR     1        0     0        0        0
  /dev/loop3 vgtest  tQCUjm-rmyd-i92d-4eeE-UYBW-v1vQ-kRaA17     2        0     4        2        0
  /dev/loop4 vgtest  ZRvpeI-p8F1-ccVW-BBac-xhl1-aGXU-CbP0oo     2        2     4        2        0
2010-06-22 19:18:27 +00:00
Milan Broz
88880e1f89 Use flexible data[] in cmirrord request to prevent abort in runtime size checks. 2010-06-22 13:11:29 +00:00
Zdenek Kabelac
3cce4419c2 Adding section number for lvm.conf manpages. 2010-06-22 07:34:34 +00:00
Jonathan Earl Brassow
bc59859a59 Mirrors can be layered - as in the case of an converting 2-way
to 3-way mirror.  When conversion operations are performed on
these types of mirrors, log options can be confused/ignored.

In the case of a converting 3-way mirror, we have a top-level
2-way corelog mirror whose legs are 1) a 2-way disk-log mirror
and 2) a linear device.  If we wish to convert this 3-way mirror
to a 2-way mirror, the linear device is removed and the extra
top layer is eliminated.  If we also wished to convert the disk
log to a core log in the same step, ambiguity creeps in.  It is
somewhat obvious what the user wants - a 2-way mirror with a
corelog.  However, looking at the top level mirror before
compression, it seems that the mirror already has a core log.
This is why the operation seemed to fail.

This patch simply re-evaluates what mirrored_seg points to after
a compression and then considers the log argument.

This is a fix for bug 599898.
2010-06-21 16:12:33 +00:00
Jonathan Earl Brassow
ecf06d3a49 Add error checking for calls to sprintf - it can fail for more
reasons than just 'out-of-space'.
2010-06-21 16:07:06 +00:00
Alasdair Kergon
d48af1520d Various cleanups following recent commits. 2010-06-21 15:56:57 +00:00
Milan Broz
aa9797fa3c Let running clvmd process time to reexec.
Because execve stops the command loop,
we never receive response (only socket close) for clvmd -S,
so waiting for response here makes no sense.

But if the calling process (clvmd -S)  exits too early, connection
is closed from client side, clvmd takes this as an error and
never run restart code.

Ugly hack(TM).
2010-06-21 10:45:15 +00:00
Peter Rajnoha
44a82780b9 Use early udev synchronisation and update of dev nodes for clustered mirrors.
When using clustered mirrors, we need device nodes to be created during
processing of device tree, not at its end like we normally do (we need to
access the nodes in cmirror prematurely). Therefore we use a new flag called
"immediate_dev_node" stored in deptree's load_properties struct to instruct the
device tree processing code to immediately synchronize with udev and flush all
stacked node operations so the nodes are prepared for use.

For now, the immediate_dev_node is used for clustered mirrors during
processing the dm_tree_preload_children code only. We can add more later if
needed.
2010-06-21 08:54:32 +00:00
Jonathan Earl Brassow
c76681596e daemons/cmirrord/functions.c (part of cmirrord) was referencing
linux/kdev_t.h even though it wasn't needed.  Strangely, it seems
to be causing problems on various architectures (i686) in the
function daemons/cmirrord/functions.c:disk_status_info()->sprintf.

I'm not sure why this is a problem since none of the macros in
kdev_t.h are used in that code, but it certainly doesn't hurt to
pull an unnecessary header and it seems to fix the problem.
2010-06-18 20:58:04 +00:00
Zdenek Kabelac
3ce8520c5e Add man pages for lvmconf and unsupported lvmsadc and lvmsar tools. 2010-06-18 10:19:29 +00:00
Milan Broz
01f5823ee4 Fix exit code when requesting help using documented -o help option.
IOW fix lvs -o help
...
  Command failed with status code 5.
2010-06-17 13:15:51 +00:00
Milan Broz
c7ec3c65f4 Clean up cluster lock mode and flags definition.
Code is mixing up internal DLM and LVM definitions of lock
modes and flags.

OpenAIS and singlenode locking do not depend on DLM but
code currently cannot be compiled without libdlm.h!

LCK_* flags is LVM abstraction, used through all the code.
Only low-level backend (clvmd-cman etc) should use DLM definitions,
also this code should do all needed conversions.

Because there are two DLM flags used in generic code
(NOQUEUE, CONVERT) we define it similar way like lock modes.
(So all needed binary-compatible flags are on one place in locking.h)

(Further code cleaning still needed, though:-)
2010-06-17 12:48:54 +00:00
Zdenek Kabelac
d01b8d03db Add man page for dmeventd
(inspired by patch from Danny Rawlins, monster.romster at gmail dot com)
2010-06-17 12:14:43 +00:00
Zdenek Kabelac
41ff900c99 Update lvresize/extend/reduce manpages with --nofsck, --resizefs options 2010-06-17 11:25:43 +00:00
Milan Broz
9ca07f4298 Fix lvm2cmd example in documentation. 2010-06-16 13:03:48 +00:00
Milan Broz
0e93522190 Remove C++ private keyword from headers.
Add extern C definition for libdevmapper, lvm2app and lvm2cmd.
2010-06-16 13:01:25 +00:00
Zdenek Kabelac
ebd1d49150 Use "" instead of <> for configure.h and libdevmapper.h
Move configure.h as the first header for clvmd source files.
2010-06-15 11:00:44 +00:00
Zdenek Kabelac
057462c4e6 Update WHATS_NEW for last commit 2010-06-07 14:39:18 +00:00
Zdenek Kabelac
b5e36cb96b Fix wrong usage of exec_prefix from previous patch introducing LVM_PATH define
Introduce lvm_exec_prefix with resolved exec_prefix.
(using same ac_default_prefix as for CLVMD_PATH)
Use lvm_exec_prefix instead of dmeventd_prefix (fixes missing ac_default_prefix)

Note: This patch is rather hot-fix as currently generate code
does not create correct code for make exec_prefix=
2010-06-07 14:31:59 +00:00
Milan Broz
f58ca71942 Fix segfault in clvmd -R if no response from daemon received. 2010-06-07 13:52:52 +00:00
Alasdair Kergon
262a39dfd0 post-release 2010-06-07 10:25:43 +00:00
Alasdair Kergon
d531d56dbb pre-release 2010-06-04 17:23:49 +00:00
Milan Broz
8b78a832fa Fix restart of clvmd using -S switch
- allocate environment dynamically (still missing some limit?)
 - try to recover, if destroy failed (do not destroy lvm here) and free memory
 - check strdup() return codes
 - report failure to log
 - do not print NULL in exclusive lock loop
2010-06-04 12:59:30 +00:00
Milan Broz
8c19b52bda Fix clvmd initscript restart command to start clvmd if not yet running. 2010-06-03 21:03:53 +00:00
Zdenek Kabelac
a7237281ea Use INSTALL_DIR to create directories 2010-06-03 13:52:21 +00:00
Zdenek Kabelac
0b6ee2252c Use absolute paths in commands
clvmd restart does not work at all if clvmd binary is not in current
dir.
2010-06-03 13:50:26 +00:00
Milan Broz
a9ffbb0803 Require partial option in lvchange --refresh for partials LVs.
We must not refresh LV if some PVs are missing and partial activation
was not requested.

Fixes https://bugzilla.redhat.com/show_bug.cgi?id=598886
2010-06-03 12:45:05 +00:00
Dave Wysochanski
224457fb0f Revert _init_rand() to reset errno - restores original init behavior. 2010-06-01 21:47:57 +00:00
Dave Wysochanski
cacbf14d8e Do not fail lvm_init() if init_logging() generates an errno.
Revert to original behavior of lvm commands if init_logging() generated an
errno.  Fixes rhbz 592967.
2010-06-01 21:46:29 +00:00
Alasdair Kergon
b00dc7153a Don't merge unchanged persistent cache file before dumping if tool scanned. 2010-06-01 19:02:12 +00:00
Peter Rajnoha
9dbad2f74d Add support for dm-mod module autoload.
A kernel patch is on its way for 2.6.35 adding support for dm-mod module
autoload. Udev v155 and higher is able to read static node information given
in modules.devname (extracted by depmod before) and will create such nodes
at its start. The first access to such node will load the module automatically
(directly in kernel) before the actual read/write operation is processed.
2010-06-01 16:08:13 +00:00
Peter Rajnoha
97617cfc76 Fix incorrect memory pool deallocation while using vg_read for files.
We create a separate pool "lvm2 vg_read" for vg_read and we don't use
cmd->mem anymore.
2010-06-01 12:08:50 +00:00
Mike Snitzer
735cac19f2 Add --type parameter description to the lvcreate man page. 2010-05-28 03:50:15 +00:00
Mike Snitzer
3755951091 Document 'clear' in dmsetup man page. 2010-05-27 19:00:14 +00:00
Peter Rajnoha
60af6fb0a2 Use expected union semun for arguments in selected semaphore operations.
This is standard and expected use. It also prevents errors related with
misalignment of the arguments for semaphore operations on some architectures.
2010-05-27 15:02:56 +00:00
Zdenek Kabelac
333cf73ceb Fix copy&paste detection of kernel release version.
Add log_error to avoid return_0 without log_error.
2010-05-25 08:40:36 +00:00
Alasdair Kergon
bc653452a7 Replace strncmp kernel version number checks with proper ones 2010-05-24 23:11:34 +00:00
Alasdair Kergon
ca92b9a5f0 Avoid selecting names under /dev/block if there is an alternative. 2010-05-24 22:53:48 +00:00
Milan Broz
afe102c0fe Fix topology test after last changes to not leak loop devices. 2010-05-24 19:27:38 +00:00
Alasdair Kergon
fa7fa300e3 Choose between clustered log versions based on kernel version.
Add fixmes for broken strcmp.
2010-05-24 17:46:47 +00:00
Milan Broz
3c2b019578 Use mv -f for replace config in test. 2010-05-24 17:19:50 +00:00
Milan Broz
87003ba5b1 Replace lvm.conf instead of truncating it. 2010-05-24 17:18:05 +00:00
Petr Rockai
bf7d3d2208 t-lvconvert-repair-transient hangs on .33 and on RHEL6 kernel -> disable 2010-05-24 16:33:21 +00:00
Alasdair Kergon
4e553333d9 Update clustered log kernel module name to log-userspace. 2010-05-24 16:30:15 +00:00
Petr Rockai
954df8f3d7 Account for mirror transient status when doing lvconvert --repair. 2010-05-24 15:32:20 +00:00
Petr Rockai
078573dc2c Skip t-pvcreate-operation-md if mdadm fails to create the required device. 2010-05-24 15:28:45 +00:00
Zdenek Kabelac
c4655582e6 Update Copyright date for resently modifed files 2010-05-24 09:04:27 +00:00
Zdenek Kabelac
360f97c31d Replicator: update activate code for vgchange
Activate only the first replicator-dev LV, that activates all other
related LVs from Replicator. In case of error during this activation,
it will not retry again for other heads (less confusing error log).
2010-05-24 09:03:39 +00:00
Zdenek Kabelac
f20300cc8b Replicator: add replicator to dtree
Adding all replicator related LVs to dtree.
Start of one replicator_dev initiate start of all other related.
2010-05-24 09:01:05 +00:00
Zdenek Kabelac
f687bd63b8 Replicator: VG with cmd_missing_vgs does not generate output
Do not print message if missing VG is found.
2010-05-24 08:59:29 +00:00
Mikulas Patocka
b68340f6d9 Fix scripts/relpath.awk to work with mawk
length(array) is specific to GNU awk and doesn't work in mawk.
Use a return value of "split" function to indicate array size, this is
supported in both gawk and mawk.

This patch fixes the following errors during "make install" when mawk is
installed as a default awk.

mawk: scripts/relpath.awk: line 25: illegal reference to array from
mawk: scripts/relpath.awk: line 25: illegal reference to array to
mawk: scripts/relpath.awk: line 27: illegal reference to array from
mawk: scripts/relpath.awk: line 32: illegal reference to array to

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
2010-05-21 15:28:16 +00:00
Zdenek Kabelac
c5eb266a80 Replicator: update _create_partial_dtree()
Adding function _add_partial_replicator_to_dtree() to create
partial tree for Replicator target.

Using dm_tree_node_set_presuspend_node() for Replicator.
2010-05-21 14:47:58 +00:00
Zdenek Kabelac
04f3cea84a Replicator: activate checks for missing vgs
Activation needs to have all remote VGs locked to pass for replicator.
So if activated LV is replicator-dev LV - read all remote VGs.
2010-05-21 14:34:01 +00:00
Zdenek Kabelac
c1fe5092c3 Replicator: lock_lv_vol() finds missing VGs
Find and check for all needed VGs before calling lock_vol().
2010-05-21 14:29:49 +00:00
Zdenek Kabelac
5257248ed9 Replicator: use cmd_vg for process_each_lv_in_vg()
As for _process_one_vg() we need similar retry loop for
process_each_lv_in_vg(). This patch retries to process
failed LVs with reopened VGs.

Patch does not add any extra repeated invocations if there is not
found any missing VG during LV processing.
2010-05-21 14:15:39 +00:00
Zdenek Kabelac
780010a268 Replicator: use cmd_vg list for _process_one_vg()
Patch modifes behavior of _process_one_vg().

In the first pass vg_read() collectis for replicator sorted list of
additional VGs during lock_vol().
If any other VG is needed by the replicator and it is not yet opened
then next iteration loop is taken with all collected VGs.

Flag vg->cmd_missing_vgs detects missing VGs.
2010-05-21 14:11:13 +00:00
Zdenek Kabelac
8571d54f61 Replicator: add read and release VGs for rsites
Add functions to read and release remote VGs for replicator sites
in activation context.
2010-05-21 14:07:16 +00:00
Zdenek Kabelac
13fed1e445 Add toolcontext.h header file. 2010-05-21 13:34:09 +00:00
Zdenek Kabelac
2179ff624c Remove files from wrong directory 2010-05-21 13:18:25 +00:00
Zdenek Kabelac
8b3434061e Right directory name for replicator files.
My local CVS was placing file in slightly different directory by using
obsolete files.
2010-05-21 13:17:20 +00:00
Zdenek Kabelac
68a6a348b5 Hmm - fixing cvs import mistake 2010-05-21 13:14:02 +00:00
Zdenek Kabelac
d8659175cd Replicator: add find_replicator_vgs
Adding find_replicator_vgs() function to find all needed
VGs for replicator-dev LV.

This function is later called before taking lock_vol().
2010-05-21 12:55:25 +00:00
Zdenek Kabelac
1431ddad9c Replicator: add sorted cmd_vg list
Introduce struct cmd_vg to store information about needed
volume group name, vgid, flags and the pointer to opened VG.

Keep VGs list in alphabetical order for locking order.

Introduce functions:
cmd_vg_add() add new cmd_vg entry.
cmd_vg_lookup() search cmd_vgs for vg_name.
cmd_vg_read() open VGs in cmd_vgs list.
cmd_vg_release() close VGs in reversed order.
2010-05-21 12:52:01 +00:00
Zdenek Kabelac
5f860775c8 Replicator: extend volume_group with list of VGs and flag
Add pointer to linked list of opened VGs. List temporarily keeps
the information about needed or locked and opened VGs for replicator target.

Also add cmd_missing_vgs flag information for quick check and
also for possible continuos process_each_lv() usage where we need
to detect whether failure has been caused by missing VG or
some other reason.
2010-05-21 12:47:46 +00:00
Zdenek Kabelac
65f5ed533f Replicator: extend _lv_each_dependency() with dependencies for Replicator devices 2010-05-21 12:45:18 +00:00
Zdenek Kabelac
c4276aad11 Replicator: check replicator segment
Check for possible problems within replicator structures.
Used also by vg_validate.
2010-05-21 12:43:02 +00:00
Zdenek Kabelac
d4ae0dedf2 Replicator: new files for Replicator target 2010-05-21 12:40:05 +00:00
Zdenek Kabelac
312a15d24e Replicator: base lvm2 support
Adding configure.in support for Replicators.
Adding basic lib lvm support for Replicators.
Adding flags REPLICATOR and REPLICATOR_LOG.
Adding segments SEG_REPLICATOR and SEG_REPLICATOR_DEV.
Adding basic methods for handling replicator metadata.
2010-05-21 12:36:30 +00:00
Zdenek Kabelac
bc1024812a Replicator: check open_count for parents of presuspend_node
For deactivation of Replicator check in advance that all heads
have open_count == 0. For this presuspend_node is used as all
head nodes are linking this control node.
2010-05-21 12:30:35 +00:00
Zdenek Kabelac
451be927a8 Replicator: support deactivate of replicator-dev nodes
Introducing dm_tree_node_set_presuspend_node() for presuspending child
node (i.e. replicator control target) before deactivation of parent node
(i.e. replicator-dev target).

This patch presents no functional change to current dtree - only
replicator target currently sets presuspend node for dev nodes.
2010-05-21 12:27:02 +00:00
Zdenek Kabelac
4839e5d913 Replicator: libdm support
Introducing new API calls:
dm_tree_node_add_replicator_target()
dm_tree_node_add_replicator_dev_target().

Define new typedef dm_replicator_mode_t.
2010-05-21 12:24:15 +00:00
Zdenek Kabelac
874d697f4c API change for args of process_each_lv_in_vg()
Patch adds failed_lvnames to the list of parameters for process_each_lv_in_vg().
If the list is not NULL it will be filled with LV names of failing LVs
during function execution.

Application could later reiterate only on failed LVs.
2010-05-21 12:21:51 +00:00
Zdenek Kabelac
f80cbe8c33 Return ECMD_FAILED for break in process_each_lv() and process_each_segment_in_lv() 2010-05-21 12:19:22 +00:00
Alasdair Kergon
154a7af5ee post-release 2010-05-20 23:21:53 +00:00
Alasdair Kergon
cea69ab9d0 Fix $(INSTALL_SCRIPT) 2010-05-20 22:54:58 +00:00
Alasdair Kergon
6b3f81e647 backup->cache 2010-05-20 22:32:44 +00:00
Alasdair Kergon
fbee662499 pre-release 2010-05-20 22:27:26 +00:00
Alasdair Kergon
eedf87f891 If unable to obtain snapshot percentage leave value blank on reports. 2010-05-20 22:24:33 +00:00
Alasdair Kergon
89cb3a410a Add make install_initscripts 2010-05-20 14:45:14 +00:00
Alasdair Kergon
ae1ec1b418 Add install_system_dirs makefile target.
Add configure options for system and locking directories.
2010-05-20 13:47:21 +00:00
Alasdair Kergon
c357c91b7d Generate example.conf so default lvm.conf contents can be configured. 2010-05-20 11:45:56 +00:00
Alasdair Kergon
b3f073c48c Install lvmconf script by default.
Remove unnecessary versioned dmeventd plugin symlinks.
2010-05-20 11:20:35 +00:00
Dave Wysochanski
cd8ffd8725 Remove another pvchange hack involving orphan VG names.
Unnecessary as a result of recent changes.
2010-05-19 15:34:10 +00:00
Dave Wysochanski
bbe4a85be1 Remove hack in pvchange to unlock orphan VG.
With agk's recent changes, the lock/unlock APIs can properly handle
orphan VG names.
2010-05-19 13:21:09 +00:00
Dave Wysochanski
7e6f58c0a9 Fix warnings with conversion of uuid.
More cleanup of uuid casting / structures is needed but for now just
cast like the rest of the code.
2010-05-19 12:12:47 +00:00
Dave Wysochanski
275654210d Update WHATS_NEW 2010-05-19 11:57:05 +00:00
Dave Wysochanski
f649252de9 Test lvm_vgname_from_{pvid|device}. 2010-05-19 11:53:30 +00:00
Dave Wysochanski
d3ce80e544 Add lvm2app interfaces to lookup a vgname from a pvid and pvname.
lvm2app forces applications to start with a volume group name,
open the volume group, then operate on individual pvs.  In some
cases the application may want to start with a device name rather
than the volume group name.  Today, if an application wants to
do this, it must iterate through all the volume groups to find
the volume group that the specific device is attached to.
These new interfaces allow the application to avoid such overhead.
Bump the lvm2app version number to 3.
2010-05-19 11:53:12 +00:00
Dave Wysochanski
ea956b4f6c Update pvchange to always obtain a vg handle for each pv to process.
Earlier patches added some infrastructure to lookup a vgname from
a pvname.  We now can cleanup some of the pvchange and other code
by requiring callers that want to modify some pv property:
1) lookup the vgname by the pvname
2) use the vgname to obtain a vg handle
3) get the pv handle from the vg handle

This should work going forward and be a much cleaner interface,
as we move away from pvs as standalone objects.
2010-05-19 11:53:00 +00:00
Dave Wysochanski
ada4c61860 Add find_vgname_from_{pvname|pvid} functions.
Some commands start with a pvname, but we'd like to force users to
start with a vg handle to obtain a pv handle.  Our best option seems
to be providing a way to look up the vgname from the pvname, and then
require them to use vg_read/vg_open.

In addition to the pvname lookup function, this patch also provides a
lookup by pvid.  The lookup by pvid can be used in conjunction with
lvmcache_get_pvids to process all pvs in the system.

The pvid find function first calls lvmcache_vgname_from_pvid, which may
cause the label to be read if it is not in the cache.  If the vgname is
returned is an orphan, we then check to see if there are metadata areas,
and if not, we scan every PV on the system by calling scan_vgs_for_pvs().
In most cases we should not need to do this, and by using the info->mdas
count, we avoid calling pv_read() as prior code did.  So this patch is a
bit cleaner and should allow us to refactor more of the pv code.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-05-19 11:52:37 +00:00
Dave Wysochanski
9bcaa94756 Add lvmcache_vgname_from_pvid().
Add lvmcache function to lookup a vgname from a pvid.
2010-05-19 11:52:21 +00:00
Dave Wysochanski
49a81e190e Add pvid_from_devname() lvmcache function.
Add supporting function for mappings from devname -> pvid -> vgname.
2010-05-19 11:52:07 +00:00
Alasdair Kergon
5303eca8fa Add is_global_vg and split out from is_orphan_vg. 2010-05-19 02:36:33 +00:00
Alasdair Kergon
1b37183e74 Validate orphan and VG_GLOBAL lock order too. 2010-05-19 02:08:50 +00:00
Alasdair Kergon
09f17bf72d Note that orphan lock is always obtained last 2010-05-19 01:49:08 +00:00
Alasdair Kergon
449ffc7b05 Accept orphan VG names as parameters to lock_vol() and related functions. 2010-05-19 01:16:40 +00:00
Alasdair Kergon
bea9cf6725 Use is_orphan_vg in place of hard-coded prefix tests. 2010-05-19 00:52:55 +00:00
Alasdair Kergon
a0c7ead88f post-release 2010-05-17 20:18:13 +00:00
Alasdair Kergon
3eb97ffa25 pre-release 2010-05-17 18:39:01 +00:00
Fabio M. Di Nitto
48d651f492 Fix clvmd init script stop function to not deactive non-clustered volume groups.
https://bugzilla.redhat.com/show_bug.cgi?id=592362
2010-05-17 03:18:27 +00:00
Jonathan Earl Brassow
0d272a6964 Disallow toggling the cluster attribute of a volume group if there
are active mirrors or snapshots.

We don't have the mechanisms in place to change the device-mapper
tables for those targets that have behavioral differences between
cluster and single machine instances.  Allowing users to change
the attribute but not changing the target's behavior can lead to
data corruption.

The following bugs are fixed/avoided by this patch:
235123 - vgchange -c [ny] do not change target types when necessary
289331 - RFE: switching from cluster domain to local domain needs to deactivate volume somehow
289541 - when changing from local to cluster, volumes can not appear to be deactivated
2010-05-14 15:19:42 +00:00
Alasdair Kergon
578c4098e6 Use -d to control level of messages sent to syslog by dmeventd.
Change -d to -f to run dmeventd in foreground.
(mornfall)
2010-05-14 14:56:39 +00:00
Alasdair Kergon
c34cb6adcf Fix static build. 2010-05-14 13:36:56 +00:00
Zdenek Kabelac
cb5d05fd34 For lcov target there is no need to include source file dependencies 2010-05-14 13:32:36 +00:00
Milan Broz
5899d9b442 Another one internal device layer fix... 2010-05-14 12:39:52 +00:00
Milan Broz
40026b0762 Fix empty layer detection is scan devices. 2010-05-14 12:30:43 +00:00
Milan Broz
83b4f8366a Fix device_is_usable to properly detect only internal LV names. 2010-05-14 12:03:32 +00:00
Zdenek Kabelac
8e50c4e4bb Use /bin/bash for scripts with bashisms 2010-05-14 11:33:20 +00:00
Milan Broz
2b129732b0 Skip also special lvm devices in scan (if ignore suspended is used).
This should avoid various races between dmeventd on multiple nodes
in cluster where one node already repairing device and another
run full scan and locks the device.
2010-05-13 18:38:38 +00:00
Milan Broz
8cf86745c2 Do not print encryption key in message debug output. 2010-05-13 13:31:30 +00:00
Milan Broz
5ee39f1a1d Currently if clvmd is running and user issues vgscan,
the device cache file is dumped both in vgscan and clvmd process.

Unfortunately, clvmd calls lvmcache_label_scan,
it properly destroys persistent filter, but during
persistent_filter_dump it merges old cache content back!

This causes that change in filters is not properly propagated
into device cache after vgscan on cluster.
(Only new devices are added.)

https://bugzilla.redhat.com/show_bug.cgi?id=591861
2010-05-13 13:04:03 +00:00
Petr Rockai
8c998ebbed Chop up the big t-lvconvert-mirror-basic loop across 4 separate test scripts
(and use sourcing to avoid code duplication).
2010-05-12 13:15:38 +00:00
Petr Rockai
e2eb9b24b7 Run tests in alphabetical (and thus stable across machines) order. 2010-05-12 11:59:46 +00:00
Petr Rockai
374e3502fe Bump the debug.log level from 4 to 9 (the numbering is different from verbose). 2010-05-12 11:58:51 +00:00
Petr Rockai
1ad6e72ade Revert the "repeat failed tests with -vvvv" feature. Instead, dump debug logs
to a file using log/file, with log/overwrite set and dump this file in
STACKTRACE. The overall effect is that only the command that ran last before
the failure has been triggered will get its debug output logged. This is
similar to how we treat coredumps.
2010-05-12 11:23:16 +00:00
Petr Rockai
d7eaafe9c1 A fairly extensive refactor of the mirror testing code. The exhaustive
lvconvert testing is now in its own test, t-lvconvert-mirror-basic ... it
doesn't do anything fancy but it does run lvconvert through a lot of
combinations.

I have also merged the remaining t-mirror-lvconvert tests into
t-lvconvert-mirror and abolished the former. The latter will be split again
later into more thematic divisions. (The previous split was rather arbitrary,
may I even say random...)
2010-05-12 10:08:35 +00:00
Petr Rockai
2400150734 Revert the huge device creation in t-lvconvert-mirror and downgrade the
race-induced failure to a warning, until we have a better fix for this.
2010-05-12 06:09:22 +00:00
Petr Rockai
8ad2a270f4 Add a test for activation in presence of missing devices. This partially covers
the robustness requirements for system startup that we have.
2010-05-12 06:02:28 +00:00
Petr Rockai
73a006403f Rename t-partial-activate to t-activate-partial (more activation tests coming). 2010-05-12 05:59:24 +00:00
Petr Rockai
a3513577f0 The "should" testing utility should actually only give a warning when the
command fails.
2010-05-12 05:55:42 +00:00
Petr Rockai
29ad12df51 Improve the "check" testing utility: slightly refactor and provide
active/inactive checks.
2010-05-12 05:55:08 +00:00
Jonathan Earl Brassow
e0dd25f92b If we are checking if '--nosync' was called with the mirror argument,
then let's also check if '--mirrorlog' was called with the mirror
argument.
2010-05-11 21:40:11 +00:00
Zdenek Kabelac
81c155b465 Add pkgconfigdir for placement of .pc files
Use easily overideable make install pkgconfigdir variable.
2010-05-11 08:57:02 +00:00
Zdenek Kabelac
ae11cc6731 Switch to use Requires.private for devmapper.pc and lvm2app.pc
Use Requires.private: instead of Libs.private:
Use UDEV_PC and SELINUX_PC for Require.private:

It looks like usage of Requires.private is prefered from Libs.private.
However pkg-config documentation is really poor here. But here is
short outcome:

There is a difference in Libs.private: and Requires.private: where
we specify libselinux instead of  -lselinux -lsepol.

We leave resolving of query like 'pkg-config --libs --static devmapper'
on taking proper selinux and udev libs to their .pc files instead of
hardcoding them into our .pc file which is might give incorrect answer.
- i.e. dependency of libselinux package might change and we may return
wrong list of linked libraries.

http://bugs.freedesktop.org/show_bug.cgi?id=4738
http://err.no/personal/blog/tech/2008-03-25-18-07_pkg-config,_sonames_and_Requires.private
2010-05-11 08:54:11 +00:00
Zdenek Kabelac
b0b0c853c7 Add UDEV_PC and SELINUX_PC substituted variables 2010-05-11 08:48:43 +00:00
Zdenek Kabelac
8701c4f50c Use Require.private: field for devmapper-event.pc
Move devmapper to Require.private: field and remove unneeded libs
from Libs.private: as they are dependencies of devmapper library.
2010-05-11 08:47:02 +00:00
Zdenek Kabelac
627d5adc63 Skip unneeded 'cat' command execution. 2010-05-11 08:43:18 +00:00
Zdenek Kabelac
7a5a31888a Plugins do not use pthread or lvm2cmd directly.
Plugins are using pthread and lvm2cmd libraries indirectly
through devmapper-event-lvm2, so link only with libraries used by them.
2010-05-11 08:41:58 +00:00
Zdenek Kabelac
3d64743851 Link libdevmapper-event.so with libdevmapper.so.
For now using $(LIBS) for a list of linked libraries to $(LIB_SHARED) library.
2010-05-11 08:38:10 +00:00
Zdenek Kabelac
16d704877d Link liblvm2cmd.so with devmapper-event and devmapper libs.
and remove generic %.so: %.a target.
2010-05-11 08:34:38 +00:00
Zdenek Kabelac
3e320285a1 Headerfile <pthread.h> is no longer needed here 2010-05-11 08:32:22 +00:00
Alasdair Kergon
3215d89e97 Fix truncated total size displayed by pvscan. 2010-05-07 15:24:17 +00:00
Petr Rockai
b7983d4fe2 Add a basic test for dmeventd triggering lvconvert --repair. Needs improvement. 2010-05-06 19:01:26 +00:00
Petr Rockai
6120f4c507 Add some basic provisions for automated testing of dmeventd. 2010-05-06 18:54:51 +00:00
Peter Rajnoha
ac24748b47 Add new --sysinit option for vgchange and lvchange.
A shortcut for --ignorelockingfailure, --ignoremonitoring, --poll n options
and LVM_SUPPRESS_LOCKING_FAILURE_MESSAGES environment variable used all at
once in initialisation scripts (e.g. rc.sysinit or initrd).
2010-05-06 11:15:55 +00:00
Zdenek Kabelac
4c449886f6 Add dm_list_splice() to join two lists. 2010-05-06 10:10:15 +00:00
Zdenek Kabelac
cd3475a8cb Install plugins to subdirs
Target install_dm_plugin installs files to libdir/device-mapper.
Target install_lvm2_plugin installs files to libdir/lvm2.

Both targets creates relative links to libdir to keep the code
compatible with current dlopen handling.

Once we will be able to read plugins from subdir, links
could be removed.
2010-05-06 10:07:46 +00:00
Petr Rockai
03c5c5fbe7 What's new. 2010-05-05 22:38:31 +00:00
Petr Rockai
9d3faa4753 Suppress duplicate error messages about read failures and missing devices. 2010-05-05 22:37:52 +00:00
Peter Rajnoha
20399b50d6 Specify exactly which ioctl doesn't have a cookie set (for better debugging). 2010-05-03 22:08:38 +00:00
Peter Rajnoha
93c1312901 Synchronize "remove" dm task while reverting unsuccessful "create" dm task
(with table provided).

This remove ioctl generates udev events like any other hence it needs to be
synchronized properly as well. Also, add dm task type in debug log when
setting a cookie (for better debugging).
2010-05-03 21:06:53 +00:00
Alasdair Kergon
4bde7fc926 post-release 2010-04-30 15:48:38 +00:00
Alasdair Kergon
68086fdde4 typo 2010-04-30 14:51:58 +00:00
Alasdair Kergon
005ad96068 pre-release 2010-04-30 14:49:42 +00:00
Petr Rockai
6c1beb94dc Add a "should" alongside "not" to the test utilities. When a "should" command
fails, the test will carry on but will issue a warning. The harness detects
such warnings from tests and marks tests that passed with warnings with a
special status.
2010-04-30 14:33:39 +00:00
Alasdair Kergon
fa0ec772d5 change awk path 2010-04-30 13:58:08 +00:00
Peter Rajnoha
661e73d9a6 And be consistent with return code as well (previous commit). 2010-04-30 13:47:11 +00:00
Peter Rajnoha
db85b2ddc7 Don't run any complex initialisation for the "version" lvm2 command.
We can use it even in read-only environment where a try to initialise
file-based locking fails (not to mention other processing related with
lvm2 init). Simply, we want to output the version only, nothing else.
And this should always work.
2010-04-30 13:28:44 +00:00
Zdenek Kabelac
4f732b1989 Initialise _vginfos list staticaly so there is no problem with using uninitialised
variables in case, lvmcache_destory() is called without lvmcache_init().
2010-04-30 12:54:31 +00:00
Zdenek Kabelac
78b8276111 Release pools for regex if there is error during processing
(fixes error messages about unreleased pools).
2010-04-30 12:37:04 +00:00
Zdenek Kabelac
6ef6b6af40 Show string with invalid pattern
and start error message with capital letter.
2010-04-30 12:31:32 +00:00
Alasdair Kergon
f45bbeb497 . 2010-04-29 01:48:19 +00:00
Alasdair Kergon
4eb71369ed . 2010-04-29 01:45:30 +00:00
Alasdair Kergon
bc11c3a1d7 Remove no-longer-used arg_ptr_value.
Fix -M and --type to use strings not pointers that change on config refresh.
2010-04-29 01:38:12 +00:00
Jonathan Earl Brassow
dd8ac54678 Test all combinations of mirror conversion, both while the LV
is active and while it is in-active.

+for i in $(seq 0 4); do
+       for j in $(seq 0 4); do
+               for k in core disk mirrored; do
+                       for l in core disk mirrored; do

The testing code still needs some improvement.  I'd like to add
the ability to test specifying the PVs to be added/removed during
a convert.  It will also be important to test partial PV
specification during down converts (i.e. request to remove more
mirror images than we have provided PVs for).
2010-04-28 17:46:34 +00:00
Jonathan Earl Brassow
15d7886699 Don't attempt to convert the log type of an LV if the LV
is not a mirror.
2010-04-28 17:41:30 +00:00
Peter Rajnoha
68c7b86df1 Add support for new IMPORT{db} udev rule.
This rule appeared in udev v152 and it helps us to support spurious events
where we didn't have any flags set (events originated in udevadm trigger
or the watch rule). These flags are important to direct the rule application.
Now, with the help of this rule, we can regenerate old udev db content.
To implement this correctly, we need to flag all proper DM udev events with
DM_UDEV_PRIMARY_SOURCE_FLAG. That happens automatically for all ioctls
generating events originated in libdevmapper.
2010-04-28 13:37:36 +00:00
Zdenek Kabelac
6c8454e4e2 Small indent change 2010-04-28 12:23:11 +00:00
Jonathan Earl Brassow
a249779692 Fix comment from last commit. Additionally, there is no need
to put a comment into the WHATS_NEW file if it is a regression
that was created and fixed inside the same release window.
2010-04-27 15:26:58 +00:00
Jonathan Earl Brassow
876b6416ab Patch to fix bug 586021 and mantain historical behavior of
being able to remove more images from a mirror than the
number of PVs directly specified for removal.

The effort to fix bug 581611 corrected a bug that was unnoticed
at the time.  The loop in _remove_mirror_images that looks over
the specified PVs was allowing devices that were previously
counted and moved to the end of the list to be double-counted.
This resulted in the number of devices needed for removal always
being satisfied - even if the user did not specify enough PVs
for removal to satisfy the request.  When 581611 was fixed, this
double-counting no longer took place and the result was to remove
only the minimum of the number of PVs specified or the number
that was asked to be removed.

By simply always setting 'new_area_count' (as used to be done
only in the else statement), we return to the previous behavior.
Indeed, this is exactly what the double-counting was allowing
to happen before the fix of 581611.
2010-04-27 14:57:49 +00:00
Alasdair Kergon
1e5d79dce1 Mention how to get PV to LV mappings. 2010-04-27 13:34:56 +00:00
Alasdair Kergon
bd8eec0a1e Fix lvconvert error message when existing mirrored LV is not found. 2010-04-26 18:31:58 +00:00
Alasdair Kergon
e7075c4366 add comments 2010-04-26 18:12:40 +00:00
Peter Rajnoha
532834cbc2 Also include udev libs in libdevmapper.pc when udev_sync is enabled. 2010-04-26 09:05:50 +00:00
Mike Snitzer
d5857e6d48 Disallow the direct removal of a merging snapshot.
Allow lv_remove_with_dependencies() to know the top-level LV that was
requested to be removed (otherwise it recurses and we lose context).

A merging snapshot cannot be removed directly but the associated origin
can be.  Disallow removal of a merging snapshot unless the associated
origin is also being removed.
2010-04-23 19:27:10 +00:00
Mike Snitzer
b004ea2d7f Remove redundant check in _lvs_single now that the caller
(process_each_lv_in_vg) provides it.

Also, removing this check from _lvs_single now allows displaying hidden
LVs that are specifically named on the command line.
2010-04-23 19:10:20 +00:00
Peter Rajnoha
205ca4f8a7 Set appropriate udev flags for reserved LVs.
There's no need for foreign udev rules to touch LVM reserved devices
(snapshot, pvmove, _mlog, _mimage, _vorigin) even if they happen to
be visible. The same applies for /dev/disk content - no need to create
any content for these devices (and so no need to run any "blkid" etc.).
This also prevents setting any inotify "watch" from udev rules on such
devices that is a source of race conditions (the rules need to honor
DM_UDEV_DISABLE_OTHER_RULES_FLAG for this to work though).
2010-04-23 14:16:32 +00:00
Mike Snitzer
4a76f5c764 When removing a snapshot avoid preloading the origin if the
snapshot-merge target is not active.
2010-04-23 02:57:39 +00:00
Alasdair Kergon
a32e8441d0 don't optimise anything with TARGET_TRANS to avoid intefering with the matcher's counting 2010-04-22 23:09:18 +00:00
Alasdair Kergon
4d6089c042 Cache bitset locations to speed up _calc_states. (kabi) 2010-04-22 20:35:24 +00:00
Alasdair Kergon
7929929979 avoid ORs rightmost 2010-04-22 20:15:00 +00:00
Alasdair Kergon
9ba50046cf Move regex printing code from test to main tree (may use in debug messages).
Yet another optimiser fix attempt.
2010-04-22 17:42:38 +00:00
Jonathan Earl Brassow
aa3bdb1911 The following tests in the testsuite have race conditions:
1) Test that the primary mirror image cannot be removed while
   the mirror set is sync'ing.
2) Test that you cannot start a second mirror up-convert while
   one is already in progress.

The trouble is that if the sync/conversion finishes before the
tests occur, the tests will fail by why of success where there
should have been failure.  This means the sync/conversion must
happen very quickly, but this is possible because the test
mirrors we are creating are so small.

In order to decrease the likelyhood of these test failing (or
more correctly, failing to test the right thing), I've increase
the size of the mirrors.  It will still be remotely possible that
the tests will fail (by way of failing to test the right thing).
If this continues to happen, more involved mechanisms will need
to be put in place.  (Perhaps these will still be created, but
this change should be a remedy until that time.)
2010-04-22 15:39:40 +00:00
Alasdair Kergon
5ce8c9ae77 Don't walk rightmost through NULL pointers. 2010-04-22 14:33:14 +00:00
Alasdair Kergon
9d8d6b1d18 Fix rightmost rotation, and use LEFT and RIGHT to make symmetry more obvious. 2010-04-22 13:42:34 +00:00
Alasdair Kergon
52b8978a47 fix leftmost rotation 2010-04-22 13:18:27 +00:00
Alasdair Kergon
442fd05339 isprint 2010-04-22 03:42:00 +00:00
Alasdair Kergon
4c952f0af4 Still not satisfactory... 2010-04-22 03:24:24 +00:00
Alasdair Kergon
cf90b03ce4 add -r to print out closer to original regex format 2010-04-22 03:12:01 +00:00
Jonathan Earl Brassow
d4de2a7aa4 Disallow the addition of mirror images while a mirror up-convert
is already occurring.  The addition of new legs can be retried
once the current conversion is complete.
2010-04-21 14:04:24 +00:00
Jonathan Earl Brassow
e37b173f13 Disallow the primary mirror image from being removed when the
mirror is not in-sync.  This restriction is not extended to
repair operations (i.e. it will not limit what 'lvconvert --repair'
can do).
2010-04-21 13:55:08 +00:00
Zdenek Kabelac
b5d88e683f Make matcher_t and parse_t compilable
These two old-test/regex utils are usable for testing output of regex
processing at core level. For getting them usable configure need to create
Makefile. This is currently disable by default.
2010-04-21 08:01:51 +00:00
Alasdair Kergon
23dd4773d4 Add a regex optimisation pass for shared character prefixes. 2010-04-20 22:31:22 +00:00
Mike Snitzer
b7af7c9c43 Re-enable t-topology-support.sh
Reintroduce split teardown (teardown() calls teardown_devs()) because
t-topology-support.sh only needs the teardown_devs() subset of the full
teardown() between each iteration of the topology tests -- in particular
the $TESTDIR must not get removed between each topology test iteration.

prepare_loop() must return if prepare_scsi_debug_dev() already
established $LOOP.

Also fix (and simplify) the unsafe scsi-debug device discovery in
prepare_scsi_debug_dev().
2010-04-20 18:18:59 +00:00
Milan Broz
9d3955daf2 Remove -n argument from vgcfgrestore.
This is old lvm1 syntax, because arguments changed
from PVs to VGs compatibility is broken already.

name_ARG is not used in code.
2010-04-20 18:17:56 +00:00
Dave Wysochanski
063083a203 Add device creation to basic nightly test.
Ensure we can create devices for use in tests before running the real
tests.  This may in various cases, and may involve machine configuration
rather than a failure in some specific test.
For example, if a test is run with LVM_TEST_DIR=/tmp, selinux is enabled,
and the default security context is set to "<<none>>" for /tmp, all the
tests will fail, unable to create devices, since dmsetup will fail, a
result of machpathcon() returning an error code.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-20 15:19:36 +00:00
Christine Caulfield
46886164dd Add -S command to clvmd, so it can restart itself and still
preserve exlusive LV locks.
2010-04-20 14:07:37 +00:00
Alasdair Kergon
ea04c0775c Add dm_bitset_equal to libdevmapper. 2010-04-20 13:58:22 +00:00
Alasdair Kergon
d29aa4764e typo in comment 2010-04-20 12:18:31 +00:00
Alasdair Kergon
203d1d8371 Move function up file 2010-04-20 12:14:28 +00:00
Alasdair Kergon
2eb4099563 missed files from last commit 2010-04-20 10:58:18 +00:00
Mike Snitzer
7cd66377ef Add additional test to start the snapshot merge (which had just failed
because an FS was mounted -- origin was still open).
2010-04-19 22:44:42 +00:00
Alasdair Kergon
1f9d50fa3f Add dm_bit_and. (ejt) 2010-04-19 21:23:01 +00:00
Alasdair Kergon
be29e5ab62 fix last commit 2010-04-19 21:10:20 +00:00
Alasdair Kergon
24d8818933 Simplify dm_bitset_create. (ejt) 2010-04-19 21:08:32 +00:00
Alasdair Kergon
803380c203 Speed up dm_bit_get_next with ffs(). 2010-04-19 17:17:55 +00:00
Dave Wysochanski
1569059b2b Change lvm2app version number from 1 to 2.
This version number change reflects the memory handling change
for string-based pv/vg/lv string based attributes.
In addition, when adding support for tags, I forgot to increase
the version number.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-19 15:24:00 +00:00
Dave Wysochanski
8fb6bd930b Use vg->vgmem to allocate vg/lv/pv string properties instead of dm_malloc/fr
Everywhere else in the API the caller can rely on lvm2app taking care of
memory allocation and free, so make the 'name' and 'uuid' properties of a
vg/lv/pv use the vg handle to allocate memory.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-19 15:22:24 +00:00
Alasdair Kergon
c7cc4b7ac2 Change daemon lock filename from lvm2_monitor to lvm2-monitor for consistency. 2010-04-15 19:15:03 +00:00
Zdenek Kabelac
201d0e940b Install symbolic .so links with relative paths between usrlibdir and libdir. 2010-04-15 15:12:20 +00:00
Zdenek Kabelac
3fcd56034a Fix double DESTDIR usage for infodir and mandir. 2010-04-15 15:09:35 +00:00
Zdenek Kabelac
51d222469c Added awk script relpath.awk to calculate relative paths. 2010-04-15 15:06:26 +00:00
Alasdair Kergon
742d1e9424 post-release 2010-04-14 21:47:48 +00:00
Alasdair Kergon
a04f401227 touch file 2010-04-14 20:03:46 +00:00
Alasdair Kergon
62699dc3fd fix broken 'make install' for lvm_dump.sh 2010-04-14 20:03:15 +00:00
Alasdair Kergon
01645a7eae pre-release 2010-04-14 18:54:37 +00:00
Alasdair Kergon
b17939f74e pre-release 2010-04-14 18:53:04 +00:00
Alasdair Kergon
aa60387ecc . 2010-04-14 18:10:30 +00:00
Alasdair Kergon
4b801e21e3 pre-release 2010-04-14 17:50:49 +00:00
Dave Wysochanski
c198ca0285 Add missing readline linkage to lvm2app test. 2010-04-14 16:13:34 +00:00
Petr Rockai
a312fc8eb9 What's new. 2010-04-14 13:52:20 +00:00
Petr Rockai
10b62fb0e8 Allow incomplete mirror restore in lvconvert --repair upon insufficient space. 2010-04-14 13:51:58 +00:00
Peter Rajnoha
2c12325d63 Do not reset position in metadata ring buffer on vgrename and vgcfgrestore.
We should write metadata into next position in the ring buffer while calling
vgrename and vgcfgrestore. At this code level (_vg_write_raw), we were not able
to determine if this is a rename or not. If yes, then accompanying VG structure
passed here has a new name set, not the old one.

When looking for a location where to put metadata next, we were given a NULL
value because of failed VG name comparison (in _find_vg_rlocn) between the
name in existing metadata and metadata we're just about to write.

This resets the position in the ring buffer, overwriting any existing metadata
(and also incorrectly updates the cache to "orphan" afterwards).

This patch just adds old_name item in struct volume_group that we can check and use
if necessary and detect renames at lower layers as well.

The same applies for vgcfgrestore, but here we're using a special value of
old_name, an empty string, to disable the check with existing metadata totally.
2010-04-14 13:09:16 +00:00
Peter Rajnoha
2d8b9935a1 Allow VGs with active LVs to be renamed. 2010-04-14 13:03:06 +00:00
Peter Rajnoha
647d948562 Use UUIDs instead of names while processing event handlers.
Internally, we used DM names instead of UUIDs while processing event
handlers. This caused problems while trying to vgrename a VG with active LVs
where the names are being changed and so the devices were not found then.
The patch also contains a little bit of refactoring, moving "build_dlid" code
found in dev_manager.c to "build_dm_uuid", now in lvm-string.c (so we have
build_dm_uuid and build_dm_name at one place).
2010-04-14 13:01:38 +00:00
Petr Rockai
73d1626ddc SIGTERM testing clvmd before SIGKILL-ing it. 2010-04-14 12:04:23 +00:00
Alasdair Kergon
b7445913cc Only pass visible LVs to tools in cmdline VG name/tag expansions without -a 2010-04-14 02:19:49 +00:00
Alasdair Kergon
6f7ca25c9b Use typedefs for toollib process_each functions. 2010-04-13 23:57:41 +00:00
Dave Wysochanski
4487c42de2 Fix teardown in t-pvcreate-operation-md.sh nightly test. 2010-04-13 21:42:44 +00:00
Zdenek Kabelac
bfbad87685 Add error diagnostic for setenv failure. 2010-04-13 20:54:57 +00:00
Zdenek Kabelac
65f0ca65f1 Use C locales and use_mlockall for clvmd.
Use same steps for clvmd as for dmeventd - using C locales to avoid reading
large mmaps and use mlockall() for threaded version.
2010-04-13 19:54:16 +00:00
Dave Wysochanski
d0720d5638 Update WHATS_NEW 2010-04-13 18:18:54 +00:00
Dave Wysochanski
5d7fd71373 Add pv->vg to solidify link between a pv and a vg.
lvm2app needs a link back to the vg in order to use the vg handle for
memory allocations as well as other things.  This patch adds the field
to struct physical_volume, and sets pv->vg when reading a vg from disk or
extending a vg by using the helper function previously added,
add_pvl_to_vgs().  Moves and renames are handled with separate code
inside move_pv() and vgmerge().  Add pv->vg check to vg_validate().

A NULL value in pv->vg signifies membership in the orphan VG.
Note though in the case of pv_read() on a device with metadatacopies == 0,
more devices may need to be read for an authoritative answer.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-13 17:26:36 +00:00
Dave Wysochanski
efe3e72ed0 Use del_pvl_from_vgs() in vgreduce paths.
Somehow these got missed in earlier patches.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-13 17:26:20 +00:00
Dave Wysochanski
85f56aef97 Call add_pvl_to_vgs() and del_pvl_from_vgs() from more places.
Now that we have library functions to add/delete a pv from the vg->pvs
list, call them from everywhere.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-13 17:26:03 +00:00
Dave Wysochanski
5ac0fdd924 Add del_pvl_from_vgs() and move prototypes into metadata-exported.h
Add a delete function to manage the vg->pvs list.

NOTE: It may be possible to do further cleanup to these add/del functions
by passing a 'pv' as input instead of 'pv_list'.  The pv_list is used for
functions which do allocations (lvcreate) while other places in the code
just manage a list of 'pv' (e.g. import functions, vgextend, etc).

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-13 17:25:44 +00:00
Dave Wysochanski
b33c7a4597 Move increment of vg->pv_count from import_pool_vg() to import_pool_pvs().
Move the increment of vg->pv_count next to the place where we add to
vg->pvs.  It looks safe to do this since the only caller of import_pool_vg()
calls import_pool_pvs() immediately afterward, and there is no way
import_pool_vg() can fail (always returns 1).  However, if there's a
memory allocation failure inside import_pool_pvs(), we will end up with
a different count in vg->pv_count that with the original code.  In any
case, vg->pv_count should be as close to dm_list_size(&vg->pvs) as
possible, as is the case everywhere else in the code.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-13 17:25:26 +00:00
Dave Wysochanski
788b9bfdc7 Remove unnecessary parameter from import_pool_pvs().
The dm_list * parameter is unnecessary since we are passing in 'vg'
and the only caller of import_pool_pvs() passes '&vg->pvs' in the
dm_list * parameter.  Just use vg->pvs directly in the function.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-13 17:25:13 +00:00
Jonathan Earl Brassow
41cc3b216d Arguments to the --mirrorlog option are described in the synopsis as
{core|disk|mirrored}.  However, under the options section they are
described as {core|disk} - even though the 'mirrored' argument is
described.

s/'{core|disk}'/'{core|disk|mirrored}'/
2010-04-13 16:13:08 +00:00
Milan Broz
dcb7878842 Mask LCK_HOLD flag in cluster VG locks for compatibility reasons. 2010-04-13 14:36:24 +00:00
Petr Rockai
b5e83471bc Add check_cluster and check_local targets in addition to just "check". 2010-04-13 13:28:52 +00:00
Petr Rockai
bfdc05ec71 Process all core dumps that exist upon a test failure (this would hopefully
include eg. clvmd cores, apart from the usual lvm ones).
2010-04-13 08:01:53 +00:00
Petr Rockai
1d7ecbe9a0 Fix a reversed exit status from test harness. 2010-04-13 07:34:19 +00:00
Petr Rockai
9e30a829d5 Optionally disable the verbose repeat of a failed test (export
LVM_TEST_NOVERBOSE=1).
2010-04-13 07:33:34 +00:00
Petr Rockai
698dfbf953 Disable the rather-dangerous prepare_scsi_debug_dev in the testsuite. 2010-04-13 07:02:14 +00:00
Petr Rockai
83fbb1ec11 Make it easy to run just a subset of tests by saying make check T="regex". 2010-04-13 06:45:53 +00:00
Petr Rockai
19d0f2b57c Fix t-lvcreate-usage, as lvcreate ... -i1 no longer prints "Redundant stripes
argument: default is 1".
2010-04-13 06:25:08 +00:00
Petr Rockai
05d40b3142 For the testsuite, set a default polling interval to 0 in lvm.conf. Cuts down
testing time considerably.
2010-04-13 06:24:20 +00:00
Alasdair Kergon
d94fc9ce62 Add --stripes to lvconvert too. 2010-04-13 01:54:32 +00:00
Alasdair Kergon
ba9550f473 Add activation/polling_interval to lvm.conf as --interval default. 2010-04-13 01:43:56 +00:00
Petr Rockai
28e1e7fd07 More testsuite helpers for check.sh. 2010-04-12 19:33:58 +00:00
Petr Rockai
904e38837e Absorb t-mirror-lvconvert-usage into t-lvconvert-mirror. 2010-04-12 19:32:58 +00:00
Petr Rockai
697c5de267 Explode t-mirror-basic into a number of smaller tests. 2010-04-12 19:16:24 +00:00
Petr Rockai
bfa9c7bcfb Add a new helper script for the testsuite, that makes checking for various
(lvm) properties easier and produces nicer output.
2010-04-12 19:02:59 +00:00
Petr Rockai
2f81a05ce5 Do not pollute /tmp with testing byproducts. 2010-04-12 19:00:23 +00:00
Alasdair Kergon
7b994976e3 Don't ignore error if resuming any LV fails in resume_lvs.
Skip closing persistent filter cache file if open failed.
2010-04-12 11:52:53 +00:00
Zdenek Kabelac
662d1021e6 Update install rules for udev.
Fix unwanted modification of $(top_builddir)/make.tmpl.

Using dependency rules to install rules for udev.
There is minor problem, with concurent usage of builddir
and srcdir could lead to missuse of 10-dm.rules which
could be found in VPATH from different builddir.
However current solution uses intermediate target so
the generated 10-dm.rules exists only for short period of time
during make install execution.
2010-04-09 21:44:28 +00:00
Zdenek Kabelac
2db34bd266 INSTALL rules updates
Patch is inspired by Debian's extra patch.

- removes OWNER & GROUP make vars they are parts of INSTALL command.
- adds INSTALL_PROGRAM for executable, uses $(INSTALL)
- adds INSTALL_DATA for non-executable data, uses ($INSTALL)
- adds INSTALL_WDATA for writable non-executable data, uses ($INSTALL)
- adds configure option --enable-write_install - to support
  installatin of writable files used by distribution
- replaces usage of ifeq @LIB_SUFFIX@ with $(LIB_SUFFIX)
- installs .a files from static builds without executable flag
- installs .a files to $(usrlibdir) instead of $(libdir)
- installs all static binaries to $(staticdir)
- create .so links for devel package in $(usrlibdir) instead of
  $(libdir)
- makes .so and .so.LIB_VERSION files within builddir
- removes VERSIONED_SHLIB and created versioned LIB_SHARED automagicaly
- install LIB_SHARED via install_lib_shared target
- install plugins via install_lib_shared_plugin target
- prints whole 'install' command during installation instead of less
  informative "Installing  $(something) $(somewhere)"
- install multiple man pages with one INSTALL command
- use DISTCLEAN_TARGETS instead of creating multiple distclean targets
2010-04-09 21:42:48 +00:00
Zdenek Kabelac
f3491d93a0 Use vpath instead of VPATH.
Usage of VPATH makes troubles when used within $(builddir).
Not only source files are being found through VPATH,
but targets as well. (make --debug=v)

Thus if user builds the code in $(srcdir) and also in some $(builddir)
he gets mangled results as some generated files (i.e. .export.sym)
are 'reused' from $(srcdir) instead of $(builddir).

This patch switches to use vpath were we could explicitly name
suffixes that should be looked via vpath - we must take care,
we do not generate files with these suffixes:
.c, .in, .po, .exported_symbols
2010-04-09 21:34:25 +00:00
Alasdair Kergon
337abee8f8 Permit mimage LVs to be striped in lvcreate and lvresize. 2010-04-09 01:00:10 +00:00
Dave Wysochanski
34573cf6a5 Check for duplicate paths (pvids) on the commandline of vgcreate.
A user specifying duplicate paths on the cmdline of vgcreate will
get a message similar to the following:
vgcreate vgtest2 /dev/loop3 /dev/loop5
  Found duplicate PV jk1lXsKzwyOKlXq6bhaFFKMQQ06oPgu8: using /dev/loop5 not /dev/loop3
  Found duplicate PV jk1lXsKzwyOKlXq6bhaFFKMQQ06oPgu8: using /dev/loop3 not /dev/loop5
  Internal error: Duplicate PV id jk1lXs-Kzwy-OKlX-q6bh-aFFK-MQQ0-6oPgu8 detected for /dev/loop3 in vgtest2.

This is caught by vg_validate(), but it would be good to find
this condition earlier in the vgcreate code.  add_pv_to_vg()
currently checks by pvname, but does not look for duplcate pvids.
This patch adds the check for duplicate pvids and results in new
error output as follows:
vgcreate vgtest2 /dev/loop3 /dev/loop5
  Found duplicate PV jk1lXsKzwyOKlXq6bhaFFKMQQ06oPgu8: using /dev/loop5 not /dev/loop3
  Found duplicate PV jk1lXsKzwyOKlXq6bhaFFKMQQ06oPgu8: using /dev/loop3 not /dev/loop5
  Physical volume '/dev/loop5 (jk1lXs-Kzwy-OKlX-q6bh-aFFK-MQQ0-6oPgu8)' listed more than once.
  Unable to add physical volume '/dev/loop5' to volume group 'vgtest2'.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-08 15:18:35 +00:00
Zdenek Kabelac
491ef790c2 Add cleandir target.
Using non-recursive cleandir target for resursive distclean and clean.
Avoids duplicated clean invocation during distclean.
Indent $(RM) parameters.
2010-04-08 09:15:37 +00:00
Alasdair Kergon
2e9ae681c5 And another test that should have been failing - normal allocation policy
does not put stripes on the same disk as each other!
2010-04-08 01:58:06 +00:00
Alasdair Kergon
a02f7ab0cb another incorrect test that should have been failing before 2010-04-08 01:43:46 +00:00
Alasdair Kergon
e0b190c111 missing ?: 2010-04-08 00:56:26 +00:00
Alasdair Kergon
56fa3ac1b0 suppress bogus compiler warning 2010-04-08 00:52:41 +00:00
Alasdair Kergon
5bcbeca8e7 Fix pvmove allocation to take existing parallel stripes into account.
When moving parts of striped LVs, pvmove wouldn't care about leaving you with
two stripes on the same disk.  Now --alloc anywhere is needed for that.
(Tried and gave up on two alternative approaches before the one committed here.)
2010-04-08 00:28:57 +00:00
Alasdair Kergon
adc1729212 Only fail if the top-level LV fails to be deactivated - allow deactivation
of its dependencies to fail.
2010-04-07 23:51:34 +00:00
Petr Rockai
fd7ae0cdf4 Don't forget to cd into $TESTDIR in test-utils.sh / prepare_testdir. 2010-04-07 21:38:01 +00:00
Alasdair Kergon
fdef9679b6 Issue a message if the new type of deactivation failure happens.
If this can happen during 'normal' operations, I need to know.
2010-04-07 21:25:09 +00:00
Petr Rockai
dcecda51d8 Fix a mis-override of $PWD in test-utils.sh. 2010-04-07 21:19:20 +00:00
Alasdair Kergon
687603a144 Fix incorrect removal of symlinks after LV deactivation fails. 2010-04-07 20:04:41 +00:00
Petr Rockai
acb04c003e Set ulimit -c to unlimited to allow coredumps to be collected and analysed in
the testsuite.
2010-04-07 16:04:22 +00:00
Petr Rockai
86b10089aa Avoid spurious skips in the testsuite due to obsoleted
dmsetup_has_dm_devdir_support_.
2010-04-07 16:00:19 +00:00
Milan Broz
7b030379ec Wipe dm-ioctl parameters in memory after use. 2010-04-07 15:57:20 +00:00
Petr Rockai
5e8c230d1f Refactor the test utilities, dropping the legacy test-lib.sh and curtailing
lvm-utils.sh. Clears up lots of unused code, should have little observable
impact (it does change test directory layout slightly).
2010-04-07 14:46:26 +00:00
Petr Rockai
8c2890cf1d In test harness, use fwrite(3) in place of write(2) to avoid mixing fd-level
and FILE-level IO on stdout.
2010-04-07 09:48:11 +00:00
Petr Rockai
8e9da71c32 Keep the testsuite stats correct in spite of failed-test repetition. 2010-04-07 09:41:33 +00:00
Petr Rockai
13deb51d3e Move a fragile testcase toward the end of t-mirror-lvconvert.sh. 2010-04-06 23:04:45 +00:00
Petr Rockai
2c9ffffdca Put back the `lvconvert -m+1 --mirrorlog disk' test in t-lvconvert-mirror.sh. 2010-04-06 22:22:26 +00:00
Alasdair Kergon
13a9bce647 disable another broken check in a test 2010-04-06 18:13:43 +00:00
Alasdair Kergon
dbb3cfe05e Fix is_partitioned_dev not to attempt to reopen device. 2010-04-06 17:36:41 +00:00
Alasdair Kergon
674baa8143 Disable broken test. 2010-04-06 16:30:53 +00:00
Christine Caulfield
db482f3e1c Fix a thread race in clvmd that could cause lockups on very busy systems 2010-04-06 15:29:30 +00:00
Alasdair Kergon
6a5a0222d7 Display PVs created during tests 2010-04-06 14:25:07 +00:00
Zdenek Kabelac
4512267bf1 Using 'not' if the test 'should' fail. 2010-04-06 14:24:13 +00:00
Dave Wysochanski
54555a6fab Update WHATS_NEW 2010-04-06 14:07:11 +00:00
Dave Wysochanski
ae2353caf8 Add add_pvl_to_vgs() - helper function to add a pv to a vg list.
Small refactor of main places in the code where a pv is added to a
vg into a small function which adds the pv to the list and updates
the vg counts.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-06 14:04:54 +00:00
Dave Wysochanski
4eae19ed75 Refactor format1 vg->pvs list add and vg->pv_count.
Refactor adding to the vg->pvs list and incrementing the count, which
will allow further refactoring.  Should be no functional change.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-06 14:04:20 +00:00
Dave Wysochanski
e6ec837203 Refactor _read_pv() code that updates vg->extent_count and vg->free_count.
Simple refactor to mov code that updates the vg extent counts from a
single pv's counts close to the code that adds a pv to vg->pvs and
updates vg->pv_count.  No functional change.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-06 14:04:03 +00:00
Dave Wysochanski
b37f7c4b7b Add pv to vg->pvs after check for maximum value of vg->extent_count.
In add_pv_to_vg(), we should only add the pv to vg->pvs after all
internal checks have passed.  The check for vg->extent_count exeeding
maximum was after we added the pv to the list, so this function could
return a state of vg->pvs that did not reflect other parameters such
as vg->pv_count.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-04-06 14:03:43 +00:00
Zdenek Kabelac
4b8340327c As lcov module is not installed with GD.pm dependency we need
to check for presence of this module and avoid using --frames
option for genhtml in this case.

Fix arg list for AC_PATH_PROG for lcov and genhtml.
(detecting empty LCOV and GENHTML string in Makefiles).
2010-04-06 11:53:53 +00:00
Zdenek Kabelac
92e4442d4a Distcleanup config files only in toplevel directory.
Do not execute 'rm -r' with empty $(DISTCLEAN_DIRS).
lvm-version.h is already cleaned with subtarget distcleaning.
Always distcleanup lcov_reports*.
2010-04-06 09:54:11 +00:00
Zdenek Kabelac
2517f9adf2 Fix lcov target
Patch fixes generation of coverage files for dmeventd and adds support for clvmd.
Path names are stripped, so the the html looks better.
Frames 'previews' is enabled for generated pages.
Using top_srcdir was wrong here - though we still can't easily use builddir.
Requiers using shell variables before execution of binaries build outside
of srcdir.
2010-04-06 09:50:07 +00:00
Alasdair Kergon
9bad64ff72 remove compiler warning 2010-04-02 01:35:34 +00:00
Alasdair Kergon
d1b147e2bb A few more log_error to log_warn changes for mirrors. 2010-04-01 14:54:37 +00:00
Zdenek Kabelac
1b7730c76a Better debug message for Un/Locked memory size. 2010-04-01 14:53:47 +00:00
Alasdair Kergon
28801f7470 temporarily downgrade the 'open while suspended' error till we fix it properly 2010-04-01 14:30:51 +00:00
Alasdair Kergon
f4c43c11c9 Try to fix tracking of whether or not log extents need allocating. 2010-04-01 13:58:13 +00:00
Zdenek Kabelac
fde4a8e1b5 Set ret value to success initially. 2010-04-01 13:43:12 +00:00
Alasdair Kergon
abb193533c Avoid endless loop if lv->segments list is corrupted 2010-04-01 13:08:06 +00:00
Alasdair Kergon
775fac97d3 initialise log_allocated to 0 2010-04-01 12:29:07 +00:00
Alasdair Kergon
3963c01760 Limit number of error messages when checking LV segments. 2010-04-01 12:14:20 +00:00
Alasdair Kergon
1e01a63685 Improve vg_validate to detect some loops in lists. 2010-04-01 11:45:36 +00:00
Alasdair Kergon
235ca704b1 Improve vg_validate to detect some loops in lists. 2010-04-01 11:43:24 +00:00
Alasdair Kergon
5685ba7918 Change most remaining log_error WARNING messages to log_warn. 2010-04-01 10:34:09 +00:00
Petr Rockai
f0d1c085b9 Do not pass NULL to setenv in the test harness. 2010-03-31 23:11:12 +00:00
Zdenek Kabelac
45490081c7 Missed to convert T -> SCRIPTS 2010-03-31 23:05:20 +00:00
Petr Rockai
a3503d13e5 Re-run failing tests with log/verbose=4 (-vvvv) to help with debugging. 2010-03-31 22:18:17 +00:00
Alasdair Kergon
3ed4d07d3a remove unused var 2010-03-31 20:39:51 +00:00
Alasdair Kergon
06e0023c2c Attempt to fix non-ALLOC_ANYWHERE allocation code after recent changes broke
The preference given to the PVs with the largest free areas.
2010-03-31 20:26:04 +00:00
Milan Broz
b22e0a3ecb Always use blocking lock for VGs and orphan locks.
Because we have now strong rule for lock ordering:
 - VG locks must be taken in alphabetical order
 - ORPHAN locks must be the last
vgs_locked() is now not needed.

This fixes problem with orphan locking, e.g.
vgremove VG1    |    vgremove VG2
lock(VG1)       |    lock(VG2)
lock(ORPHAN)    |    lock(ORPHAN) -> fail, non-blocking

https://bugzilla.redhat.com/show_bug.cgi?id=578413

(More similar places in code.)
2010-03-31 17:23:56 +00:00
Milan Broz
0802006ea7 Fix all segments memory is allocated from vg private mempool.
Physical segments were still allocated from global
command context mempool.

This leads to very high memory usage when
activating large VG (vgchange).
(Memory usage was about 2G when >3000LVs).

Fix it by properly using vg->vgmem private pool,
so all the memory is released early.

New memory pool parameter is needed here for pv_split_segment
function.

Also fix the same problem in some minor allocations
(vg description, lv segment split).
2010-03-31 17:23:18 +00:00
Milan Broz
29a21d1627 Do not traverse PV segment list twice.
In addition to previous patch, we really do not need
to search for segment which was just allocated in
split request.

Make pv_split_segment function return newly allocated
(split) segment also.

(So after this patch, there is only one user
of slow find_peg_by_pe).
2010-03-31 17:22:26 +00:00
Milan Broz
a0074aa07e Optimise PV segments search.
The function find_peg_by_pe is incredibly inefficient
for Pvs with many segments.

In shiny future there should be binary (or interval) tree
instead of sorted linked list (volunteers?).

Anyway, for now, we can use dirty trick here to optimise this case:

 - Allocations are usually applied from the beginning
 of PV (we have no alloocation policy which allocates areas
 "backwards")

 - The only user of find_peg_by_pe is pv_split_segment()
 call. In *most* cases it need to split *last* PV segment.

So if we search sorted pv segment list backwards, we
hit the requested segment immediatelly.

This patch applies this tiny change.
(and saves >30% of processing time when >3000LVs segments are on one PV!)

To discourage using this inefficient function from other code,
it is moved to pv_manip.c and used static for now:-)
2010-03-31 17:21:40 +00:00
Milan Broz
4e8859d851 Remove vg_validate call when parsing cached metadata.
vg_validate call is an adept to optimisation, it is very
ineeficient and slow.

Anyway, we should call it only before writing data to disk.

The call in lvmcache was just temporary validation,
we realy do not need to revalidate cached metadata
every time.
(Actually, I added that there just to prove that cache works
properly and forgot to remove it.)

Patch removes it from lvmcache completely, this can hit only
internal bug in export function (and this bug must
be detected in any vg_write call anyway before).
2010-03-31 17:20:44 +00:00
Milan Broz
562ae607d2 Use hash table for quick lv reference when reading metadata.
The _read_vg uses already hash for PVs to optimise
reading of large VGs and avoiding repeated PV list traversing.

Use the same aproach to speed up parsing VG with many LVs.
2010-03-31 17:20:02 +00:00
Mikulas Patocka
2426656b07 A missing space in the error message.
Add missing parentheses to an error message

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
2010-03-31 12:06:30 +00:00
Mikulas Patocka
898d3c655d Don't kill the parent if debugging.
If dmeventd runs with -d flag, it doesn't fork into backgroud.
The command kill(getppid(), SIGTERM) attempts to kill the parent dmeventd
process, however, if there is no parent, it kills whatever process spawned
dmeventd. In case of debugging with gdb, the parent is gdb, thus
kill(getppid(), SIGTERM) kills the debugger.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
2010-03-31 12:01:49 +00:00
Zdenek Kabelac
855e554051 Update for make install. 2010-03-31 07:43:41 +00:00
Zdenek Kabelac
4aac2c543b Install generated 10-dm.rules from builddir.
Patch just check whether file is generated in builddir otherwise srcdir file
is used.
2010-03-31 07:40:20 +00:00
Zdenek Kabelac
78024e7be6 Use .commands created in builddir for symlink installation. 2010-03-31 07:37:16 +00:00
Jonathan Earl Brassow
18f0009a41 We only need one extent for the mirror log. So, when specifying
the devices on which to place the individual mirror parts, we can
specify the log devices as $dev:x instead of $dev:x-y.
2010-03-31 03:56:25 +00:00
Jonathan Earl Brassow
1cf64de2bf For the mirror repair tests, we should use --ignoremonitoring because
we are running the repair manually.  If we don't ignore, then dmeventd
and the manually run repair can collide.  (We should still get clean
results in such a case, but it makes it harder to validate the test
results.)
2010-03-31 02:36:03 +00:00
Zdenek Kabelac
b1d6be8ec6 Update for dmeventd changes. 2010-03-30 14:44:57 +00:00
Zdenek Kabelac
137a25906d Count only readable size for memlock stats.
As we mlock() only readable pages, makes statistics only
for readable bytes.
2010-03-30 14:41:58 +00:00
Zdenek Kabelac
2b298cbb32 Update memlock
Code moves initilization of stats values to _memlock_maps().
For dmeventd we need to use mlockall() - so avoid reading config value
and go with _use_mlockall code path.

Patch assumes dmeventd uses C locales!
Patch needs the call or memlock_inc_daemon() before memlock_inc()
(which is our common use case).

Some minor code cleanup patch for _un/_lock_mem_if_needed().
2010-03-30 14:41:23 +00:00
Zdenek Kabelac
5361b5de37 Force C locale
As we need to use mlockall() enforce "C" locales for dmeventd.
2010-03-30 14:40:30 +00:00
Zdenek Kabelac
f1fee292e4 Updated syslog messages
Use our common '.' end format for syslog messages.
2010-03-30 14:39:55 +00:00
Zdenek Kabelac
9c9b1f5bc8 Release pool in the same reversed order
and with lowered priority after _memlock_dec.
2010-03-30 14:38:56 +00:00
Zdenek Kabelac
7e64d04db1 Fix resouce leak in error path
If the error path of _register_for_event() calls _free_thread_status()
_lib_put() call is missing.
To make thing simpler move this _lib_put() into common error path code.
2010-03-30 14:37:28 +00:00
Zdenek Kabelac
2355f677fb Remove mlockall() form dmeventd
As the header file <sys/mman.h> was not included in dmeventd.c
thus missed definition of MCL_CURRENT so this patch only makes
it obvious we were not locking memory here.

This patch has no functional change.
Later part of this patch set handles mlockall() via memlock_inc_daemon().
2010-03-30 14:35:40 +00:00
Alasdair Kergon
e42bb91382 Fix --alloc contiguous policy only to allocate one set of parallel areas. 2010-03-29 17:59:46 +00:00
Petr Rockai
28aa06675f Work around a problem in t-mirror-lvconvert: different PV order on the
commandline of lvconvert can lead to allocation failures even if enough space
is available. A separate testcase demonstrating the problem will follow.
2010-03-29 16:50:27 +00:00
Petr Rockai
6e86d0bb03 Enforce distinct-PV allocation of snapshot/origin pairs in vgsplit test. 2010-03-29 16:40:51 +00:00
Mike Snitzer
4342431e39 Do not allow {vg|lv}change --ignoremonitoring if on clustered VG.
clvmd does not propagate DMEVENTD_MONITOR_IGNORE.

Update get_activation_monitoring_mode() to check if the VG that the
LV is being activated in is clustered.  If so, skip it.

Any get_activation_monitoring_mode() error will cause the associated LV
(or VG) to be skipped during activation.  Both vgchange_single() and
lvchange_single(), which call get_activation_monitoring_mode(), are
called by their respective process_each_..() method.
2010-03-29 16:09:40 +00:00
Zdenek Kabelac
7452298131 Fix also error code from clean:
And fix previous commit which missed test.
2010-03-29 15:53:11 +00:00
Zdenek Kabelac
e4be66b1ac Oops, avoid returning errors from shell to makefile for builddir == srcdir 2010-03-29 15:39:25 +00:00
Zdenek Kabelac
f829547a13 Avoid modification of .in files outside man directory.
(i.e. this rule actually tried to change ../make.tmpl in some cases and
left this file completely broken)
2010-03-29 14:22:00 +00:00
Zdenek Kabelac
ea64fe3b51 Split long line in Makefile and keep $abs_top_buildir as shell variable
within init.sh.
2010-03-29 14:19:42 +00:00
Zdenek Kabelac
f293d0b951 Fixing another set of distclean problems where we left some generated files
in clvmd, dmevend, man, tests.

Don't include dependency files for clow and cscope.out targets

Improve dependency tracking for dmeventd and liblvm2cmd sources.
2010-03-29 14:17:59 +00:00
Zdenek Kabelac
1967fd2429 Update cflow file generation - support build dir and use $(top_srcdir)
to obtain sources. Create make.tmpl target for
simplier generation of cflow files with the help of
CFLOW_LIST, CFLOW_LIST_TARGET, CFLOW_TARGET.
Still cflow usage is not perfect.
2010-03-29 14:11:17 +00:00
Zdenek Kabelac
0fec0340c2 distclean fixes
Move daemons/ and lib/ subtargets to their Makefiles so we don't get
double cleanup error during execution of distclean target.
Instead of duplicating clean target inside distclean target,
just use it as a subtarget and avoid add duplicating code.
2010-03-29 14:09:25 +00:00
Zdenek Kabelac
bed67320b1 Use $(top_srcdir) for sources and add cscope.out to distclean targets. 2010-03-29 14:07:56 +00:00
Zdenek Kabelac
8544e24770 Add $(LIB_STATIC) to TARGETS so it's cleaned in the same way
as other libraries in project.
Add dmeventd.gcda dmeventd.gcno to CLEAN_TARGETS.
2010-03-29 14:07:01 +00:00
Zdenek Kabelac
727b7b7e8c Avoid hard sed replacement - i.e. quick test change in make.tmpl
could avoid recofiguration steps in same debug cases.
2010-03-29 14:06:06 +00:00
Zdenek Kabelac
bd39789144 Fixing compilation warning: implicit declaration of function ‘umask’ 2010-03-29 14:05:17 +00:00
Petr Rockai
eb10fc962b When a scsi_debug modprobe fails, skip the test instead of failing it. 2010-03-28 15:52:04 +00:00
Jonathan Earl Brassow
3318c41356 Add ability to create mirrored logs for mirror LVs.
This check-in enables the 'mirrored' log type.  It can be specified
by using the '--mirrorlog' option as follows:
#> lvcreate -m1 --mirrorlog mirrored -L 5G -n lv vg

I've also included a couple updates to the testsuite.  These updates
include tests for the new log type, and some fixes to some of the
*lvconvert* tests.
2010-03-26 22:15:43 +00:00
Mike Snitzer
dc2c0b1d51 Use a real socket for singlenode clvmd to fix clvmd's high cpu load. 2010-03-26 15:45:36 +00:00
Mike Snitzer
1de8da23f5 Fix clvmd cluster propagation of dmeventd monitoring mode.
clvmd's do_lock_lv() already properly controls dmeventd monitoring based
on LCK_DMEVENTD_MONITOR_MODE in lock_flags -- though one small fix was
needed for this to work: _lock_for_cluster() must treat
dmeventd_monitor_mode()'s return as a tri-state value.

Also cleanup do_lock_lv() to:
- explicitly init_dmeventd_monitor() based on LCK_DMEVENTD_MONITOR_MODE
- no longer reset init_dmeventd_monitor() to default at the end of
  do_lock_lv() -- it is unnecessary
2010-03-26 15:40:13 +00:00
Zdenek Kabelac
340fa684af Updates .so links for plugins 2010-03-26 13:21:28 +00:00
Alasdair Kergon
34f06fa400 Allow ALLOC_ANYWHERE to split contiguous areas. 2010-03-25 21:19:26 +00:00
Alasdair Kergon
5b5337e4af . 2010-03-25 18:22:39 +00:00
Alasdair Kergon
d01de820d7 Use INTERNAL_ERROR definition consistently in internal error messages. 2010-03-25 18:22:04 +00:00
Alasdair Kergon
5aeea69c01 Add some assertions to allocation code. 2010-03-25 18:16:54 +00:00
Alasdair Kergon
8f6c0b24d8 more diagnostics 2010-03-25 12:16:17 +00:00
Alasdair Kergon
b8eddf1bdc more diagnostics 2010-03-25 12:14:14 +00:00
Alasdair Kergon
1469dc216a add debug mesg 2010-03-25 11:57:16 +00:00
Alasdair Kergon
3306ae2f42 . 2010-03-25 11:48:54 +00:00
Alasdair Kergon
a89db8e69c add debug mesgs and attempt to control which device is used for log as perhaps intended 2010-03-25 11:47:00 +00:00
Alasdair Kergon
8c6f06eb34 add debug output and attempt to control which device gets log as intended perhaps 2010-03-25 11:42:17 +00:00
Alasdair Kergon
fe3ed2b871 improve a few comments in last check-in 2010-03-25 02:40:09 +00:00
Alasdair Kergon
71cb3fc201 Introduce pv_area_used into allocation algorithm and add debug messages.
This is the next preparatory step towards better --alloc anywhere
support and is not intended to break anything that currently works so
please report any problems - segfaults, bogus data in the new debug
messages, or if the code now chooses bizarre allocation layouts.
2010-03-25 02:31:48 +00:00
Mike Snitzer
f9554c7d27 Revert having clvmd consult the lvm.conf "activation/monitoring".
Correctly implementing the intent of this change requires more
analysis.
2010-03-24 22:25:11 +00:00
Zdenek Kabelac
384f2fc38b Move declaration of struct dm_info info to declaration block. 2010-03-24 11:36:48 +00:00
Mike Snitzer
cd50107ae1 Improve activation monitoring option processing
. Add "monitoring" option to "activation" section of lvm.conf
. Have clvmd consult the lvm.conf "activation/monitoring" too.
. Introduce toollib.c:get_activation_monitoring_mode().
. Error out when both --monitor and --ignoremonitoring are provided.
. Add --monitor and --ignoremonitoring support to lvcreate.  Update
  lvcreate man page accordingly.
. Clarify that '--monitor' controls the start and stop of monitoring in
  the {vg,lv}change man pages.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2010-03-23 22:30:18 +00:00
Petr Rockai
94466645ee Also honour abort_on_internal_errors when log_fn is set. 2010-03-23 18:18:49 +00:00
Peter Rajnoha
1acb442f60 UDEV_SYNC_SUPPORT, not UDEV_SYNC! 2010-03-23 15:13:03 +00:00
Alasdair Kergon
cff0a562fd Allow dynamic extension of array of areas selected as allocation candidates. 2010-03-23 15:07:55 +00:00
Peter Rajnoha
299db29482 Export and use only valid cookie value in test suite. 2010-03-23 14:47:35 +00:00
Peter Rajnoha
cc5045d8ca Autoreconf.
(Strictly require libudev if udev_sync is used)
2010-03-23 14:44:42 +00:00
Peter Rajnoha
940c32c5dd Strictly require libudev if udev_sync is used.
This prevents some confusion when libudev was not found so udev_sync was disabled
automatically. Configure was successful though giving only a tiny warning.

Also, if "dmsetup udevcreatecookie" is used, never return 0x000000 as a result if
udev is not running and keep the output blank.
2010-03-23 14:43:18 +00:00
Peter Rajnoha
14ad41a065 Add support for ioctl's DM_UEVENT_GENERATED_FLAG.
We need to know whether we should wait for any uevent or not when
using udev_sync. A kernel patch was posted recently that changed the
way uevents are sent on dm device resume - it is sent only if the
device has been suspended before. There's also a new DM_UEVENT_GENERATED_FLAG
in the ioctl to notify userspace whether the event was generated.

If the uevent was not generated (e.g. the situation where the device is
*not* suspended and we call a resume), we just call dm_udev_complete
explicitly from within libdevmapper itself to prevent infinite waiting
while trying to synchronise with udev processing.
2010-03-23 14:38:37 +00:00
Alasdair Kergon
7eaf1f294d Avoid duplicate definitions. 2010-03-23 14:35:08 +00:00
Zdenek Kabelac
eb7692743f Remove const modifier for struct volume_group* from process_each_lv_in_vg().
Content of this pointer is not const during this function.
2010-03-23 14:24:04 +00:00
Mike Snitzer
7aa470c8d0 Don't allow resizing of internal logical volumes.
Prevent lvresize from being able to resize internal LVs: mirror legs
(*_mimage_*), mirror log (*_mlog), snapshot placeholder LVs (snapshot*)
and others.  Resizing these would leads to unexpected metadata and
sometimes crashes (in case of growing snapshot*).
2010-03-20 03:44:04 +00:00
Alasdair Kergon
4237ca85f5 Fix libdevmapper-event pkgconfig version string to match libdevmapper. 2010-03-19 18:33:55 +00:00
Dave Wysochanski
99cdfa1e71 Update WHATS_NEW for last checkin. 2010-03-18 17:32:25 +00:00
Dave Wysochanski
39f92f3cfd Avoid scanning all pvs in the system if operating on a device with mdas.
When we pv_read() a device that has an orphan vgname, we might need to scan
the system to be sure this is true.  However, if the PV has mdas, there's
no way possible for it to have an orphan vgname unless it is a true orphan.
Some areas of the code were optimized to take advantage of this fact, while
others were not (we would still do the expensive scan if a device had mdas
but had an orphan VG).

This patch unifies the code so that every place we are operating on such
a PV, we skip the expensive scan if there are mdas.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Acked-by: Petr Rockai <prockai@redhat.com>
Acked-by: Alasdair G Kergon <agk@redhat.com>
2010-03-18 17:29:12 +00:00
Alasdair Kergon
9cc29191d4 autoreconf & add missing WHATS_NEW entry 2010-03-18 13:24:35 +00:00
Petr Rockai
de33332491 Run both locking_type 1 and 3 tests (forgot to uncomment locking_type 1 before
last checkin).
2010-03-18 09:28:42 +00:00
Petr Rockai
f9ddafdc21 Skip locking_type 3 tests instead of failing when system-wide clvmd is running. 2010-03-18 09:27:39 +00:00
Petr Rockai
fdefd5eff9 Add infrastructure for running the functional testsuite with locking_type set
to 3, using a local (singlenode) clvmd.
2010-03-18 09:19:30 +00:00
Dave Wysochanski
e9e5dbb628 Add pvchange -u --all to testsuite.
This can be an interesting test case when using PVs with --metadatacopies 0.
2010-03-17 17:02:01 +00:00
Mike Snitzer
0e57141bb5 testsuite: get stacktrace if test drops core
Requires lvm be built with debugging (-g).
Also requires 'ulimit -c' be non-zero (to drop core file).
2010-03-17 14:55:28 +00:00
Milan Broz
842a3664f0 Disable long living process flag in lvm2app.
This option should be configurable, but for now
do not set it at all.

(lvm2app is used in udisks probers and there
cac cause several nasty races when trying to update
lvmcache during rescan.)
2010-03-17 14:45:28 +00:00
Milan Broz
6cf89391b6 Fix pvcreate device check.
If user try to vgcreate or vgextend non-existent VG,
these messages appears:

# vgcreate xxx /dev/xxx
  Internal error: Volume Group xxx was not unlocked
  Device /dev/xxx not found (or ignored by filtering).
  Unable to add physical volume '/dev/xxx' to volume group 'xxx'.
  Internal error: Attempt to unlock unlocked VG xxx.

(the same with existing VG and non-existing PV & vgextend)
# vgextend vg_test /dev/xxx
...

It is caused because code tries to "refresh" cache if
md filter is switched on using cache destroy.

But we can change filters and rescan even without this
machinery now, just use refresh_filters
(and reset md filter afterwards).

(Patch also  discovers cache alias bug in vgsplit test,
fix it by using better filter line.)
2010-03-17 14:44:18 +00:00
Alasdair Kergon
ca5fd21509 Suppress repeated errors about the same missing PV uuids.
Bypass full device scans when using internally-cached VG metadata.
2010-03-17 02:11:18 +00:00
Alasdair Kergon
d14471e44d fix last checkin 2010-03-16 19:06:57 +00:00
Alasdair Kergon
5197fd72c4 Only do one full device scan during each read of text format metadata. 2010-03-16 17:30:00 +00:00
Alasdair Kergon
c17ac96509 Remove unnecessary full_scan parameter from get_vgids and get_vgnames calls. 2010-03-16 16:57:03 +00:00
Alasdair Kergon
4acb30c464 replace existing_pv with existing_pvl 2010-03-16 15:48:27 +00:00
Alasdair Kergon
d61f611adc Look up missing PVs by uuid not dev_name in _pvs_single to avoid invalid stat.
Make find_pv_in_vg_by_uuid() return same type as related functions.
2010-03-16 15:30:48 +00:00
Alasdair Kergon
b19719801b Introduce is_missing_pv(). 2010-03-16 14:37:38 +00:00
Milan Broz
83a2bc8ec4 Fix clvmd Makefile to not overwrite LIBS from template definition 2010-03-16 08:47:46 +00:00
Alasdair Kergon
aab6f07d14 post-release 2010-03-09 14:01:47 +00:00
Alasdair Kergon
db8748cefd pre-release cleanups 2010-03-09 13:42:28 +00:00
Alasdair Kergon
23b3f55bfb pre-release 2010-03-09 13:13:07 +00:00
Alasdair Kergon
e6573a9bed some missing debug messages 2010-03-09 12:31:51 +00:00
Zdenek Kabelac
d6a19426c7 Update comments for selecting maps
Use dm_snprintf and check result whether we create correct /proc path name
2010-03-09 10:25:50 +00:00
Alasdair Kergon
9426d45cde . 2010-03-09 03:20:12 +00:00
Alasdair Kergon
48d0cd3564 Misc cleanups in the new mlock code, incl. improving some variable names
& messages; using more statics (for now) to avoid redundant
recalculation; validating config file just once on loading; keeping maps
file open.
2010-03-09 03:16:11 +00:00
Zdenek Kabelac
89f61acf2c Use mlock() only on 'r' memory maps 2010-03-08 17:14:21 +00:00
Milan Broz
985d87b5eb Add --help dmsetup option as the synonym for help command. 2010-03-08 16:05:07 +00:00
Milan Broz
0be2dfefdb Add --showkeys parameter description into dmsetup man page.
Also fix minor warning (-c is parameter) in man page formatting.
2010-03-08 16:04:32 +00:00
Zdenek Kabelac
89f30d2df7 Unconditionaly ignore also Virtual Dynamically-linked Shared Object
(VDSO on 32bit is VSyscall on 64bit)
It seems it could be locked on 64bit kernels running 32bit binaries,
but it makes troubles on real 32bit machines where mlock() returns
error when trying to lock such map area. (0xffffe000)
Behavior of mlockall() seems to be similar.
2010-03-08 15:55:52 +00:00
Zdenek Kabelac
0b648ce87b Use '_' prefix for local static variable. 2010-03-05 15:14:03 +00:00
Zdenek Kabelac
0c897ecefd mlockall() -> mlock()
This patch adds a new implementation of locking function instead
of mlockall() that may lock way too much memory (>100MB).
New function instead uses mlock() system call and selectively locks
memory areas from /proc/self/maps trying to avoid locking areas
unused during lock-ed state.

Patch also adds struct cmd_context to all memlock() calls to have
access to configuration.

For backward compatibility functionality of mlockall()
is preserved with "activation/use_mlockall" flag.

As a simple check, locking and unlocking counts the amount of memory
and compares whether values are matching.
2010-03-05 14:48:33 +00:00
zkabelac
f2a6d937a6 Use UDEV_LIBS, and link -ludev only when needed. 2010-03-04 12:12:34 +00:00
zkabelac
76f4498303 Use DL_LIBS, remove -ldl from global LIBS and link -ldl only when needed. 2010-03-04 12:10:40 +00:00
zkabelac
5451f79f55 This patch add SELINUX_LIBS and STATIC_LIBS variables.
For static builds dependency for SELinux libs is not handled by 'ar'.
Till better solution is found, for static builds STATIC_LIBS is used.

Patch updates SELinux detection to use 3rd & 4th parameter for Success/Fail.
Also removes detection of pthread from this check as we know which
version of libdevmapper we are going to link with lvm after merge.

SELinux header check moved to the SELinux test code.
2010-03-04 12:08:26 +00:00
zkabelac
4e69ed9c98 Removes -rdynamic from linking of lvm.static and dmeventd.static. 2010-03-04 12:03:54 +00:00
zkabelac
75f55c758c Pthread linking change
Create new substituted variable PTHREAD_LIBS and link this library
only with tools/libs which really needs it - i.e. dmeventd.

Check for libpthread only for builds with clvmd or dmeventd.

Remove variable LIB_PTHREAD
2010-03-04 11:21:05 +00:00
zkabelac
f9d1b67e86 Readline linking update
Modify linking of readline library. Create new  substituted varible
READLINE_LIBS - readline library is linked ONLY with tools that really use
it - i.e. lvm. (Static lvm does not use readlin).
Previous behaviour put this library into the variable LIBS and thus
linked it with all created object files of lvm project (i.e. plugins...).

READLINE detection is simplified.

Termcap library is linked in only if readline library doesn't have its own
dependency (i.e. old distributions).
2010-03-04 11:19:15 +00:00
zkabelac
04ecf25522 Introduce LVMINTERNAL_LIBS
Keep dependency libraries for liblvm-internal in one place.
2010-03-04 11:12:39 +00:00
zkabelac
52b51b70eb As fsadm is installed by default - it's a common practice to rather
print help text in '--disable' form for such case.
2010-03-04 11:09:08 +00:00
zkabelac
3fdc1390fe This patch moves inclusion of the make.tmpl before DEFS modification,
so it goes in the same order on the compilation line.
(i.e. HAVE_CONFIG_H goes first)
2010-03-04 09:56:56 +00:00
zkabelac
f309bd12c7 Use consistently $() instead of ${} for all Makefile variables,
thought both usage forms are correct.
2010-03-04 09:56:01 +00:00
zkabelac
d388f94c7f Replace CFLOW_CMD only in make.tmpl and use it as variable elsewhere. 2010-03-04 09:53:08 +00:00
zkabelac
0f878a226d Use $(top_builddir) for inclusion of make.tmpl in Makefiles. 2010-03-04 09:51:37 +00:00
zkabelac
a38f899d7e Use datarootdir and fix warning during configure process:
config.status: WARNING:  'make.tmpl.in' seems to ignore the --datarootdir setting.
2010-03-04 09:48:19 +00:00
zkabelac
6c09d7b60c Usage of AC_PROG_SED and AC_PROG_MKDIR_P requires autoconf version 2.61. 2010-03-04 09:46:38 +00:00
Mike Snitzer
8cb8f65010 Handle a misaligned device that reports a -1 alignment_offset.
The kernel's blk_stack_limits() function may flag a device as
'misaligned'.  If it does the alignment_offset will be -1.

Update set_pe_align_offset() to accommodate this corner case.
2010-03-02 21:56:14 +00:00
Alasdair Kergon
49437d9a07 Extend core allocation code in preparation for mirrored log areas. 2010-03-01 20:00:20 +00:00
fabbione
69962eae7c - fix whitespaces all over (tabs/spaces)
- increase timeout to 30 secs (on Chrissie request)
- source both cluster and clvmd for options (like all the other cluster
  init scripts)
- add clustered_vgs and _lvs commodity fns
- move rh_status* fns at the top, so they can be reused
- heavily cleanup start and stop fns from redundant code and unnecessary
  loops
- improve output from different operations
- make the init script lsb compliant
- don´t force kill of the daemon, send only a TERM signal and then wait
for it to exit
- Resolves rhbz#533247
2010-02-26 13:07:43 +00:00
Milan Broz
c440f39874 Remove lvs_in_vg_activated_by_uuid_only call.
There is no difference from lvs_in_vg_activated now,
convert all users to this call.
2010-02-24 20:01:40 +00:00
Milan Broz
40090be220 Always query device by uuid only.
lvm2 devices have always UUID set even if imported from lvm1 metadata.

Patch removes name argument from dev_manager_info call and converts
all activation related calls to use query by UUID.

Also it simplifies mknode call (which is the only user on mknodes parameter).
2010-02-24 20:00:56 +00:00
Dave Wysochanski
281a92bf74 Update WHATS_NEW. 2010-02-24 18:21:15 +00:00
Dave Wysochanski
bbacb7fff0 Add Doxygen file for lvm2app to generate documentation from lvm2app.h.
A simple Doxygen file for lvm2app documentation.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-24 18:16:54 +00:00
Dave Wysochanski
cccdc3eea0 Update doxygen comments for lvm2app.h.
Fix add/remove tag function headers.
Fix a lot of little problems with doxygen comments.
Clarify the basic objects and their handles, and place functions with their
appropriate handles/objects.
All this cleanup moves automatic documentation of lvm2app much closer to being
useful as official documentation.  In the future I will add some examples
and plan to build the examples as part of the unit tests.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-24 18:16:44 +00:00
Dave Wysochanski
aa9c970f97 Update lvm2app interactive unit test for vg/lv tags.
Simple test of vg/lv tag addition/deletion.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-24 18:16:35 +00:00
Dave Wysochanski
4f81adb3c9 Add lvm_lv_get_tags(), lvm_lv_add_tag(), and lvm_lv_remove_tag().
Add lvm2app functions to manage LV tags.
For lvm_lv_get_tags(), we return a list of tags, similar to other
functions that return lists.  An empty list is returned if there
are no tags.  NULL is returned if there is a problem obtaining
the list of tags.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-24 18:16:26 +00:00
Dave Wysochanski
b8f75d74ae Add lvm_vg_get_tags(), lvm_vg_add_tag(), and lvm_vg_remove_tag().
Add lvm2app functions to manage VG tags.
For lvm_vg_get_tags(), we return a list of tags, similar to other
functions that return lists.  An empty list is returned if there
are no VG tags.  NULL is returned if there is a problem obtaining
the list of tags.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-24 18:16:18 +00:00
Dave Wysochanski
befffd65bf Add tag_list_copy() supporting function inside lvm2app.
Add a supporting function to copy a list of internal tags to lvm2app list.
We need to put this here because of the lvm_str_list_t type which we export
in lvm2app.h.  If we didn't export this type, we could put this in the
internal library and use struct str_list.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-24 18:16:10 +00:00
Dave Wysochanski
610cd1a531 Add dm_pool_strdup to allocate memory and copy a tag in {lv|vg}_change_tag()
We need to allocate memory for the tag and copy the tag value before we
add it to the list of tags.  We could put this inside lvm2app since the
tools keep their memory around until vg_write/vg_commit is called, but
we put it inside the internal library to minimize code in lvm2app.
We need to copy the tag passed in by the caller to ensure the lifetime of
the memory until the {vg|lv} handle is released.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-24 18:15:57 +00:00
Dave Wysochanski
08d06030dc Refactor lvchange_tag() to call lv_change_tag() library function.
Similar refactoring to vgchange - pull out common parts and put into
library function for reuse.  Should be no functional change.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-24 18:15:49 +00:00
Dave Wysochanski
083687d0fd Refactor vgcreate to call new vg_change_tag() function.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-24 18:15:40 +00:00
Dave Wysochanski
f874581701 Refactor _vgchange_tag() to vg_change_tag() library function.
Pull out common code to be called from tools as well as lvm2app.
Leave archive() at tool level so we can use from vgcreate
as well as vgchange.  Should be no functional change.
- add stack macro in vgchange

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-24 18:15:05 +00:00
Petr Rockai
e66d76ac5a Add _mlog devices to dependency trees using UUID, not name, in activation. 2010-02-23 15:49:52 +00:00
Petr Rockai
5d7ad8ae75 Relax a check on blkid exit value, which seems to be different in different
versions. Fixes a spurious test failure introduced with -o pipefail.
2010-02-22 14:47:55 +00:00
Mike Snitzer
99ab428344 Do not reload origin again in lv_remove_single() if it had a merging
snapshot.  vg_remove_snapshot() will have already performed the required
reload.
2010-02-17 23:36:45 +00:00
Mike Snitzer
eaef92b02f Refactor snapshot-merge deptree and device removal to support info-by-uuid
Add a merging snapshot to the deptree, using the "error" target, rather
than avoid adding it entirely.  This allows proper cleanup of the -cow
device without having to rename the -cow to use the origin's name as a
prefix.

Move the preloading of the origin LV, after a merge, from
lv_remove_single() to vg_remove_snapshot().  Having vg_remove_snapshot()
preload the origin allows the -cow device to be released so that it can
be removed via deactivate_lv().  lv_remove_single()'s deactivate_lv()
reliably removes the -cow device because the associated snapshot LV,
that is to be removed when a snapshot-merge completes, is always added
to the deptree (and kernel -- via "error" target).

Now when the snapshot LV is removed both the -cow and -real devices
get removed using uuid rather than device name.  This paves the way
for us to switch over to info-by-uuid queries.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2010-02-17 22:59:46 +00:00
Petr Rockai
4982eabe07 In testsuite, catch also failures that happen in the middle of a pipeline. 2010-02-17 15:41:28 +00:00
Dave Wysochanski
c66ce519f4 Add nightly test to cover vg/lv tags add/delete.
Simple nightly test coverage for adding / deleting lv/vg tags via
create and change functions.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-16 17:49:18 +00:00
Dave Wysochanski
e74208778d Add get_{pv|vg|lv}_field() nightly test helper functions.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-16 17:48:46 +00:00
Alasdair Kergon
67a0635e75 use lvm2app to refer to library now 2010-02-16 01:14:34 +00:00
Alasdair Kergon
def0511361 post-release 2010-02-16 00:27:01 +00:00
Alasdair Kergon
29b8138482 pre-release 2010-02-15 23:53:15 +00:00
zkabelac
c6ee487cfc Update 2010-02-15 20:32:27 +00:00
zkabelac
1a6df48bd0 Use dm_report_field_int32 instead of dm_report_field_uint64 for printing '-1' 2010-02-15 20:27:33 +00:00
Dave Wysochanski
6cdf300933 Update lvm2app.h comments to remove hidden VG comment list vgnames/vgids.
Peter recently fixed lvm_list_vg_names() and lvm_list_vg_uuids() so they
no longer return hidden names.  Remove the comment in lvm2app.h.
2010-02-15 19:55:49 +00:00
zkabelac
5365974d6c * update for last 3 commits 2010-02-15 18:42:51 +00:00
zkabelac
f1acefd241 Fix dm_report_field_uint64 to really use 64bit.
(function is currently not in use)
2010-02-15 18:36:48 +00:00
zkabelac
079fec6c5a Cleanup float arithmetic gcc warning. 2010-02-15 18:35:06 +00:00
zkabelac
14258c93de * add more 'const' - fixes gcc constness warning 2010-02-15 18:34:00 +00:00
Peter Rajnoha
03315d77f1 Add LVM_SUPPRESS_LOCKING_FAILURE_MESSAGES environment variable to suppress error
and warning mesages while --ignorelockingfailure is used.
2010-02-15 16:46:56 +00:00
Peter Rajnoha
34843e7d6c Remove hard-coded rule to skip _mimage devices in 11-dm-lvm.rules.
There's a tiny period of time when the _mimage device is visible during
downconversion from mirror to linear. Since it is visible, we need to
create the symlinks, otherwise warning messages will be issued about udev
not creating those symlinks. We have to rely on udev flags completely.
2010-02-15 16:38:22 +00:00
Peter Rajnoha
4efa67c4ec Update man page for dmsetup: --udevcookie, udevcreatecookie and udevreleasecookie. 2010-02-15 16:32:24 +00:00
Peter Rajnoha
2419c1c540 Use udev transactions in testsuite. 2010-02-15 16:30:13 +00:00
Peter Rajnoha
34055f40ac Don't use LVM_UDEV_DISABLE_CHECKING environment variable anymore.
Set the state automatically based on udev and libdevmapper dev path comparison.
If these paths differ, disable udev checking.
2010-02-15 16:26:48 +00:00
Peter Rajnoha
276d32973f Several changes in dmsetup and libdevmapper:
- add DM_UDEV_DISABLE_LIBRARY_FALLBACK udev flag to rely on udev only

 - export dm_udev_create_cookie function to create new cookies on demand

 - add --udevcookie, udevcreatecookie and udevreleasecookie for dmsetup
   (to support "udev transactions" where one cookie value can be used for
    several dmsetup calls)

 - don't use DM_UDEV_DISABLE_CHECKING env. var. anymore and set the state
   automatically (based on udev and libdevmapper dev path comparison)
2010-02-15 16:21:33 +00:00
Peter Rajnoha
f3c51a049d Rename "stat" to "status" in dmeventd_snapshot.c.
Otherwise "warning: declaration of ‘stat’ shadows a global declaration"
will appear because it shadows "stat" from stat.h.
2010-02-15 12:55:20 +00:00
Dave Wysochanski
21573cd4f0 Update simple lvm2app unit test for new size apis.
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-14 03:23:07 +00:00
Dave Wysochanski
f25325f4bb Export lvm_pv_get_size(), lvm_pv_get_free(), lvm_pv_get_dev_size in lvm2app.
We add these exports to show the pv_size and pv_free and dev_size
fields.
Fixes rhbz561423.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-14 03:21:37 +00:00
Dave Wysochanski
5b12edfb34 Fix off by 512 sizes for lvm2app.
Internally we store sizes in sectors, but lvm2app exports sizes
in bytes.  We could get fancier and allow units configuration but
this fix should do for now.

Fixes rhbz561422.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-02-14 03:21:06 +00:00
Mike Snitzer
4697026132 Add 'fail_if_percent_unsupported' arg to _percent() and _percent_run(). 2010-02-10 15:56:20 +00:00
Mike Snitzer
6c39b8f4f0 Add 'fail_if_percent_unsupported' arg to _percent() and _percent_run().
We unfortunately don't yet _know_, in dev_manager_snapshot_percent(), if
a snapshot-merge target is active (activation is deferred if dev is
open); so we can't short-circuit origin devices based purely on existing
LVM LV attributes.

Set 'fail_if_percent_unsupported' in dev_manager_snapshot_percent() for
a merging origin LV, otherwise passing unsupported LV types to _percent
will lead to a default successful return with percent_range as
PERCENT_100.

For a merging origin, PERCENT_100 will result in a polldaemon that runs
infinitely (because completion is PERCENT_0).
2010-02-10 14:38:24 +00:00
Mike Snitzer
4a9af35527 Remove false "failed to find tree node for <lv>" error from _cached_info().
When activating a merging origin it is valid, and expected, to not have
a node in the deptree for both the origin and its merging snapshot.  The
_cached_info() caller is only concerned with whether a device is open.
If there isn't a node in the tree the associated device is definitely
not open.
2010-02-08 23:28:06 +00:00
Petr Rockai
881056af0b Make lvconvert --repair --use-policies exit with success when no action is needed. 2010-02-06 07:44:16 +00:00
Mike Snitzer
e5df490d5e Add multiple snapshot lv 'lvconvert --merge @tag' support via process_each_lv(). 2010-02-05 22:50:56 +00:00
Mike Snitzer
7c0d6057c5 Switch lvconvert_single() over to using get_vg_lock_and_logical_volume()
This change was deferred to help ease the review of previous refactoring
related to using process_each_lv() for lvconvert's merge support.  Not
that doing so _really_ helped but...

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2010-02-05 22:47:22 +00:00
Mike Snitzer
3e5a54a791 lvconvert --merge @tag support
Switch lvconvert's --merge code over to using process_each_lv().  Doing
so adds support for a single 'lvconvert --merge' to start merging
multiple LVs (which includes @tag expansion).

Add 'lvconvert --merge @tag' testing to test/t-snapshot-merge.sh

Adjust man/lvconvert.8.in to reflect these expanded capabilities.

The lvconvert.c implementation requires rereading the VG each iteration
of process_each_lv().  Otherwise a stale VG instance associated with
the LV passed to lvconvert_single_merge() would result in stale VG
metadata being written back out to disk.  This overwrote new metadata
that was written when a previous snapshot LV finished merging (via
lvconvert_poll).  This is only an issue when merging multiple LVs that
share the same VG (a single VG is typical for most LVM configurations on
system disks).

In the end this new support is very useful for performing a "system
rollback" that requires multiple snapshot LVs be merged to their
respective origin LV.

The yum-utils 'fs-snapshot' plugin tags all snapshot LVs that it creates
with a common 'snapshot_tag' that is unique to the yum transaction.
Rolling back a yum transaction, that created LVM snapshots with the tag
'yum_20100129133223', is as simple as:
  lvconvert --merge @yum_20100129133223

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2010-02-05 22:44:37 +00:00
Mike Snitzer
dbfd6e0d12 Prepare for _get_lvconvert_vg() reuse as part of a larger lvconvert.c
refactoring.

Document the need to cleanup the "name" args passed around polldaemon,
lvconvert and pvmove.  It is quite a mess.

Annotate the unused nature of the existing poll_fns->get_copy_vg
methods' 'uuid' arg.
2010-02-05 22:40:49 +00:00
Jonathan Earl Brassow
f607a0a0a6 Adding a new mimage (leg/copy) to a mirror behaves differently
depending on if the mirror has a 'core' or 'disk' log.  When there
is a disk log, the new leg is added by stacking a new mirror on
top of the old (one leg is the old mirror and the other leg is the newly
added device).  When the log is a 'core' log, the new leg is simply added
to the existing mirror and all the devices are re-synced.

The logic that handles collapsing the stacked 'disk' log mirror was
having the effect of causing 'core' logged mirrors to begin resync'ing
for a second time.  I have used the 'CONVERTING' flag to indicate that
a mirror is converting by way of stacking.  This is no longer set for
up-converting core logs.  The final 'collapse' logic can safely be skipped
for 'core' log mirrors - getting rid of the second resync.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
2010-02-05 21:49:16 +00:00
Peter Rajnoha
b15ada28ed This is related to liblvm and its lvm_list_vg_names() and lvm_list_vg_uuids() functions
where we should not expose internal VG names/uuids (the ones with "#" prefix )through the
interface. Otherwise, we could end up with library users opening internal VGs which will
initiate locking mechanism that won't be cleaned up properly.

"#orphans_{lvm1, lvm2, pool}" names are treated in a special way, they are truncated first
to "orphans" and this is used as a part of the lock name then (e.g. while calling lvm_vg_open()).
When library user calls lvm_vg_close(), the original name "orphans_{lvm1, lvm2, pool}"
is used directly and therefore no unlock occurs.

We should exclude internal VG names and uuids in the lists provided by lvmcache:
lvmcache_get_vgids() and lvmcache_get_vgnames().
2010-02-03 14:08:39 +00:00
Mike Snitzer
6610f9bdbc Add %ORIGIN support to lv{create,extend,reduce,resize} --extents option
Allow the number of logical extents to be expressed (for a snapshot) as
a percentage of the total space in the Origin Logical Volume with the
suffix %ORIGIN.

Update the relevant man pages accordingly.  Eliminate inconsistencies
between the man pages and tools/commands.h

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2010-02-03 03:58:08 +00:00
Mike Snitzer
5f87626377 move WHATS_NEW entries whose changes were made after 2.02.60 to 2.02.61 2010-02-02 17:48:30 +00:00
Dave Wysochanski
54834a1761 Add copy constructor for struct metadata_area.
Clean up cut&paste code with proper copy constructor.
2010-02-02 16:26:34 +00:00
Alasdair Kergon
adf7081163 Remove pointless versioned symlinks to dmeventd plugin libraries. 2010-02-02 14:09:17 +00:00
Alasdair Kergon
a50b7c55d3 Fix dmeventd snapshot plugin build dependency. 2010-02-02 14:03:50 +00:00
Christine Caulfield
f4f171e04d Make clvmd -V return zero status rather than 1. 2010-02-02 08:54:29 +00:00
Dave Wysochanski
d5ea20a6eb Remove unnecessary "dmsetup resume" after "dmsetup create".
It is not necessary to resume after a create since the create will
create the table and make it active.
2010-02-01 19:43:22 +00:00
Jonathan Earl Brassow
5584f2ae46 Was using dm_list_iterate_items when I should have been using
*_safe.  This had the effect of segfaulting the log daemon when
converting a mirror from one log type to another.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
2010-01-27 22:28:05 +00:00
Milan Broz
b79d8b1ff0 Fix pvmove abort when temporary mirror fails to be cluster-aware.
When activation of pvmove mirror fails on cluster, some nodes
still possibly succeeded in activation.

 - Explicitly deactivate that mirror to be sure
 - properly pair suspend/resume calls to not cause memory lock problems in clvmd

Code cannot simply call _finish_pvmove on cluster in this situation, because
changed LVs are suspended twice (causing memory inbalance) and also temporary
mirror is activated when it is not expected (and we know that it failed already).

Patch prepares special function which remove temporary mirror references from
metadata and then resumes changed LVs.
2010-01-27 13:29:11 +00:00
Milan Broz
1c33c0027f Always query device by using uuid only and not name in clvmd.
Otherwise confusion with the device of the same name
(but different UUID, e.g. non-lvm device) can happen.
2010-01-27 13:23:57 +00:00
Milan Broz
aef0a98840 Add some missing vg_revrts calls when pvmove aborts. 2010-01-26 08:01:18 +00:00
Milan Broz
9d23a04c0e Unlock shared lock if activation calls failed.
Clvmd should unlock new lock if activation in device-mapper fails.
2010-01-26 08:00:02 +00:00
Milan Broz
453bab220b Fix return code of info callbacks.
In dev_manager_info 0 means error and 1 info is returned,
not that device exists (that value is part of info struct).

Fix query by uuid only (no name) which returns 0 when device
does not exist.
2010-01-26 07:58:23 +00:00
Alasdair Kergon
448f8c4240 pre-release 2010-01-23 02:14:30 +00:00
Mike Snitzer
5a414c6ad1 Default to checking LV's progress before waiting in _wait_for_single_lv.
Support "wait before testing" using '+' in pvmove and lvconvert
interval.  Doing so overrides the new default of sleeping after checking
the LV's progress.

Sleeping before checking progress can lead to extraneous polldaemons
being left running.  These polldaemons would have otherwise exited had
they checked before sleeping.  Checking progress before sleeping helps
workaround the subtly unreliable nature of "finished" state checking
in _percent_run.

Update test/t-mirror-names.sh to use '+' when providing its lvconvert
interval.
2010-01-22 21:59:42 +00:00
Jonathan Earl Brassow
65a942f01f a little more information for the cmirrord man page 2010-01-22 21:48:17 +00:00
Milan Broz
6df32fed62 Fix syntax error in cmirror init script
- break cannot be used here
 - remove CLVMDOPTS
 - add echo to stop call
2010-01-22 16:19:38 +00:00
Mike Snitzer
1199b48cd7 Eliminate extra ioctls just to check open_count in _add_new_lv_to_dtree.
DM >= 4.7.0 always returns open_count so just use the associated nodes'
existing info.

Introduce _cached_info() to get an LV's cached info.
2010-01-22 15:40:31 +00:00
Milan Broz
c493f56e1b Document undocumented commits which fixed some bugs.
Go WHATS_NEW!
2010-01-22 14:33:33 +00:00
Mike Snitzer
2e65b8ce5f Removed inactive_table check from _lv_has_target_type. This check
doesn't offer any benefit (that I can recall) and testing validates
that.
2010-01-22 13:28:54 +00:00
Milan Broz
42a6d954d3 Switch memory debugging off for now if compiled with dmeventd,
functions are not thread-safe in debug mode.
2010-01-22 13:20:32 +00:00
Milan Broz
8f73b4c3ae Fix syslog prefix in the first message (dmeventd->lvm). 2010-01-22 12:48:58 +00:00
Milan Broz
adcd06fccb Fix exported symbols for lvm2 dmeventd wrapper. 2010-01-22 12:38:16 +00:00
Milan Broz
c40380f72a Move error message to locking constructor and print
more descriptive message if locking fails instead of
"Locking type -1 initialisation failed."

Use read-only locking instead of misleading ignorelocking option
in message.
2010-01-22 09:45:29 +00:00
Alasdair Kergon
8e44e884a2 post-release 2010-01-22 01:09:09 +00:00
622 changed files with 70570 additions and 32726 deletions

View File

@@ -1,6 +1,6 @@
#
# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
# Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -15,7 +15,6 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
SUBDIRS = doc include man scripts
@@ -33,35 +32,37 @@ ifeq ("@APPLIB@", "yes")
SUBDIRS += liblvm
endif
ifeq ($(MAKECMDGOALS),distclean)
SUBDIRS += daemons/clvmd \
daemons/cmirrord \
daemons/dmeventd/plugins \
daemons/dmeventd \
lib/format1 \
lib/format_pool \
lib/locking \
lib/mirror \
lib/snapshot \
liblvm \
udev \
test/api \
test \
po
DISTCLEAN_TARGETS += lib/misc/configure.h lib/misc/lvm-version.h
DISTCLEAN_DIRS += lcov_reports*
ifeq ("@BUILD_LVMETAD@", "yes")
SUBDIRS += libdaemon
endif
ifeq ("@VERITYSETUP@", "yes")
SUBDIRS += verity
endif
# FIXME Should use intermediate Makefiles here!
ifeq ($(MAKECMDGOALS),distclean)
SUBDIRS = doc include man scripts \
libdaemon lib tools daemons libdm \
udev po liblvm test \
unit-tests/datastruct unit-tests/mm unit-tests/regex verity
endif
DISTCLEAN_DIRS += lcov_reports*
DISTCLEAN_TARGETS += config.cache config.log config.status make.tmpl
include make.tmpl
libdm: include
lib: libdm
lib: libdm libdaemon
liblvm: lib
daemons: lib tools
tools: lib device-mapper
daemons: lib libdaemon tools
tools: lib libdaemon device-mapper
verity: libdm
po: tools daemons
lib.device-mapper: include.device-mapper
libdm.device-mapper: include.device-mapper
liblvm.device-mapper: include.device-mapper
daemons.device-mapper: libdm.device-mapper
tools.device-mapper: libdm.device-mapper
device-mapper: tools.device-mapper daemons.device-mapper man.device-mapper
@@ -74,52 +75,103 @@ po.pofile: tools.pofile daemons.pofile
pofile: po.pofile
endif
ifneq ("@CFLOW_CMD@", "")
tools.cflow: lib.cflow
cflow: tools.cflow
ifneq ("$(CFLOW_CMD)", "")
tools.cflow: libdm.cflow lib.cflow
daemons.cflow: tools.cflow
cflow: include.cflow
endif
ifneq ("@CSCOPE_CMD@", "")
cscope.out: tools
@CSCOPE_CMD@ -b -R
cscope.out:
@CSCOPE_CMD@ -b -R -s$(top_srcdir)
all: cscope.out
endif
DISTCLEAN_TARGETS += cscope.out
check: all
$(MAKE) -C test all
check check_cluster check_local check_lvmetad unit: all
$(MAKE) -C test $(@)
ifneq ("@LCOV@", "")
.PHONY: lcov-reset lcov lcov-dated
install_system_dirs:
$(INSTALL_DIR) $(DESTDIR)$(DEFAULT_SYS_DIR)
$(INSTALL_ROOT_DIR) $(DESTDIR)$(DEFAULT_ARCHIVE_DIR)
$(INSTALL_ROOT_DIR) $(DESTDIR)$(DEFAULT_BACKUP_DIR)
$(INSTALL_ROOT_DIR) $(DESTDIR)$(DEFAULT_CACHE_DIR)
$(INSTALL_ROOT_DIR) $(DESTDIR)$(DEFAULT_LOCK_DIR)
$(INSTALL_ROOT_DIR) $(DESTDIR)$(DEFAULT_RUN_DIR)
$(INSTALL_ROOT_DATA) /dev/null $(DESTDIR)$(DEFAULT_CACHE_DIR)/.cache
install_initscripts:
$(MAKE) -C scripts install_initscripts
install_systemd_units:
$(MAKE) -C scripts install_systemd_units
install_tmpfiles_configuration:
$(MAKE) -C scripts install_tmpfiles_configuration
install_verity:
$(MAKE) -C verity install_verity
$(MAKE) -C man install_verity
LCOV_TRACES = libdm.info lib.info tools.info \
daemons/dmeventd.info daemons/clvmd.info
CLEAN_TARGETS += $(LCOV_TRACES)
ifneq ("$(LCOV)", "")
.PHONY: lcov-reset lcov lcov-dated $(LCOV_TRACES)
ifeq ($(MAKECMDGOALS),lcov-dated)
LCOV_REPORTS_DIR=$(top_srcdir)/lcov_reports-$(shell date +%Y%m%d%k%M%S)
LCOV_REPORTS_DIR := lcov_reports-$(shell date +%Y%m%d%k%M%S)
lcov-dated: lcov
else
LCOV_REPORTS_DIR=$(top_srcdir)/lcov_reports
LCOV_REPORTS_DIR := lcov_reports
endif
lcov-reset:
$(LCOV) -d $(top_srcdir)/dmeventd --zerocounters
$(LCOV) -d $(top_srcdir)/libdm --zerocounters
$(LCOV) -d $(top_srcdir)/lib --zerocounters
$(LCOV) -d $(top_srcdir)/tools --zerocounters
$(LCOV) --zerocounters $(addprefix -d , $(basename $(LCOV_TRACES)))
lcov: all
$(RM) -rf $(LCOV_REPORTS_DIR)
# maybe use subdirs processing to create tracefiles...
$(LCOV_TRACES):
$(LCOV) -b $(basename $@) -d $(basename $@) \
--ignore-errors source -c -o - | $(SED) \
-e "s/\(dmeventd_lvm.[ch]\)/plugins\/lvm2\/\1/" \
-e "s/dmeventd_\(mirror\|snapshot\|thin\|raid\)\.c/plugins\/\1\/dmeventd_\1\.c/" \
>$@
ifneq ("$(GENHTML)", "")
lcov: $(LCOV_TRACES)
$(RM) -r $(LCOV_REPORTS_DIR)
$(MKDIR_P) $(LCOV_REPORTS_DIR)
$(LCOV) -b $(top_srcdir)/libdm -d $(top_srcdir)/libdm -c -o $(LCOV_REPORTS_DIR)/libdm.info
$(LCOV) -b $(top_srcdir)/lib -d $(top_srcdir)/lib -c -o $(LCOV_REPORTS_DIR)/lib.info
$(LCOV) -b $(top_srcdir)/tools -d $(top_srcdir)/tools -c -o $(LCOV_REPORTS_DIR)/tools.info
DMEVENTD_INFO="$(LCOV_REPORTS_DIR)/dmeventd.info" ;\
DMEVENTD_INFO_A="-a $$DMEVENTDINFO" ;\
$(LCOV) -b $(top_srcdir)/dmeventd -d $(top_srcdir)/dmeventd -c -o $$DMEVENTD_INFO || DMEVENTD_INFO_A="" ;\
$(LCOV) $$DMEVENTD_INFO_A -a $(LCOV_REPORTS_DIR)/lib.info \
-a $(LCOV_REPORTS_DIR)/libdm.info \
-a $(LCOV_REPORTS_DIR)/tools.info \
-o $(LCOV_REPORTS_DIR)/lvm.info
ifneq ("@GENHTML@", "")
$(GENHTML) -o $(LCOV_REPORTS_DIR) -p $(top_srcdir) $(LCOV_REPORTS_DIR)/lvm.info
for i in $(LCOV_TRACES); do \
test -s $$i && lc="$$lc $$i"; \
done; \
test -z "$$lc" || $(GENHTML) -p @abs_top_builddir@ \
-o $(LCOV_REPORTS_DIR) $$lc
endif
lcov-dated: lcov
endif
ifeq ("$(TESTING)", "yes")
# testing and report generation
RUBY=ruby1.9 -Ireport-generators/lib -Ireport-generators/test
.PHONEY: unit-test ruby-test test-programs
# FIXME: put dependencies on libdm and liblvm
# FIXME: Should be handled by Makefiles in subdirs, not here at top level.
test-programs:
cd unit-tests/regex && $(MAKE)
cd unit-tests/datastruct && $(MAKE)
cd unit-tests/mm && $(MAKE)
unit-test: test-programs
$(RUBY) report-generators/unit_test.rb $(shell find . -name TESTS)
$(RUBY) report-generators/title_page.rb
memcheck: test-programs
$(RUBY) report-generators/memcheck.rb $(shell find . -name TESTS)
$(RUBY) report-generators/title_page.rb
ruby-test:
$(RUBY) report-generators/test/ts.rb
endif

11
README
View File

@@ -1,3 +1,14 @@
NEWS
----
On 7th June 2012, we froze this copy of the source code and moved it to
fedorahosted.org:
http://git.fedorahosted.org/git/lvm2.git
git clone git://git.fedorahosted.org/git/lvm2.git
-------------------------------------------------------------------------------
This tree contains the LVM2 and device-mapper tools and libraries.
For more information about LVM2 read the changelog in the WHATS_NEW file.

View File

@@ -1 +1 @@
2.02.59(1)-cvs (2010-01-21)
2.02.96(2)-cvs (2012-03-06)

View File

@@ -1 +1 @@
1.02.43-cvs (2010-01-21)
1.02.75-cvs (2012-03-06)

1105
WHATS_NEW

File diff suppressed because it is too large Load Diff

View File

@@ -1,3 +1,346 @@
On 7th June 2012, we froze this copy of the source code and moved it to
fedorahosted.org:
http://git.fedorahosted.org/git/lvm2.git
git clone git://git.fedorahosted.org/git/lvm2.git
--------------------------------------------------------------------------------
Version 1.02.75 -
================================
Remove unsupported udev_get_dev_path libudev call used for checking udev dir.
Set delay_resume_if_new on deptree snapshot origin.
Log value chosen in _find_config_bool like other variable types do.
Synchronize with dead of dmeventd.
Rename (Blk)DevNames/DevNos dmsetup header to (Blk)DevNamesUsed/DevNosUsed.
Add configure --with-veritysetup for independent veritysetup tool.
Properly support passed in dmevent path in dm_event_register_handler().
Remove dmeventd fifos on exit if they are not managed by systemd.
Use SD_ACTIVATION env. var. in systemd units to better detect systemd in use.
Do not run a new dmeventd instance on restart if there's no existing one.
Make the time window for reading fifo longer 5sec (1.02.73).
Version 1.02.74 - 6th March 2012
================================
Check for multiply-mangled names in auto mangling mode.
Fix dm_task_get_name_unmangled to not unmangle already unmangled name.
Check whether device names are properly mangled on ioctl return.
Deactivation of failed thin check on thin pool returns success.
Version 1.02.73 - 3rd March 2012
================================
Test _thread_registry list with holding mutex in dmeventd.
Add dm_tree_node_set_callback() for preload and deactivation hooks.
Drop unsupported TRIM message for thin pool.
Improve logging for fifo startup in dmeventd.
Better detection of missing dmeventd fifo connection (1.02.71).
Add a few pointer validations in dmsetup.
Support dm_task_get_driver_version() query without version string.
Log failure of pthread_join when cleaning unused threads in dmeventd.
Fix empty string warning logic in _find_config_str. (1.02.68)
Fix dm_task_set_name to properly resolve path to dm name (1.02.71).
Add dm_strncpy() function as a faster strncpy() replacement.
Version 1.02.72 - 23rd February 2012
====================================
Avoid memory reallocation for dm_asprintf.
Version 1.02.71 - 20th February 2012
====================================
Switch to using built-in blkid in 13-dm-disk.rules.
Add "watch" rule to 13-dm-disk.rules.
Detect failing fifo and skip 20s retry communication period.
Add DM_DEFAULT_NAME_MANGLING_MODE environment variable as an override.
Add dm_lib_init to automatically initialise device-mapper library on load.
Replace any '\' char with '\\' in dm table specification on input.
Add mangle command to dmsetup to provide renaming to correct mangled form.
Add 'mangled_name' and 'unmangled_name' fields to dmsetup info -c -o.
Add --manglename option to dmsetup to select the name mangling mode.
Add dm_task_get_name_mangled/unmangled to libdevmapper.
Mangle device name on dm_task_set_name/newname call if necessary.
Add dm_set/get_name_mangling_mode to set/get name mangling in libdevmapper.
Add configure --with-default-name-mangling for udev-friendly dev name charset.
Test for parsed words in _umount() dmeventd snapshot plugin.
Fix memory leak in fail path of parse_loop_device_name() in dmsetup.
Check for missing reply_uuid in dm_event_get_registered_device().
Check for allocation failure in dmeventd restart().
Add few missing allocation failures tests in dmsetup.
Fix potential risk of writing in front of buffer in _sysfs_get_dm_name().
Version 1.02.70 - 12th February 2012
====================================
Fix dm_event_get_version() check.
Add pointer test for dependency check in _add_dev().
Validate name and uuid params of dm_tree_add_new_dev_with_udev_flags().
Do not crash for dm_report_init() sort_key == NULL and behave like "".
Return error for failing allocation in dm_asprintf().
Add missing test for failing allocation in dm_realloc() code.
Add test for memory allocation failures in regex matcher code.
Simplify dm_task_set_geometry() and use dm_asprintf().
Set all parameters to 0 for dm_get_next_target() for NULL return.
Fix fd resource leak in error path for _udev_notify_sem_create().
Leave space for '\0' for readline() call in _sysfs_get_kernel_name().
Version 1.02.69 - 1st February 2012
===================================
Clean up dmeventd systemd unit ordering and requirements.
Version 1.02.68 - 26th January 2012
===================================
Reset all members of info struct in dm_tree_add_new_dev_with_udev_flags.
Add dmsetup wipe_table to replace table with one that uses error target.
Add 'blkdevname' and 'blkdevs_used' fields to dmsetup info -c -o.
Add 'blkdevname' option to dmsetup ls --tree to see block device names.
Add -o devno/blkdevname/devname to dmsetup deps and ls.
Add dm_device_get_name to get map name or block device name for given devno.
Remove empty devices when clearing left-over inactive tables in deptree.
Add dm_uuid_prefix/dm_set_uuid_prefix to override hard-coded LVM- prefix.
Improve dmsetup man page description of readahead parameter.
Use sysfs to set/get readahead if possible.
Fix lvm2-monitor init script to use normalized output when using vgs.
Add test for max length (DM_MAX_TYPE_NAME) of target type name.
Include a copy of kernel DM documentation in doc/kernel.
Improve man page style for dmsetup and mention more targets.
Fix _get_proc_number to be tolerant of malformed /proc/misc entries.
Fix missing thread list manipulation protection in dmeventd.
Add ExecReload to dm-event.service for systemd to reload dmeventd properly.
Add dm_config_tree_find_str_allow_empty and dm_config_find_str_allow_empty.
Fix compile-time pool memory locking with DEBUG_MEM.
Fix valgrind error reports in free of pool chunks with DEBUG_MEM.
Align size of structure chunk for fast pool allocator to 8 bytes.
Simplify some pointer operations in dm_free_aux() debug code.
Remove unused dbg_malloc.h file from source tree.
Cleanup backtraces for _create_and_load_v4().
Fix alignment warning in bitcount calculation for raid segment.
Allocate dm_tree structure from dm_tree pool.
Update debug logging for _resume_node.
Add functions to support thin provisioning target.
Improve libdm-config error path reporting.
Update dmsetup resume man with --addnodeonresume/create options.
Add dependency for dm man pages to man subdirectory make all target.
Add dm_tree_retry_remove to use retry logic for device removal in a dm_tree.
Add dm_device_has_mounted_fs fn to check mounted filesystem on a device.
Add dm_device_has_holders fn to to check use of the device by another device.
Add dm_sysfs_dir to libdevmapper to retrieve sysfs location set.
Add dm_set_sysfs_dir to libdevmapper to set sysfs location.
Add --retry option for dmsetup remove to retry removal if not successful.
Add dm_task_retry_remove fn to use retry logic for device removal.
Remove unused passed parameters for _mirror_emit_segment_line().
Add dm_config and string character escaping functions to libdevmapper.
Mark unreleased memory pools as internal error.
Version 1.02.67 - 19th August 2011
==================================
Add dm_tree_node_add_null_area for temporarily-missing raid devs tracked.
Version 1.02.66 - 12th August 2011
==================================
Release geometry buffer in dm_task_destroy.
Update udev rules to skip DM flags decoding for removed devices.
Add compile-time pool memory locking options (to debug shared VG structs).
Remove device name prefix from dmsetup line output if -j & -m or -u supplied.
Remove support for the original version 1 dm ioctls.
Add missing check for allocation failure _create_dir_recursive().
Add support for systemd file descriptor handover in dmeventd.
Fix memory leak in dmsetup _message() memory allocation error path.
Use new oom killer adjustment interface (oom_score_adj) when available.
Add systemd unit files for dmeventd.
Fix read-only identical table reload supression.
Version 1.02.65 - 8th July 2011
===============================
Remove dev name prefix from dmsetup line output if exactly one dev requested.
Report internal error if suspending a device using an already-suspended dev.
Report error if a table load requiring target parameters has none supplied.
Add dmsetup --checks and dm_task_enable_checks framework to validate ioctls.
Add age_in_minutes parameter to dmsetup udevcomplete_all.
Return immediately from dm_lib_exit() if called more than once.
Disable udev fallback by default and add --verifyudev option to dmsetup.
Report internal error if any table is loaded while any dev is known suspended.
Add dm_get_suspended_counter() for number of devs in suspended state by lib.
Fix "all" report field prefix matching to include label fields with pv_all.
Delay resuming new preloaded mirror devices with core logs in deptree code.
Accept new kernel version 3 uname formats in initialisation.
Version 1.02.64 - 29th April 2011
==================================
Require libudev >= 143 when compiling with udev support.
Use word alignment for dm_pool_strdup() and dm_pool_strndup().
Use dm_snprintf() to fix signedness warning in dm_set_dev_dir().
Use unsigned loop counter to fix signedness warning in _other_node_ops().
Fix const cast in dmsetup calls of dm_report_field_string().
Streamline /dev/mapper/control node code for common cases.
Use hard-coded dm control node device number for 2.6.36 kernels and above.
Improve stack debug reporting in dm_task_create().
Fallback to control node creation only if node doesn't exist yet.
Change dm_hash binary functions to take void *key instead of char *.
Fix uninitialised memory use with empty params in _reload_with_suppression_v4.
Lower severity of selabel_lookup and matchpathcon failure to log_debug.
Add test for failed allocation from dm_task_set_uuid() in dmeventd.
Add dm_event_get_version to dmeventd for use with -R.
Avoid dmeventd core dumps when handling request with unknown command ID.
Have dmeventd -R start up even when no existing copy is running.
Accept multiple mapped device names on many dmsetup command lines.
Fix dm_udev_wait calls in dmsetup to occur before readahead display not after.
Include an implicit dm_task_update_nodes() within dm_udev_wait().
Fix _create_and_load_v4 not to lose the --addnodeoncreate setting (1.02.62).
Add inactive table query support for kernel driver >= 4.11.6 (RHEL 5.7).
Log debug open_count in _node_has_closed_parents().
Add a const to dm_report_field_string() data parameter.
Version 1.02.63 - 9th February 2011
===================================
Reinstate DEBUG_MEM as it's part of the API. (1.02.62)
Version 1.02.62 - 4th February 2011
===================================
Add configure --with-device-nodes-on=create for previous behaviour.
Move creation of device nodes from 'create' to 'resume'.
Add --addnodeonresume and --addnodeoncreate options to dmsetup.
Add dm_task_set_add_node to libdevmapper to control dev node creation time.
Add dm_task_secure_data to libdevmapper to wipe ioctl buffers in kernel.
Log debug message when expected uevent is not generated.
Only compile memory debugging code when DEBUG_MEM is set.
Set DM_UDEV_DISABLE_OTHER_RULES_FLAG for suspended DM devices in udev rules.
Begin a new pool object for each row in _output_as_rows() correctly.
Version 1.02.61 - 10th January 2011
===================================
Add DM_COOKIE_AUTO_CREATE to libdevmapper.h.
Export DM_CONTROL_NODE_UMASK and use it while creating /dev/mapper/control.
Version 1.02.60 - 20th December 2010
====================================
Check for unlink failure in remove_lockfile() in dmeventd.
Use dm_free for dm_malloc-ed areas in _clog_ctr/_clog_dtr in cmirrord.
Use char* arithmetic in _process_all() & _targets() in dmsetup.
Change dm_regex_create() API to accept const char * const *patterns.
Add new dm_prepare_selinux_context fn to libdevmapper and use it throughout.
Detect existence of new SELinux selabel interface during configure.
Version 1.02.59 - 6th December 2010
===================================
Add backtraces to _process_mapper_dir and _create_and_load_v4 error paths.
Remove superfluous checks for NULL before calling dm_free.
Version 1.02.58 - 22nd November 2010
====================================
Fix _output_field crash from field_id free with DEBUG_MEM. (1.02.57)
Version 1.02.57 - 8th November 2010
===================================
Fix regex optimiser not to ignore RHS of OR nodes in _find_leftmost_common.
Add dmeventd -R to restart dmeventd without losing monitoring state. (1.02.56)
Fix memory leak of field_id in _output_field function.
Allocate buffer for reporting functions dynamically to support long outputs.
Version 1.02.56 - 25th October 2010
===================================
Return const pointer from dm_basename() in libdevmapper.
Implement dmeventd -R to restart without state loss.
Add dm_zalloc and use it and dm_pool_zalloc throughout.
Add --setuuid to dmsetup rename.
Add dm_task_set_newuuid to set uuid of mapped device post-creation.
Version 1.02.55 - 24th September 2010
=====================================
Fix the way regions are marked complete to avoid slow --nosync cmirror I/O.
Add DM_REPORT_FIELD_TYPE_ID_LEN to libdevmapper.h.
Version 1.02.54 - 18th August 2010
==================================
Fix dm-mod autoloading logic to not assume control node is set correctly.
Add dmeventd/executable to lvm.conf to test alternative dmeventd.
Export dm_event_handler_set_dmeventd_path to override built-in dmeventd path.
Generate libdevmapper-event exported symbols.
Remove superfluous NULL pointer tests before dm_free from dmeventd.
Assume dm-mod autoloading support is in kernel 2.6.36 and higher, not 2.6.35.
Fix udev rules to support udev database content generated by older rules.
Reinstate detection of inappropriate uevent with DISK_RO set and suppress it.
Fix regex ttree off-by-one error.
Add --enable-valgrind-pool to configure.
Fix segfault in regex matcher with characters of ordinal value > 127.
Fix 'void*' arithmetic warnings in dbg_malloc.c and libdm-iface.c.
Wait for node creation before displaying debug info in dmsetup.
Fix return status 0 for "dmsetup info -c -o help".
Add check for kernel semaphore support and disable udev_sync if not available.
Version 1.02.53 - 28th July 2010
================================
Revert failed table load preparation after "create, load and resume".
Switch dmeventd to use dm_create_lockfile and drop duplicate code.
Add dm_create_lockfile to libdm to handle pidfiles for all daemons.
Replace lookup with next in struct dfa_state & calculate states on demand.
Improve the regex matcher, reducing the number of charset nodes used.
Add dm_regex_fingerprint to facilitate regex testing.
Skip ffs(0) in _test_word in bitset functions.
Use "nowatch" udev rule for inappropriate devices.
Version 1.02.52 - 6th July 2010
===============================
Fix dmlosetup snprintf %llu compiler warning.
Add parentheses to some libdevmapper.h macro arguments.
Add printf format attributes to dm_{sn,as}printf and fix a caller.
Move dmeventd man page from install_lvm2 to install_device-mapper. (1.02.50)
Version 1.02.51 - 30th June 2010
================================
Generate libdevmapper exported symbols from header file.
Version 1.02.50 - 23rd June 2010
================================
Fix INTERNAL_ERROR typo in ioctl iface unknown task message.
Fix udev rules to handle spurious events properly.
Use C99 [] not [0] in dm_ulog_request struct to avoid abort when fortified.
Allow use of devmapper header file in C++ mode (extern "C" and __typeof__).
Add dmeventd man page.
Version 1.02.49 - 4th June 2010
===============================
Support autoloading of dm-mod module for kernels from 2.6.35.
Document 'clear' in dmsetup man page.
Fix semctl parameter (union) to avoid misaligned parameter on some arches.
Add dm_tree_node_set_presuspend_node() to presuspend child when deactivating.
Initial support for replicator target.
Version 1.02.48 - 17th May 2010
================================
Use -d to control level of messages sent to syslog by dmeventd.
Change -d to -f to run dmeventd in foreground.
Do not print encryption key in message debug output (cryptsetup luksResume).
Fix dmeventd static build library dependencies.
Fix udev flags on remove in create_and_load error path.
Version 1.02.47 - 30th April 2010
=================================
Add support for new IMPORT{db} udev rule.
Add DM_UDEV_PRIMARY_SOURCE_FLAG udev flag to recognize proper DM events.
Also include udev libs in libdevmapper.pc when udev_sync is enabled.
Cache bitset locations to speed up _calc_states.
Add a regex optimisation pass for shared prefixes and suffixes.
Add dm_bit_and and dm_bitset_equal to libdevmapper.
Simplify dm_bitset_create.
Speed up dm_bit_get_next with ffs().
Version 1.02.46 - 14th April 2010
=================================
Change dm_tree_deactivate_children to fail if device is open.
Wipe memory buffers for dm-ioctl parameters before releasing.
Strictly require libudev if udev_sync is used.
Add support for ioctl's DM_UEVENT_GENERATED_FLAG.
Version 1.02.45 - 9th March 2010
================================
Add --showkeys parameter description to dmsetup man page.
Add --help option as synonym for help command.
Version 1.02.44 - 15th February 2010
====================================
Add DM_UDEV_DISABLE_LIBRARY_FALLBACK udev flag to rely on udev only.
Export dm_udev_create_cookie function to create new cookies on demand.
Add --udevcookie, udevcreatecookie and udevreleasecookie to dmsetup.
Set udev state automatically instead of using DM_UDEV_DISABLE_CHECKING.
Version 1.02.43 - 21st January 2010
===================================
Remove bitset, hash and pool headers superceded by libdevmapper.h.

171
aclocal.m4 vendored Normal file
View File

@@ -0,0 +1,171 @@
# generated automatically by aclocal 1.11.1 -*- Autoconf -*-
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
# 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*-
# serial 1 (pkg-config-0.24)
#
# Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# PKG_PROG_PKG_CONFIG([MIN-VERSION])
# ----------------------------------
AC_DEFUN([PKG_PROG_PKG_CONFIG],
[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
m4_pattern_allow([^PKG_CONFIG(_PATH)?$])
AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])
AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path])
AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path])
if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
fi
if test -n "$PKG_CONFIG"; then
_pkg_min_version=m4_default([$1], [0.9.0])
AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no])
PKG_CONFIG=""
fi
fi[]dnl
])# PKG_PROG_PKG_CONFIG
# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
#
# Check to see whether a particular set of modules exists. Similar
# to PKG_CHECK_MODULES(), but does not set variables or print errors.
#
# Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
# only at the first occurence in configure.ac, so if the first place
# it's called might be skipped (such as if it is within an "if", you
# have to call PKG_CHECK_EXISTS manually
# --------------------------------------------------------------
AC_DEFUN([PKG_CHECK_EXISTS],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
if test -n "$PKG_CONFIG" && \
AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
m4_default([$2], [:])
m4_ifvaln([$3], [else
$3])dnl
fi])
# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
# ---------------------------------------------
m4_define([_PKG_CONFIG],
[if test -n "$$1"; then
pkg_cv_[]$1="$$1"
elif test -n "$PKG_CONFIG"; then
PKG_CHECK_EXISTS([$3],
[pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`],
[pkg_failed=yes])
else
pkg_failed=untried
fi[]dnl
])# _PKG_CONFIG
# _PKG_SHORT_ERRORS_SUPPORTED
# -----------------------------
AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
_pkg_short_errors_supported=yes
else
_pkg_short_errors_supported=no
fi[]dnl
])# _PKG_SHORT_ERRORS_SUPPORTED
# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
# [ACTION-IF-NOT-FOUND])
#
#
# Note that if there is a possibility the first call to
# PKG_CHECK_MODULES might not happen, you should be sure to include an
# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
#
#
# --------------------------------------------------------------
AC_DEFUN([PKG_CHECK_MODULES],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
pkg_failed=no
AC_MSG_CHECKING([for $1])
_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
_PKG_CONFIG([$1][_LIBS], [libs], [$2])
m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
and $1[]_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details.])
if test $pkg_failed = yes; then
AC_MSG_RESULT([no])
_PKG_SHORT_ERRORS_SUPPORTED
if test $_pkg_short_errors_supported = yes; then
$1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors "$2" 2>&1`
else
$1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors "$2" 2>&1`
fi
# Put the nasty error message in config.log where it belongs
echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
m4_default([$4], [AC_MSG_ERROR(
[Package requirements ($2) were not met:
$$1_PKG_ERRORS
Consider adjusting the PKG_CONFIG_PATH environment variable if you
installed software in a non-standard prefix.
_PKG_TEXT])
])
elif test $pkg_failed = untried; then
AC_MSG_RESULT([no])
m4_default([$4], [AC_MSG_FAILURE(
[The pkg-config script could not be found or is too old. Make sure it
is in your PATH or set the PKG_CONFIG environment variable to the full
path to pkg-config.
_PKG_TEXT
To get pkg-config, see <http://pkg-config.freedesktop.org/>.])
])
else
$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
AC_MSG_RESULT([yes])
$3
fi[]dnl
])# PKG_CHECK_MODULES

239
autoconf/config.guess vendored
View File

@@ -1,10 +1,10 @@
#! /bin/sh
# Attempt to guess a canonical system name.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
# Free Software Foundation, Inc.
timestamp='2008-01-23'
timestamp='2009-11-20'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -27,16 +27,16 @@ timestamp='2008-01-23'
# the same distribution terms that you use for the rest of that program.
# Originally written by Per Bothner <per@bothner.com>.
# Please send patches to <config-patches@gnu.org>. Submit a context
# diff and a properly formatted ChangeLog entry.
# Originally written by Per Bothner. Please send patches (context
# diff format) to <config-patches@gnu.org> and include a ChangeLog
# entry.
#
# This script attempts to guess a canonical system name similar to
# config.sub. If it succeeds, it prints the system name on stdout, and
# exits with 0. Otherwise, it exits with 1.
#
# The plan is that this can be called by configure scripts if you
# don't specify an explicit build system type.
# You can get the latest version of this script from:
# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
me=`echo "$0" | sed -e 's,.*/,,'`
@@ -170,7 +170,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
eval $set_cc_for_build
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep __ELF__ >/dev/null
| grep -q __ELF__
then
# Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
# Return netbsd for either. FIX?
@@ -324,14 +324,33 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
case `/usr/bin/uname -p` in
sparc) echo sparc-icl-nx7; exit ;;
esac ;;
s390x:SunOS:*:*)
echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
sun4H:SunOS:5.*:*)
echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
echo i386-pc-auroraux${UNAME_RELEASE}
exit ;;
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
eval $set_cc_for_build
SUN_ARCH="i386"
# If there is a compiler, see if it is configured for 64-bit objects.
# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
# This test works for both compilers.
if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
then
SUN_ARCH="x86_64"
fi
fi
echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
sun4*:SunOS:6*:*)
# According to config.sub, this is the proper way to canonicalize
@@ -640,7 +659,7 @@ EOF
# => hppa64-hp-hpux11.23
if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
grep __LP64__ >/dev/null
grep -q __LP64__
then
HP_ARCH="hppa2.0w"
else
@@ -791,12 +810,12 @@ EOF
i*:PW*:*)
echo ${UNAME_MACHINE}-pc-pw32
exit ;;
*:Interix*:[3456]*)
*:Interix*:*)
case ${UNAME_MACHINE} in
x86)
echo i586-pc-interix${UNAME_RELEASE}
exit ;;
EM64T | authenticamd)
authenticamd | genuineintel | EM64T)
echo x86_64-unknown-interix${UNAME_RELEASE}
exit ;;
IA64)
@@ -806,6 +825,9 @@ EOF
[345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
echo i${UNAME_MACHINE}-pc-mks
exit ;;
8664:Windows_NT:*)
echo x86_64-pc-mks
exit ;;
i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
# How do we know it's Interix rather than the generic POSIX subsystem?
# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
@@ -835,6 +857,20 @@ EOF
i*86:Minix:*:*)
echo ${UNAME_MACHINE}-pc-minix
exit ;;
alpha:Linux:*:*)
case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
EV5) UNAME_MACHINE=alphaev5 ;;
EV56) UNAME_MACHINE=alphaev56 ;;
PCA56) UNAME_MACHINE=alphapca56 ;;
PCA57) UNAME_MACHINE=alphapca56 ;;
EV6) UNAME_MACHINE=alphaev6 ;;
EV67) UNAME_MACHINE=alphaev67 ;;
EV68*) UNAME_MACHINE=alphaev68 ;;
esac
objdump --private-headers /bin/sh | grep -q ld.so.1
if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
exit ;;
arm*:Linux:*:*)
eval $set_cc_for_build
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
@@ -857,6 +893,17 @@ EOF
frv:Linux:*:*)
echo frv-unknown-linux-gnu
exit ;;
i*86:Linux:*:*)
LIBC=gnu
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
#ifdef __dietlibc__
LIBC=dietlibc
#endif
EOF
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
exit ;;
ia64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
@@ -866,74 +913,33 @@ EOF
m68*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
mips:Linux:*:*)
mips:Linux:*:* | mips64:Linux:*:*)
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
#undef CPU
#undef mips
#undef mipsel
#undef ${UNAME_MACHINE}
#undef ${UNAME_MACHINE}el
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
CPU=mipsel
CPU=${UNAME_MACHINE}el
#else
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
CPU=mips
CPU=${UNAME_MACHINE}
#else
CPU=
#endif
#endif
EOF
eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
/^CPU/{
s: ::g
p
}'`"
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
;;
mips64:Linux:*:*)
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
#undef CPU
#undef mips64
#undef mips64el
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
CPU=mips64el
#else
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
CPU=mips64
#else
CPU=
#endif
#endif
EOF
eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
/^CPU/{
s: ::g
p
}'`"
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
;;
or32:Linux:*:*)
echo or32-unknown-linux-gnu
exit ;;
ppc:Linux:*:*)
echo powerpc-unknown-linux-gnu
padre:Linux:*:*)
echo sparc-unknown-linux-gnu
exit ;;
ppc64:Linux:*:*)
echo powerpc64-unknown-linux-gnu
exit ;;
alpha:Linux:*:*)
case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
EV5) UNAME_MACHINE=alphaev5 ;;
EV56) UNAME_MACHINE=alphaev56 ;;
PCA56) UNAME_MACHINE=alphapca56 ;;
PCA57) UNAME_MACHINE=alphapca56 ;;
EV6) UNAME_MACHINE=alphaev6 ;;
EV67) UNAME_MACHINE=alphaev67 ;;
EV68*) UNAME_MACHINE=alphaev68 ;;
esac
objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
parisc64:Linux:*:* | hppa64:Linux:*:*)
echo hppa64-unknown-linux-gnu
exit ;;
parisc:Linux:*:* | hppa:Linux:*:*)
# Look for CPU level
@@ -943,8 +949,11 @@ EOF
*) echo hppa-unknown-linux-gnu ;;
esac
exit ;;
parisc64:Linux:*:* | hppa64:Linux:*:*)
echo hppa64-unknown-linux-gnu
ppc64:Linux:*:*)
echo powerpc64-unknown-linux-gnu
exit ;;
ppc:Linux:*:*)
echo powerpc-unknown-linux-gnu
exit ;;
s390:Linux:*:* | s390x:Linux:*:*)
echo ${UNAME_MACHINE}-ibm-linux
@@ -967,69 +976,6 @@ EOF
xtensa*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
i*86:Linux:*:*)
# The BFD linker knows what the default object file format is, so
# first see if it will tell us. cd to the root directory to prevent
# problems with other programs or directories called `ld' in the path.
# Set LC_ALL=C to ensure ld outputs messages in English.
ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
| sed -ne '/supported targets:/!d
s/[ ][ ]*/ /g
s/.*supported targets: *//
s/ .*//
p'`
case "$ld_supported_targets" in
elf32-i386)
TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu"
;;
a.out-i386-linux)
echo "${UNAME_MACHINE}-pc-linux-gnuaout"
exit ;;
coff-i386)
echo "${UNAME_MACHINE}-pc-linux-gnucoff"
exit ;;
"")
# Either a pre-BFD a.out linker (linux-gnuoldld) or
# one that does not give us useful --help.
echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
exit ;;
esac
# Determine whether the default compiler is a.out or elf
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
#include <features.h>
#ifdef __ELF__
# ifdef __GLIBC__
# if __GLIBC__ >= 2
LIBC=gnu
# else
LIBC=gnulibc1
# endif
# else
LIBC=gnulibc1
# endif
#else
#if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
LIBC=gnu
#else
LIBC=gnuaout
#endif
#endif
#ifdef __dietlibc__
LIBC=dietlibc
#endif
EOF
eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
/^LIBC/{
s: ::g
p
}'`"
test x"${LIBC}" != x && {
echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
exit
}
test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; }
;;
i*86:DYNIX/ptx:4*:*)
# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
# earlier versions are messed up and put the nodename in both
@@ -1058,7 +1004,7 @@ EOF
i*86:syllable:*:*)
echo ${UNAME_MACHINE}-pc-syllable
exit ;;
i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
echo i386-unknown-lynxos${UNAME_RELEASE}
exit ;;
i*86:*DOS:*:*)
@@ -1102,8 +1048,11 @@ EOF
pc:*:*:*)
# Left here for compatibility:
# uname -m prints for DJGPP always 'pc', but it prints nothing about
# the processor, so we play safe by assuming i386.
echo i386-pc-msdosdjgpp
# the processor, so we play safe by assuming i586.
# Note: whatever this is, it MUST be the same as what config.sub
# prints for the "djgpp" host, or else GDB configury will decide that
# this is a cross-build.
echo i586-pc-msdosdjgpp
exit ;;
Intel:Mach:3*:*)
echo i386-pc-mach3
@@ -1141,6 +1090,16 @@ EOF
3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
&& { echo i486-ncr-sysv4; exit; } ;;
NCR*:*:4.2:* | MPRAS*:*:4.2:*)
OS_REL='.3'
test -r /etc/.relid \
&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
&& { echo i486-ncr-sysv4.3${OS_REL}; exit; }
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
&& { echo i586-ncr-sysv4.3${OS_REL}; exit; }
/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
&& { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
echo m68k-unknown-lynxos${UNAME_RELEASE}
exit ;;
@@ -1153,7 +1112,7 @@ EOF
rs6000:LynxOS:2.*:*)
echo rs6000-unknown-lynxos${UNAME_RELEASE}
exit ;;
PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
echo powerpc-unknown-lynxos${UNAME_RELEASE}
exit ;;
SM[BE]S:UNIX_SV:*:*)
@@ -1216,6 +1175,9 @@ EOF
BePC:BeOS:*:*) # BeOS running on Intel PC compatible.
echo i586-pc-beos
exit ;;
BePC:Haiku:*:*) # Haiku running on Intel PC compatible.
echo i586-pc-haiku
exit ;;
SX-4:SUPER-UX:*:*)
echo sx4-nec-superux${UNAME_RELEASE}
exit ;;
@@ -1243,6 +1205,16 @@ EOF
*:Darwin:*:*)
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
case $UNAME_PROCESSOR in
i386)
eval $set_cc_for_build
if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
then
UNAME_PROCESSOR="x86_64"
fi
fi ;;
unknown) UNAME_PROCESSOR=powerpc ;;
esac
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
@@ -1324,6 +1296,9 @@ EOF
i*86:rdos:*:*)
echo ${UNAME_MACHINE}-pc-rdos
exit ;;
i*86:AROS:*:*)
echo ${UNAME_MACHINE}-pc-aros
exit ;;
esac
#echo '(No uname command or uname output not recognized.)' 1>&2

91
autoconf/config.sub vendored
View File

@@ -1,10 +1,10 @@
#! /bin/sh
# Configuration validation subroutine script.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
# Free Software Foundation, Inc.
timestamp='2008-01-16'
timestamp='2009-11-20'
# This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software
@@ -32,13 +32,16 @@ timestamp='2008-01-16'
# Please send patches to <config-patches@gnu.org>. Submit a context
# diff and a properly formatted ChangeLog entry.
# diff and a properly formatted GNU ChangeLog entry.
#
# Configuration subroutine to validate and canonicalize a configuration type.
# Supply the specified configuration type as an argument.
# If it is invalid, we print an error message on stderr and exit with code 1.
# Otherwise, we print the canonical config type on stdout and succeed.
# You can get the latest version of this script from:
# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
# This file is supposed to be the same for all GNU packages
# and recognize all the CPU types, system types and aliases
# that are meaningful with *any* GNU software.
@@ -122,6 +125,7 @@ maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
case $maybe_os in
nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
kopensolaris*-gnu* | \
storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
@@ -148,10 +152,13 @@ case $os in
-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
-apple | -axis | -knuth | -cray)
-apple | -axis | -knuth | -cray | -microblaze)
os=
basic_machine=$1
;;
-bluegene*)
os=-cnk
;;
-sim | -cisco | -oki | -wec | -winbond)
os=
basic_machine=$1
@@ -249,13 +256,16 @@ case $basic_machine in
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
| i370 | i860 | i960 | ia64 \
| ip2k | iq2000 \
| lm32 \
| m32c | m32r | m32rle | m68000 | m68k | m88k \
| maxq | mb | microblaze | mcore | mep \
| maxq | mb | microblaze | mcore | mep | metag \
| mips | mipsbe | mipseb | mipsel | mipsle \
| mips16 \
| mips64 | mips64el \
| mips64vr | mips64vrel \
| mips64octeon | mips64octeonel \
| mips64orion | mips64orionel \
| mips64r5900 | mips64r5900el \
| mips64vr | mips64vrel \
| mips64vr4100 | mips64vr4100el \
| mips64vr4300 | mips64vr4300el \
| mips64vr5000 | mips64vr5000el \
@@ -268,6 +278,7 @@ case $basic_machine in
| mipsisa64sr71k | mipsisa64sr71kel \
| mipstx39 | mipstx39el \
| mn10200 | mn10300 \
| moxie \
| mt \
| msp430 \
| nios | nios2 \
@@ -276,20 +287,22 @@ case $basic_machine in
| pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
| pyramid \
| rx \
| score \
| sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
| sh64 | sh64le \
| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
| spu | strongarm \
| tahoe | thumb | tic4x | tic80 | tron \
| ubicom32 \
| v850 | v850e \
| we32k \
| x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
| z8k)
| z8k | z80)
basic_machine=$basic_machine-unknown
;;
m6811 | m68hc11 | m6812 | m68hc12)
m6811 | m68hc11 | m6812 | m68hc12 | picochip)
# Motorola 68HC11/12.
basic_machine=$basic_machine-unknown
os=-none
@@ -329,14 +342,17 @@ case $basic_machine in
| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
| i*86-* | i860-* | i960-* | ia64-* \
| ip2k-* | iq2000-* \
| lm32-* \
| m32c-* | m32r-* | m32rle-* \
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
| m88110-* | m88k-* | maxq-* | mcore-* \
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
| mips16-* \
| mips64-* | mips64el-* \
| mips64vr-* | mips64vrel-* \
| mips64octeon-* | mips64octeonel-* \
| mips64orion-* | mips64orionel-* \
| mips64r5900-* | mips64r5900el-* \
| mips64vr-* | mips64vrel-* \
| mips64vr4100-* | mips64vr4100el-* \
| mips64vr4300-* | mips64vr4300el-* \
| mips64vr5000-* | mips64vr5000el-* \
@@ -357,21 +373,22 @@ case $basic_machine in
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
| pyramid-* \
| romp-* | rs6000-* \
| sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
| romp-* | rs6000-* | rx-* \
| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
| sparclite-* \
| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
| tahoe-* | thumb-* \
| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* | tile-* \
| tron-* \
| ubicom32-* \
| v850-* | v850e-* | vax-* \
| we32k-* \
| x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
| xstormy16-* | xtensa*-* \
| ymp-* \
| z8k-*)
| z8k-* | z80-*)
;;
# Recognize the basic CPU types without company name, with glob match.
xtensa*)
@@ -439,6 +456,10 @@ case $basic_machine in
basic_machine=m68k-apollo
os=-bsd
;;
aros)
basic_machine=i386-pc
os=-aros
;;
aux)
basic_machine=m68k-apple
os=-aux
@@ -455,10 +476,18 @@ case $basic_machine in
basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
os=-linux
;;
bluegene*)
basic_machine=powerpc-ibm
os=-cnk
;;
c90)
basic_machine=c90-cray
os=-unicos
;;
cegcc)
basic_machine=arm-unknown
os=-cegcc
;;
convex-c1)
basic_machine=c1-convex
os=-bsd
@@ -526,6 +555,10 @@ case $basic_machine in
basic_machine=m88k-motorola
os=-sysv3
;;
dicos)
basic_machine=i686-pc
os=-dicos
;;
djgpp)
basic_machine=i586-pc
os=-msdosdjgpp
@@ -699,6 +732,9 @@ case $basic_machine in
basic_machine=ns32k-utek
os=-sysv
;;
microblaze)
basic_machine=microblaze-xilinx
;;
mingw32)
basic_machine=i386-pc
os=-mingw32
@@ -1128,6 +1164,10 @@ case $basic_machine in
basic_machine=z8k-unknown
os=-sim
;;
z80-*-coff)
basic_machine=z80-unknown
os=-sim
;;
none)
basic_machine=none-none
os=-none
@@ -1166,7 +1206,7 @@ case $basic_machine in
we32k)
basic_machine=we32k-att
;;
sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele)
sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
basic_machine=sh-unknown
;;
sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
@@ -1216,6 +1256,9 @@ case $os in
# First match some system type aliases
# that might get confused with valid system types.
# -solaris* is a basic system type, with this one exception.
-auroraux)
os=-auroraux
;;
-solaris1 | -solaris1.*)
os=`echo $os | sed -e 's|solaris1|sunos4|'`
;;
@@ -1236,10 +1279,11 @@ case $os in
# Each alternative MUST END IN A *, to match a version number.
# -sysv* is not here because it comes later, after sysvr4.
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
| -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
| -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
| -sym* | -kopensolaris* \
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
| -aos* \
| -aos* | -aros* \
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
| -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
| -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
@@ -1248,7 +1292,7 @@ case $os in
| -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
| -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* \
| -chorusos* | -chorusrdb* | -cegcc* \
| -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
| -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
| -uxpv* | -beos* | -mpeix* | -udk* \
@@ -1258,7 +1302,7 @@ case $os in
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
| -skyos* | -haiku* | -rdos* | -toppers* | -drops*)
| -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
# Remember, each alternative MUST END IN *, to match a version number.
;;
-qnx*)
@@ -1388,6 +1432,9 @@ case $os in
-zvmoe)
os=-zvmoe
;;
-dicos*)
os=-dicos
;;
-none)
;;
*)
@@ -1585,7 +1632,7 @@ case $basic_machine in
-sunos*)
vendor=sun
;;
-aix*)
-cnk*|-aix*)
vendor=ibm
;;
-beos*)

14137
configure vendored

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2004 Red Hat, Inc. All rights reserved.
# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -14,12 +14,15 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
.PHONY: dmeventd clvmd cmirrord
ifeq ("@BUILD_LVMETAD@", "yes")
SUBDIRS += lvmetad
endif
.PHONY: dmeventd clvmd cmirrord lvmetad
ifneq ("@CLVMD@", "none")
SUBDIRS = clvmd
SUBDIRS += clvmd
endif
ifeq ("@BUILD_CMIRRORD@", "yes")
@@ -28,9 +31,16 @@ endif
ifeq ("@BUILD_DMEVENTD@", "yes")
SUBDIRS += dmeventd
ifneq ("$(CFLOW_CMD)", "")
daemons.cflow: dmeventd.cflow
endif
endif
include ../make.tmpl
ifeq ($(MAKECMDGOALS),distclean)
SUBDIRS = clvmd cmirrord dmeventd lvmetad
endif
include $(top_builddir)/make.tmpl
ifeq ("@BUILD_DMEVENTD@", "yes")
device-mapper: dmeventd.device-mapper

View File

@@ -14,20 +14,17 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
CCS_LIBS = @CCS_LIBS@
CCS_CFLAGS = @CCS_CFLAGS@
CMAN_LIBS = @CMAN_LIBS@
CMAN_CFLAGS = @CMAN_CFLAGS@
CMAP_LIBS = @CMAP_LIBS@
CMAP_CFLAGS = @CMAP_CFLAGS@
CONFDB_LIBS = @CONFDB_LIBS@
CONFDB_CFLAGS = @CONFDB_CFLAGS@
CPG_LIBS = @CPG_LIBS@
CPG_CFLAGS = @CPG_CFLAGS@
DLM_LIBS = @DLM_LIBS@
DLM_CFLAGS = @DLM_CFLAGS@
GULM_LIBS = @GULM_LIBS@
GULM_CFLAGS = @GULM_CFLAGS@
QUORUM_LIBS = @QUORUM_LIBS@
QUORUM_CFLAGS = @QUORUM_CFLAGS@
SALCK_LIBS = @SALCK_LIBS@
@@ -43,13 +40,6 @@ ifeq ("@DEBUG@", "yes")
DEFS += -DDEBUG
endif
ifneq (,$(findstring gulm,, "@CLVMD@,"))
SOURCES += clvmd-gulm.c tcp-comms.c
LMLIBS += $(CCS_LIBS) $(GULM_LIBS)
CFLAGS += $(CCS_CFLAGS) $(GULM_CFLAGS)
DEFS += -DUSE_GULM
endif
ifneq (,$(findstring cman,, "@CLVMD@,"))
SOURCES += clvmd-cman.c
LMLIBS += $(CMAN_LIBS) $(CONFDB_LIBS) $(DLM_LIBS)
@@ -66,28 +56,40 @@ endif
ifneq (,$(findstring corosync,, "@CLVMD@,"))
SOURCES += clvmd-corosync.c
LMLIBS += $(CONFDB_LIBS) $(CPG_LIBS) $(DLM_LIBS) $(QUORUM_LIBS)
CFLAGS += $(CONFDB_CFLAGS) $(CPG_CFLAGS) $(DLM_CFLAGS) $(QUORUM_CFLAGS)
LMLIBS += $(CMAP_LIBS) $(CONFDB_LIBS) $(CPG_LIBS) $(DLM_LIBS) $(QUORUM_LIBS)
CFLAGS += $(CMAP_CFLAGS) $(CONFDB_CFLAGS) $(CPG_CFLAGS) $(DLM_CFLAGS) $(QUORUM_CFLAGS)
DEFS += -DUSE_COROSYNC
endif
ifneq (,$(findstring singlenode,, &quot;@CLVMD@,&quot;))
SOURCES += clvmd-singlenode.c
DEFS += -DUSE_SINGLENODE
endif
ifeq ($(MAKECMDGOALS),distclean)
SOURCES += clvmd-cman.c
SOURCES += clvmd-openais.c
SOURCES += clvmd-corosync.c
SOURCES += clvmd-singlenode.c
endif
TARGETS = \
clvmd
LVMLIBS = -llvm-internal -lpthread
LVMLIBS = $(LVMINTERNAL_LIBS)
ifeq ("@DMEVENTD@", "yes")
LVMLIBS += -ldevmapper-event
endif
include $(top_builddir)/make.tmpl
LVMLIBS += -ldevmapper
LIBS += $(PTHREAD_LIBS)
DEFS += -D_REENTRANT
CFLAGS += -fno-strict-aliasing
include ../../make.tmpl
INSTALL_TARGETS = \
install_clvmd
@@ -98,10 +100,8 @@ clvmd: $(OBJECTS) $(top_builddir)/lib/liblvm-internal.a
.PHONY: install_clvmd
install_clvmd: $(TARGETS)
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) clvmd \
$(usrsbindir)/clvmd
$(INSTALL_PROGRAM) -D clvmd $(usrsbindir)/clvmd
install: $(INSTALL_TARGETS)
install_cluster: $(INSTALL_TARGETS)

View File

@@ -22,37 +22,39 @@
#ifndef _CLVM_H
#define _CLVM_H
#include "configure.h"
struct clvm_header {
uint8_t cmd; /* See below */
uint8_t flags; /* See below */
uint16_t xid; /* Transaction ID */
uint32_t clientid; /* Only used in Daemon->Daemon comms */
int32_t status; /* For replies, whether request succeeded */
uint32_t arglen; /* Length of argument below.
If >1500 then it will be passed
uint32_t arglen; /* Length of argument below.
If >1500 then it will be passed
around the cluster in the system LV */
char node[1]; /* Actually a NUL-terminated string, node name.
If this is empty then the command is
forwarded to all cluster nodes unless
FLAG_LOCAL is also set. */
char args[1]; /* Arguments for the command follow the
If this is empty then the command is
forwarded to all cluster nodes unless
FLAG_LOCAL or FLAG_REMOTE is also set. */
char args[1]; /* Arguments for the command follow the
node name, This member is only
valid if the node name is empty */
} __attribute__ ((packed));
/* Flags */
#define CLVMD_FLAG_LOCAL 1 /* Only do this on the local node */
#define CLVMD_FLAG_SYSTEMLV 2 /* Data in system LV under my node name */
#define CLVMD_FLAG_NODEERRS 4 /* Reply has errors in node-specific portion */
#define CLVMD_FLAG_LOCAL 1 /* Only do this on the local node */
#define CLVMD_FLAG_SYSTEMLV 2 /* Data in system LV under my node name */
#define CLVMD_FLAG_NODEERRS 4 /* Reply has errors in node-specific portion */
#define CLVMD_FLAG_REMOTE 8 /* Do this on all nodes except for the local node */
/* Name of the local socket to communicate between libclvm and clvmd */
//static const char CLVMD_SOCKNAME[]="/var/run/clvmd";
static const char CLVMD_SOCKNAME[] = "\0clvmd";
/* Name of the local socket to communicate between lvm and clvmd */
static const char CLVMD_SOCKNAME[]= DEFAULT_RUN_DIR "/clvmd.sock";
/* Internal commands & replies */
#define CLVMD_CMD_REPLY 1
#define CLVMD_CMD_VERSION 2 /* Send version around cluster when we start */
#define CLVMD_CMD_GOAWAY 3 /* Die if received this - we are running
#define CLVMD_CMD_GOAWAY 3 /* Die if received this - we are running
an incompatible version */
#define CLVMD_CMD_TEST 4 /* Just for mucking about */
@@ -69,4 +71,12 @@ static const char CLVMD_SOCKNAME[] = "\0clvmd";
#define CLVMD_CMD_GET_CLUSTERNAME 41
#define CLVMD_CMD_SET_DEBUG 42
#define CLVMD_CMD_VG_BACKUP 43
#define CLVMD_CMD_RESTART 44
#define CLVMD_CMD_SYNC_NAMES 45
/* Used internally by some callers, but not part of the protocol.*/
#define NODE_ALL "*"
#define NODE_LOCAL "."
#define NODE_REMOTE "^"
#endif

View File

@@ -17,38 +17,19 @@
* CMAN communication layer for clvmd.
*/
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include "clvmd-common.h"
#include <configure.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <syslog.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <signal.h>
#include <unistd.h>
#include <fcntl.h>
#include <getopt.h>
#include <errno.h>
#include <libdevmapper.h>
#include <libdlm.h>
#include "clvmd-comms.h"
#include "clvm.h"
#include "lvm-logging.h"
#include "clvmd.h"
#include "lvm-functions.h"
#include <libdlm.h>
#include <syslog.h>
#define LOCKSPACE_NAME "clvmd"
struct clvmd_node
@@ -108,16 +89,17 @@ static int _init_cluster(void)
DEBUGLOG("CMAN initialisation complete\n");
/* Create a lockspace for LV & VG locks to live in */
lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
if (!lockspace) {
if (errno == EEXIST) {
lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
}
lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
if (!lockspace) {
syslog(LOG_ERR, "Unable to create lockspace for CLVM: %m");
syslog(LOG_ERR, "Unable to create DLM lockspace for CLVM: %m");
return -1;
}
}
DEBUGLOG("Created DLM lockspace for CLVMD.\n");
} else
DEBUGLOG("Opened existing DLM lockspace for CLVMD.\n");
dlm_ls_pthread_init(lockspace);
DEBUGLOG("DLM initialisation complete\n");
return 0;
@@ -497,6 +479,7 @@ static int _get_cluster_name(char *buf, int buflen)
}
static struct cluster_ops _cluster_cman_ops = {
.name = "cman",
.cluster_init_completed = _cluster_init_completed,
.cluster_send_message = _cluster_send_message,
.name_from_csid = _name_from_csid,

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -50,36 +50,22 @@
*/
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include "clvmd-common.h"
#include <configure.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <unistd.h>
#include <errno.h>
#include <libdevmapper.h>
#include <libdlm.h>
#include "locking.h"
#include "lvm-logging.h"
#include "lvm-functions.h"
#include "clvmd-comms.h"
#include "clvm.h"
#include "clvmd.h"
#include "lvm-globals.h"
#include "lvm-functions.h"
#include "locking.h"
#include <sys/utsname.h>
extern debug_t debug;
extern struct cluster_ops *clops;
static int restart_clvmd(void);
/* This is where all the real work happens:
NOTE: client will be NULL when this is executed on a remote node */
@@ -95,6 +81,9 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
unsigned char lock_cmd;
unsigned char lock_flags;
/* Reset test mode before we start */
init_test(0);
/* Do the command */
switch (msg->cmd) {
/* Just a test message */
@@ -110,11 +99,13 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
*buf = new_buf;
}
if (*buf) {
uname(&nodeinfo);
*retlen = 1 + snprintf(*buf, buflen,
"TEST from %s: %s v%s",
nodeinfo.nodename, args,
nodeinfo.release);
if (uname(&nodeinfo))
memset(&nodeinfo, 0, sizeof(nodeinfo));
*retlen = 1 + dm_snprintf(*buf, buflen,
"TEST from %s: %s v%s",
nodeinfo.nodename, args,
nodeinfo.release);
}
break;
@@ -124,20 +115,23 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
lockname = &args[2];
/* Check to see if the VG is in use by LVM1 */
status = do_check_lvm1(lockname);
if (lock_flags & LCK_TEST_MODE)
init_test(1);
do_lock_vg(lock_cmd, lock_flags, lockname);
break;
case CLVMD_CMD_LOCK_LV:
/* This is the biggie */
lock_cmd = args[0] & (LCK_NONBLOCK | LCK_HOLD | LCK_SCOPE_MASK | LCK_TYPE_MASK);
lock_cmd = args[0];
lock_flags = args[1];
lockname = &args[2];
if (lock_flags & LCK_TEST_MODE)
init_test(1);
status = do_lock_lv(lock_cmd, lock_flags, lockname);
/* Replace EIO with something less scary */
if (status == EIO) {
*retlen =
1 + snprintf(*buf, buflen, "%s",
get_last_lvm_error());
*retlen = 1 + dm_snprintf(*buf, buflen, "%s",
get_last_lvm_error());
return EIO;
}
break;
@@ -147,15 +141,23 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
if (buflen < 3)
return EIO;
if ((locktype = do_lock_query(lockname)))
*retlen = 1 + snprintf(*buf, buflen, "%s", locktype);
*retlen = 1 + dm_snprintf(*buf, buflen, "%s", locktype);
break;
case CLVMD_CMD_REFRESH:
do_refresh_cache();
break;
case CLVMD_CMD_SYNC_NAMES:
lvm_do_fs_unlock();
break;
case CLVMD_CMD_SET_DEBUG:
debug = args[0];
clvmd_set_debug((debug_t) args[0]);
break;
case CLVMD_CMD_RESTART:
status = restart_clvmd();
break;
case CLVMD_CMD_GET_CLUSTERNAME:
@@ -179,7 +181,8 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
/* Check the status of the command and return the error text */
if (status) {
*retlen = 1 + snprintf(*buf, buflen, "%s", strerror(status));
*retlen = 1 + ((*buf) ? dm_snprintf(*buf, buflen, "%s",
strerror(status)) : -1);
}
return status;
@@ -192,7 +195,6 @@ static int lock_vg(struct local_client *client)
struct clvm_header *header =
(struct clvm_header *) client->bits.localsock.cmd;
unsigned char lock_cmd;
unsigned char lock_flags;
int lock_mode;
char *args = header->node + strlen(header->node) + 1;
int lkid;
@@ -214,7 +216,7 @@ static int lock_vg(struct local_client *client)
lock_cmd = args[0] & (LCK_NONBLOCK | LCK_HOLD | LCK_SCOPE_MASK | LCK_TYPE_MASK);
lock_mode = ((int)lock_cmd & LCK_TYPE_MASK);
lock_flags = args[1];
/* lock_flags = args[1]; */
lockname = &args[2];
DEBUGLOG("doing PRE command LOCK_VG '%s' at %x (client=%p)\n", lockname, lock_cmd, client);
@@ -234,11 +236,12 @@ static int lock_vg(struct local_client *client)
/* Read locks need to be PR; other modes get passed through */
if (lock_mode == LCK_READ)
lock_mode = LCK_PREAD;
status = sync_lock(lockname, lock_mode, (lock_cmd & LCK_NONBLOCK) ? LKF_NOQUEUE : 0, &lkid);
status = sync_lock(lockname, lock_mode, (lock_cmd & LCK_NONBLOCK) ? LCKF_NOQUEUE : 0, &lkid);
if (status)
status = errno;
else
dm_hash_insert(lock_hash, lockname, (void *)(long)lkid);
if (!dm_hash_insert(lock_hash, lockname, (void *)(long)lkid))
return ENOMEM;
}
return status;
@@ -255,13 +258,14 @@ int do_pre_command(struct local_client *client)
unsigned char lock_cmd;
unsigned char lock_flags;
char *args = header->node + strlen(header->node) + 1;
int lockid;
int lockid = 0;
int status = 0;
char *lockname;
init_test(0);
switch (header->cmd) {
case CLVMD_CMD_TEST:
status = sync_lock("CLVMD_TEST", LKM_EXMODE, 0, &lockid);
status = sync_lock("CLVMD_TEST", LCK_EXCL, 0, &lockid);
client->bits.localsock.private = (void *)(long)lockid;
break;
@@ -277,6 +281,8 @@ int do_pre_command(struct local_client *client)
lock_cmd = args[0];
lock_flags = args[1];
lockname = &args[2];
if (lock_flags & LCK_TEST_MODE)
init_test(1);
status = pre_lock_lv(lock_cmd, lock_flags, lockname);
break;
@@ -284,7 +290,9 @@ int do_pre_command(struct local_client *client)
case CLVMD_CMD_GET_CLUSTERNAME:
case CLVMD_CMD_SET_DEBUG:
case CLVMD_CMD_VG_BACKUP:
case CLVMD_CMD_SYNC_NAMES:
case CLVMD_CMD_LOCK_QUERY:
case CLVMD_CMD_RESTART:
break;
default:
@@ -306,6 +314,7 @@ int do_post_command(struct local_client *client)
char *args = header->node + strlen(header->node) + 1;
char *lockname;
init_test(0);
switch (header->cmd) {
case CLVMD_CMD_TEST:
status =
@@ -313,18 +322,18 @@ int do_post_command(struct local_client *client)
client->bits.localsock.private = 0;
break;
case CLVMD_CMD_LOCK_VG:
case CLVMD_CMD_VG_BACKUP:
case CLVMD_CMD_LOCK_QUERY:
/* Nothing to do here */
break;
case CLVMD_CMD_LOCK_LV:
lock_cmd = args[0];
lock_flags = args[1];
lockname = &args[2];
if (lock_flags & LCK_TEST_MODE)
init_test(1);
status = post_lock_lv(lock_cmd, lock_flags, lockname);
break;
default:
/* Nothing to do here */
break;
}
return status;
}
@@ -333,21 +342,93 @@ int do_post_command(struct local_client *client)
/* Called when the client is about to be deleted */
void cmd_client_cleanup(struct local_client *client)
{
if (client->bits.localsock.private) {
struct dm_hash_node *v;
struct dm_hash_table *lock_hash =
(struct dm_hash_table *)client->bits.localsock.private;
struct dm_hash_table *lock_hash;
int lkid;
char *lockname;
if (!client->bits.localsock.private)
return;
lock_hash = (struct dm_hash_table *)client->bits.localsock.private;
dm_hash_iterate(v, lock_hash) {
int lkid = (int)(long)dm_hash_get_data(lock_hash, v);
char *lockname = dm_hash_get_key(lock_hash, v);
lkid = (int)(long)dm_hash_get_data(lock_hash, v);
lockname = dm_hash_get_key(lock_hash, v);
DEBUGLOG("cleanup: Unlocking lock %s %x\n", lockname, lkid);
sync_unlock(lockname, lkid);
(void) sync_unlock(lockname, lkid);
}
dm_hash_destroy(lock_hash);
client->bits.localsock.private = 0;
}
}
static int restart_clvmd(void)
{
const char **argv;
char *lv_name;
int argc = 0, max_locks = 0;
struct dm_hash_node *hn = NULL;
char debug_arg[16];
const char *clvmd = getenv("LVM_CLVMD_BINARY") ? : CLVMD_PATH;
DEBUGLOG("clvmd restart requested\n");
/* Count exclusively-open LVs */
do {
hn = get_next_excl_lock(hn, &lv_name);
if (lv_name) {
max_locks++;
if (!*lv_name)
break; /* FIXME: Is this error ? */
}
} while (hn);
/* clvmd + locks (-E uuid) + debug (-d X) + NULL */
if (!(argv = malloc((max_locks * 2 + 5) * sizeof(*argv))))
goto_out;
/*
* Build the command-line
*/
argv[argc++] = "clvmd";
/* Propogate debug options */
if (clvmd_get_debug()) {
if (dm_snprintf(debug_arg, sizeof(debug_arg), "-d%u", clvmd_get_debug()) < 0)
goto_out;
argv[argc++] = debug_arg;
}
argv[argc++] = "-I";
argv[argc++] = clops->name;
/* Now add the exclusively-open LVs */
hn = NULL;
do {
hn = get_next_excl_lock(hn, &lv_name);
if (lv_name) {
if (!*lv_name)
break; /* FIXME: Is this error ? */
argv[argc++] = "-E";
argv[argc++] = lv_name;
DEBUGLOG("excl lock: %s\n", lv_name);
}
} while (hn);
argv[argc] = NULL;
/* Exec new clvmd */
DEBUGLOG("--- Restarting %s ---\n", clvmd);
for (argc = 1; argv[argc]; argc++) DEBUGLOG("--- %d: %s\n", argc, argv[argc]);
/* NOTE: This will fail when downgrading! */
execvp(clvmd, (char **)argv);
out:
/* We failed */
DEBUGLOG("Restart of clvmd failed.\n");
free(argv);
return EIO;
}

View File

@@ -0,0 +1,33 @@
/*
* Copyright (C) 2010 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* This file must be included first by every clvmd source file.
*/
#ifndef _LVM_CLVMD_COMMON_H
#define _LVM_CLVMD_COMMON_H
#include "configure.h"
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include "libdevmapper.h"
#include "lvm-logging.h"
#include <unistd.h>
#include <sys/stat.h>
#endif

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -23,6 +23,7 @@
struct local_client;
struct cluster_ops {
const char *name;
void (*cluster_init_completed) (void);
int (*cluster_send_message) (const void *buf, int msglen,
@@ -54,13 +55,6 @@ struct cluster_ops {
};
#ifdef USE_GULM
# include "tcp-comms.h"
struct cluster_ops *init_gulm_cluster(void);
#define MAX_CSID_LEN GULM_MAX_CSID_LEN
#define MAX_CLUSTER_MEMBER_NAME_LEN GULM_MAX_CLUSTER_MEMBER_NAME_LEN
#endif
#ifdef USE_CMAN
# include <netinet/in.h>
# include "libcman.h"
@@ -110,5 +104,16 @@ struct cluster_ops *init_openais_cluster(void);
struct cluster_ops *init_corosync_cluster(void);
#endif
#ifdef USE_SINGLENODE
# define SINGLENODE_CSID_LEN (sizeof(int))
# ifndef MAX_CLUSTER_MEMBER_NAME_LEN
# define MAX_CLUSTER_MEMBER_NAME_LEN 64
# endif
# define SINGLENODE_MAX_CLUSTER_MESSAGE 65535
# ifndef MAX_CSID_LEN
# define MAX_CSID_LEN sizeof(int)
# endif
struct cluster_ops *init_singlenode_cluster(void);
#endif
#endif

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2009 Red Hat, Inc. All rights reserved.
* Copyright (C) 2009-2012 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -17,46 +17,31 @@
* and lock manager.
*/
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include "clvmd-common.h"
#include <configure.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <utmpx.h>
#include <syslog.h>
#include <assert.h>
#include <libdevmapper.h>
#include <corosync/corotypes.h>
#include <corosync/cpg.h>
#include <corosync/quorum.h>
#include <corosync/confdb.h>
#include <libdlm.h>
#include "locking.h"
#include "lvm-logging.h"
#include "clvm.h"
#include "clvmd-comms.h"
#include "lvm-functions.h"
#include "clvmd.h"
#include "lvm-functions.h"
#include "locking.h"
#include <corosync/cpg.h>
#include <corosync/quorum.h>
#ifdef HAVE_COROSYNC_CONFDB_H
# include <corosync/confdb.h>
#elif defined HAVE_COROSYNC_CMAP_H
# include <corosync/cmap.h>
#else
# error "Either HAVE_COROSYNC_CONFDB_H or HAVE_COROSYNC_CMAP_H must be defined."
#endif
#include <libdlm.h>
#include <syslog.h>
/* Timeout value for several corosync calls */
#define LOCKSPACE_NAME "clvmd"
@@ -297,6 +282,10 @@ static int _init_cluster(void)
{
cs_error_t err;
#ifdef QUORUM_SET /* corosync/quorum.h */
uint32_t quorum_type;
#endif
node_hash = dm_hash_create(100);
err = cpg_initialize(&cpg_handle,
@@ -308,8 +297,21 @@ static int _init_cluster(void)
return cs_to_errno(err);
}
#ifdef QUORUM_SET
err = quorum_initialize(&quorum_handle,
&quorum_callbacks,
&quorum_type);
if (quorum_type != QUORUM_SET) {
syslog(LOG_ERR, "Corosync quorum service is not configured");
DEBUGLOG("Corosync quorum service is not configured");
return EINVAL;
}
#else
err = quorum_initialize(&quorum_handle,
&quorum_callbacks);
#endif
if (err != CS_OK) {
syslog(LOG_ERR, "Cannot initialise Corosync quorum service: %d",
err);
@@ -317,19 +319,18 @@ static int _init_cluster(void)
return cs_to_errno(err);
}
/* Create a lockspace for LV & VG locks to live in */
lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
if (!lockspace) {
if (errno == EEXIST) {
lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
}
lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
if (!lockspace) {
syslog(LOG_ERR, "Unable to create lockspace for CLVM: %m");
quorum_finalize(quorum_handle);
syslog(LOG_ERR, "Unable to create DLM lockspace for CLVM: %m");
return -1;
}
}
DEBUGLOG("Created DLM lockspace for CLVMD.\n");
} else
DEBUGLOG("Opened existing DLM lockspace for CLVMD.\n");
dlm_ls_pthread_init(lockspace);
DEBUGLOG("DLM initialisation complete\n");
@@ -575,6 +576,7 @@ static int _cluster_send_message(const void *buf, int msglen, const char *csid,
return cs_to_errno(err);
}
#ifdef HAVE_COROSYNC_CONFDB_H
/*
* We are not necessarily connected to a Red Hat Cluster system,
* but if we are, this returns the cluster name from cluster.conf.
@@ -621,7 +623,40 @@ out:
return 0;
}
#elif defined HAVE_COROSYNC_CMAP_H
static int _get_cluster_name(char *buf, int buflen)
{
cmap_handle_t cmap_handle = 0;
int result;
char *name = NULL;
/* This is a default in case everything else fails */
strncpy(buf, "Corosync", buflen);
/* Look for a cluster name in cmap */
result = cmap_initialize(&cmap_handle);
if (result != CS_OK)
return 0;
result = cmap_get_string(cmap_handle, "totem.cluster_name", &name);
if (result != CS_OK)
goto out;
memset(buf, 0, buflen);
strncpy(buf, name, buflen - 1);
out:
if (name)
free(name);
cmap_finalize(cmap_handle);
return 0;
}
#endif
static struct cluster_ops _cluster_corosync_ops = {
.name = "corosync",
.cluster_init_completed = NULL,
.cluster_send_message = _cluster_send_message,
.name_from_csid = _name_from_csid,

File diff suppressed because it is too large Load Diff

View File

@@ -1,13 +0,0 @@
/* DLM constant that clvmd uses as a generic NONBLOCK lock flag */
#define LKF_NOQUEUE 1
extern int get_next_node_csid(void **context, char *csid);
extern void add_down_node(char *csid);
extern int gulm_fd(void);
extern int get_ip_address(const char *node, char *addr);
extern void tcp_remove_client(const char *csid);
extern int alloc_client(int fd, const char *csid, struct local_client **new_client);
void gulm_add_up_node(const char *csid);
int gulm_name_from_csid(const char *csid, char *name);

View File

@@ -17,33 +17,11 @@
* and lock manager.
*/
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include "clvmd-common.h"
#include <configure.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <utmpx.h>
#include <syslog.h>
#include <assert.h>
#include <libdevmapper.h>
#include <openais/saAis.h>
#include <openais/saLck.h>
@@ -52,7 +30,6 @@
#include <corosync/cpg.h>
#include "locking.h"
#include "lvm-logging.h"
#include "clvm.h"
#include "clvmd-comms.h"
#include "lvm-functions.h"
@@ -250,7 +227,7 @@ static void openais_cpg_deliver_callback (cpg_handle_t handle,
memcpy(&target_nodeid, msg, OPENAIS_CSID_LEN);
DEBUGLOG("%u got message from nodeid %d for %d. len %d\n",
DEBUGLOG("%u got message from nodeid %d for %d. len %" PRIsize_t "\n",
our_nodeid, nodeid, target_nodeid, msg_len-4);
if (nodeid != our_nodeid)
@@ -268,7 +245,8 @@ static void openais_cpg_confchg_callback(cpg_handle_t handle,
int i;
struct node_info *ninfo;
DEBUGLOG("confchg callback. %d joined, %d left, %d members\n",
DEBUGLOG("confchg callback. %" PRIsize_t " joined, "
"%" PRIsize_t " left, %" PRIsize_t " members\n",
joined_list_entries, left_list_entries, member_list_entries);
for (i=0; i<joined_list_entries; i++) {
@@ -528,11 +506,11 @@ static int _lock_resource(char *resource, int mode, int flags, int *lockid)
saLckResourceClose(res_handle);
return ais_to_errno(err);
}
/* Wait for it to complete */
DEBUGLOG("lock_resource returning %d, lock_id=%llx\n", err,
lock_id);
DEBUGLOG("lock_resource returning %d, lock_id=%" PRIx64 "\n",
err, lock_id);
linfo->lock_id = lock_id;
linfo->res_handle = res_handle;
@@ -553,7 +531,7 @@ static int _unlock_resource(char *resource, int lockid)
if (!linfo)
return 0;
DEBUGLOG("unlock_resource: lockid: %llx\n", linfo->lock_id);
DEBUGLOG("unlock_resource: lockid: %" PRIx64 "\n", linfo->lock_id);
err = saLckResourceUnlock(linfo->lock_id, SA_TIME_END);
if (err != SA_AIS_OK)
{
@@ -689,6 +667,7 @@ static int _get_cluster_name(char *buf, int buflen)
}
static struct cluster_ops _cluster_openais_ops = {
.name = "openais",
.cluster_init_completed = NULL,
.cluster_send_message = _cluster_send_message,
.name_from_csid = _name_from_csid,

View File

@@ -0,0 +1,316 @@
/*
* Copyright (C) 2009 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "clvmd-common.h"
#include <pthread.h>
#include "locking.h"
#include "clvm.h"
#include "clvmd-comms.h"
#include "lvm-functions.h"
#include "clvmd.h"
#include <sys/un.h>
#include <sys/socket.h>
#include <fcntl.h>
static const char SINGLENODE_CLVMD_SOCKNAME[] = DEFAULT_RUN_DIR "/clvmd_singlenode.sock";
static int listen_fd = -1;
static struct dm_hash_table *_locks;
static int _lockid;
struct lock {
int lockid;
int mode;
int excl;
};
static void close_comms(void)
{
if (listen_fd != -1 && close(listen_fd))
stack;
(void)unlink(SINGLENODE_CLVMD_SOCKNAME);
listen_fd = -1;
}
static int init_comms(void)
{
struct sockaddr_un addr;
mode_t old_mask;
close_comms();
(void) dm_prepare_selinux_context(SINGLENODE_CLVMD_SOCKNAME, S_IFSOCK);
old_mask = umask(0077);
listen_fd = socket(PF_UNIX, SOCK_STREAM, 0);
if (listen_fd < 0) {
DEBUGLOG("Can't create local socket: %s\n", strerror(errno));
goto error;
}
/* Set Close-on-exec */
if (fcntl(listen_fd, F_SETFD, 1)) {
DEBUGLOG("Setting CLOEXEC on client fd failed: %s\n", strerror(errno));
goto error;
}
memset(&addr, 0, sizeof(addr));
memcpy(addr.sun_path, SINGLENODE_CLVMD_SOCKNAME,
sizeof(SINGLENODE_CLVMD_SOCKNAME));
addr.sun_family = AF_UNIX;
if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
DEBUGLOG("Can't bind local socket: %s\n", strerror(errno));
goto error;
}
if (listen(listen_fd, 10) < 0) {
DEBUGLOG("Can't listen local socket: %s\n", strerror(errno));
goto error;
}
umask(old_mask);
(void) dm_prepare_selinux_context(NULL, 0);
return 0;
error:
umask(old_mask);
(void) dm_prepare_selinux_context(NULL, 0);
close_comms();
return -1;
}
static int _init_cluster(void)
{
int r;
if (!(_locks = dm_hash_create(128))) {
DEBUGLOG("Failed to allocate single-node hash table.\n");
return 1;
}
r = init_comms();
if (r) {
dm_hash_destroy(_locks);
return r;
}
DEBUGLOG("Single-node cluster initialised.\n");
return 0;
}
static void _cluster_closedown(void)
{
close_comms();
DEBUGLOG("cluster_closedown\n");
destroy_lvhash();
dm_hash_destroy(_locks);
_locks = NULL;
_lockid = 0;
}
static void _get_our_csid(char *csid)
{
int nodeid = 1;
memcpy(csid, &nodeid, sizeof(int));
}
static int _csid_from_name(char *csid, const char *name)
{
return 1;
}
static int _name_from_csid(const char *csid, char *name)
{
sprintf(name, "SINGLENODE");
return 0;
}
static int _get_num_nodes(void)
{
return 1;
}
/* Node is now known to be running a clvmd */
static void _add_up_node(const char *csid)
{
}
/* Call a callback for each node, so the caller knows whether it's up or down */
static int _cluster_do_node_callback(struct local_client *master_client,
void (*callback)(struct local_client *,
const char *csid, int node_up))
{
return 0;
}
int _lock_file(const char *file, uint32_t flags);
static pthread_mutex_t _lock_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Using one common condition for all locks for simplicity */
static pthread_cond_t _lock_cond = PTHREAD_COND_INITIALIZER;
/* Real locking */
static int _lock_resource(const char *resource, int mode, int flags, int *lockid)
{
struct lock *lck;
DEBUGLOG("Locking resource %s, flags=%d, mode=%d\n",
resource, flags, mode);
mode &= LCK_TYPE_MASK;
pthread_mutex_lock(&_lock_mutex);
retry:
if (!(lck = dm_hash_lookup(_locks, resource))) {
/* Add new locked resource */
if (!(lck = dm_zalloc(sizeof(struct lock))) ||
!dm_hash_insert(_locks, resource, lck))
goto bad;
lck->lockid = ++_lockid;
goto out;
}
/* Update/convert lock */
if (flags == LCKF_CONVERT) {
if (lck->excl)
mode = LCK_EXCL;
} else if ((lck->mode == LCK_WRITE) || (lck->mode == LCK_EXCL)) {
DEBUGLOG("Resource %s already %s locked (%d)...\n", resource,
(lck->mode == LCK_WRITE) ? "write" : "exclusively", lck->lockid);
goto maybe_retry;
} else if (lck->mode > mode) {
DEBUGLOG("Resource %s already locked and %s lock requested...\n",
resource,
(mode == LCK_READ) ? "READ" :
(mode == LCK_WRITE) ? "WRITE" : "EXCLUSIVE");
goto maybe_retry;
}
out:
*lockid = lck->lockid;
lck->mode = mode;
lck->excl |= (mode == LCK_EXCL);
DEBUGLOG("Locked resource %s, lockid=%d, mode=%d\n", resource, lck->lockid, mode);
pthread_cond_broadcast(&_lock_cond); /* wakeup waiters */
pthread_mutex_unlock(&_lock_mutex);
return 0;
maybe_retry:
if (!(flags & LCK_NONBLOCK)) {
pthread_cond_wait(&_lock_cond, &_lock_mutex);
DEBUGLOG("Resource %s RETRYING lock...\n", resource);
goto retry;
}
bad:
DEBUGLOG("Failed to lock resource %s\n", resource);
pthread_mutex_unlock(&_lock_mutex);
return 1; /* fail */
}
static int _unlock_resource(const char *resource, int lockid)
{
struct lock *lck;
if (lockid < 0) {
DEBUGLOG("Not tracking unlock of lockid -1: %s, lockid=%d\n",
resource, lockid);
return 0;
}
DEBUGLOG("Unlocking resource %s, lockid=%d\n", resource, lockid);
pthread_mutex_lock(&_lock_mutex);
if (!(lck = dm_hash_lookup(_locks, resource))) {
pthread_mutex_unlock(&_lock_mutex);
DEBUGLOG("Resource %s, lockid=%d is not locked.\n", resource, lockid);
return 1;
}
if (lck->lockid != lockid) {
pthread_mutex_unlock(&_lock_mutex);
DEBUGLOG("Resource %s has wrong lockid %d, expected %d.\n",
resource, lck->lockid, lockid);
return 1;
}
dm_hash_remove(_locks, resource);
dm_free(lck);
pthread_cond_broadcast(&_lock_cond); /* wakeup waiters */
pthread_mutex_unlock(&_lock_mutex);
return 0;
}
static int _is_quorate(void)
{
return 1;
}
static int _get_main_cluster_fd(void)
{
return listen_fd;
}
static int _cluster_fd_callback(struct local_client *fd, char *buf, int len,
const char *csid,
struct local_client **new_client)
{
return 1;
}
static int _cluster_send_message(const void *buf, int msglen,
const char *csid,
const char *errtext)
{
return 0;
}
static int _get_cluster_name(char *buf, int buflen)
{
strncpy(buf, "localcluster", buflen);
buf[buflen - 1] = 0;
return 0;
}
static struct cluster_ops _cluster_singlenode_ops = {
.name = "singlenode",
.cluster_init_completed = NULL,
.cluster_send_message = _cluster_send_message,
.name_from_csid = _name_from_csid,
.csid_from_name = _csid_from_name,
.get_num_nodes = _get_num_nodes,
.cluster_fd_callback = _cluster_fd_callback,
.get_main_cluster_fd = _get_main_cluster_fd,
.cluster_do_node_callback = _cluster_do_node_callback,
.is_quorate = _is_quorate,
.get_our_csid = _get_our_csid,
.add_up_node = _add_up_node,
.reread_config = NULL,
.cluster_closedown = _cluster_closedown,
.get_cluster_name = _get_cluster_name,
.sync_lock = _lock_resource,
.sync_unlock = _unlock_resource,
};
struct cluster_ops *init_singlenode_cluster(void)
{
if (!_init_cluster())
return &_cluster_singlenode_ops;
else
return NULL;
}

File diff suppressed because it is too large Load Diff

View File

@@ -20,9 +20,6 @@
#define CLVMD_MINOR_VERSION 2
#define CLVMD_PATCH_VERSION 1
/* Name of the cluster LVM admin lock */
#define ADMIN_LOCK_NAME "CLVMD_ADMIN"
/* Default time (in seconds) we will wait for all remote commands to execute
before declaring them dead */
#define DEFAULT_CMD_TIMEOUT 60
@@ -115,11 +112,14 @@ extern void cmd_client_cleanup(struct local_client *client);
extern int add_client(struct local_client *new_client);
extern void clvmd_cluster_init_completed(void);
extern void process_message(struct local_client *client, const char *buf,
extern void process_message(struct local_client *client, char *buf,
int len, const char *csid);
extern void debuglog(const char *fmt, ... )
__attribute__ ((format(printf, 1, 2)));
void clvmd_set_debug(debug_t new_de);
debug_t clvmd_get_debug(void);
int sync_lock(const char *resource, int mode, int flags, int *lockid);
int sync_unlock(const char *resource, int lockid);

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -13,29 +13,9 @@
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include "clvmd-common.h"
#include <configure.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <syslog.h>
#include <assert.h>
#include <libdevmapper.h>
#include <libdlm.h>
#include "lvm-types.h"
#include "clvm.h"
@@ -46,27 +26,25 @@
/* LVM2 headers */
#include "toolcontext.h"
#include "lvmcache.h"
#include "lvm-logging.h"
#include "lvm-globals.h"
#include "activate.h"
#include "locking.h"
#include "archiver.h"
#include "defaults.h"
#include "memlock.h"
#include <syslog.h>
static struct cmd_context *cmd = NULL;
static struct dm_hash_table *lv_hash = NULL;
static pthread_mutex_t lv_hash_lock;
static pthread_mutex_t lvm_lock;
static char last_error[1024];
static int suspended = 0;
struct lv_info {
int lock_id;
int lock_mode;
};
static const char *decode_locking_cmd(unsigned char cmdl)
static const char *decode_full_locking_cmd(uint32_t cmdl)
{
static char buf[128];
const char *type;
@@ -103,7 +81,7 @@ static const char *decode_locking_cmd(unsigned char cmdl)
command = "LCK_VG";
break;
case LCK_LV:
scope = "LV";
scope = "LV";
switch (cmdl & LCK_MASK) {
case LCK_LV_EXCLUSIVE & LCK_MASK:
command = "LCK_LV_EXCLUSIVE";
@@ -131,30 +109,46 @@ static const char *decode_locking_cmd(unsigned char cmdl)
break;
}
sprintf(buf, "0x%x %s (%s|%s%s%s%s%s%s)", cmdl, command, type, scope,
sprintf(buf, "0x%x %s (%s|%s%s%s%s%s)", cmdl, command, type, scope,
cmdl & LCK_NONBLOCK ? "|NONBLOCK" : "",
cmdl & LCK_HOLD ? "|HOLD" : "",
cmdl & LCK_LOCAL ? "|LOCAL" : "",
cmdl & LCK_CLUSTER_VG ? "|CLUSTER_VG" : "",
cmdl & LCK_CACHE ? "|CACHE" : "");
return buf;
}
/*
* Only processes 8 bits: excludes LCK_CACHE.
*/
static const char *decode_locking_cmd(unsigned char cmdl)
{
return decode_full_locking_cmd((uint32_t) cmdl);
}
static const char *decode_flags(unsigned char flags)
{
static char buf[128];
int len;
sprintf(buf, "0x%x (%s%s%s%s)", flags,
flags & LCK_PARTIAL_MODE ? "PARTIAL_MODE " : "",
flags & LCK_MIRROR_NOSYNC_MODE ? "MIRROR_NOSYNC " : "",
flags & LCK_DMEVENTD_MONITOR_MODE ? "DMEVENTD_MONITOR " : "",
flags & LCK_CONVERT ? "CONVERT " : "");
len = sprintf(buf, "0x%x ( %s%s%s%s%s%s%s)", flags,
flags & LCK_PARTIAL_MODE ? "PARTIAL_MODE|" : "",
flags & LCK_MIRROR_NOSYNC_MODE ? "MIRROR_NOSYNC|" : "",
flags & LCK_DMEVENTD_MONITOR_MODE ? "DMEVENTD_MONITOR|" : "",
flags & LCK_ORIGIN_ONLY_MODE ? "ORIGIN_ONLY|" : "",
flags & LCK_TEST_MODE ? "TEST|" : "",
flags & LCK_CONVERT ? "CONVERT|" : "",
flags & LCK_DMEVENTD_MONITOR_IGNORE ? "DMEVENTD_MONITOR_IGNORE|" : "");
if (len > 1)
buf[len - 2] = ' ';
else
buf[0] = '\0';
return buf;
}
char *get_last_lvm_error()
char *get_last_lvm_error(void)
{
return last_error;
}
@@ -173,11 +167,15 @@ static struct lv_info *lookup_info(const char *resource)
return lvi;
}
static void insert_info(const char *resource, struct lv_info *lvi)
static int insert_info(const char *resource, struct lv_info *lvi)
{
int ret;
pthread_mutex_lock(&lv_hash_lock);
dm_hash_insert(lv_hash, resource, lvi);
ret = dm_hash_insert(lv_hash, resource, lvi);
pthread_mutex_unlock(&lv_hash_lock);
return ret;
}
static void remove_info(const char *resource)
@@ -201,16 +199,16 @@ static int get_current_lock(char *resource)
}
void init_lvhash()
void init_lvhash(void)
{
/* Create hash table for keeping LV locks & status */
lv_hash = dm_hash_create(100);
lv_hash = dm_hash_create(1024);
pthread_mutex_init(&lv_hash_lock, NULL);
pthread_mutex_init(&lvm_lock, NULL);
}
/* Called at shutdown to tidy the lockspace */
void destroy_lvhash()
void destroy_lvhash(void)
{
struct dm_hash_node *v;
struct lv_info *lvi;
@@ -236,24 +234,35 @@ void destroy_lvhash()
}
/* Gets a real lock and keeps the info in the hash table */
int hold_lock(char *resource, int mode, int flags)
static int hold_lock(char *resource, int mode, int flags)
{
int status;
int saved_errno;
struct lv_info *lvi;
if (test_mode())
return 0;
/* Mask off invalid options */
flags &= LKF_NOQUEUE | LKF_CONVERT;
flags &= LCKF_NOQUEUE | LCKF_CONVERT;
lvi = lookup_info(resource);
if (lvi && lvi->lock_mode == mode) {
DEBUGLOG("hold_lock, lock mode %d already held\n", mode);
return 0;
if (lvi) {
if (lvi->lock_mode == mode) {
DEBUGLOG("hold_lock, lock mode %d already held\n",
mode);
return 0;
}
if ((lvi->lock_mode == LCK_EXCL) && (mode == LCK_WRITE)) {
DEBUGLOG("hold_lock, lock already held LCK_EXCL, "
"ignoring LCK_WRITE request");
return 0;
}
}
/* Only allow explicit conversions */
if (lvi && !(flags & LKF_CONVERT)) {
if (lvi && !(flags & LCKF_CONVERT)) {
errno = EBUSY;
return -1;
}
@@ -272,18 +281,23 @@ int hold_lock(char *resource, int mode, int flags)
errno = saved_errno;
} else {
lvi = malloc(sizeof(struct lv_info));
if (!lvi)
if (!lvi) {
errno = ENOMEM;
return -1;
}
lvi->lock_mode = mode;
status = sync_lock(resource, mode, flags & ~LKF_CONVERT, &lvi->lock_id);
status = sync_lock(resource, mode, flags & ~LCKF_CONVERT, &lvi->lock_id);
saved_errno = errno;
if (status) {
free(lvi);
DEBUGLOG("hold_lock. lock at %d failed: %s\n", mode,
strerror(errno));
} else
insert_info(resource, lvi);
if (!insert_info(resource, lvi)) {
errno = ENOMEM;
return -1;
}
errno = saved_errno;
}
@@ -291,12 +305,15 @@ int hold_lock(char *resource, int mode, int flags)
}
/* Unlock and remove it from the hash table */
int hold_unlock(char *resource)
static int hold_unlock(char *resource)
{
struct lv_info *lvi;
int status;
int saved_errno;
if (test_mode())
return 0;
if (!(lvi = lookup_info(resource))) {
DEBUGLOG("hold_unlock, lock not already held\n");
return 0;
@@ -323,7 +340,7 @@ int hold_unlock(char *resource)
*/
/* Activate LV exclusive or non-exclusive */
static int do_activate_lv(char *resource, unsigned char lock_flags, int mode)
static int do_activate_lv(char *resource, unsigned char command, unsigned char lock_flags, int mode)
{
int oldmode;
int status;
@@ -333,7 +350,7 @@ static int do_activate_lv(char *resource, unsigned char lock_flags, int mode)
/* Is it already open ? */
oldmode = get_current_lock(resource);
if (oldmode == mode && (lock_flags & LCK_CLUSTER_VG)) {
if (oldmode == mode && (command & LCK_CLUSTER_VG)) {
DEBUGLOG("do_activate_lv, lock already held at %d\n", oldmode);
return 0; /* Nothing to do */
}
@@ -346,9 +363,9 @@ static int do_activate_lv(char *resource, unsigned char lock_flags, int mode)
return 0; /* Success, we did nothing! */
/* Do we need to activate exclusively? */
if ((activate_lv == 2) || (mode == LKM_EXMODE)) {
if ((activate_lv == 2) || (mode == LCK_EXCL)) {
exclusive = 1;
mode = LKM_EXMODE;
mode = LCK_EXCL;
}
/*
@@ -356,8 +373,8 @@ static int do_activate_lv(char *resource, unsigned char lock_flags, int mode)
* Use lock conversion only if requested, to prevent implicit conversion
* of exclusive lock to shared one during activation.
*/
if (lock_flags & LCK_CLUSTER_VG) {
status = hold_lock(resource, mode, LKF_NOQUEUE | (lock_flags & LCK_CONVERT?LKF_CONVERT:0));
if (command & LCK_CLUSTER_VG) {
status = hold_lock(resource, mode, LCKF_NOQUEUE | (lock_flags & LCK_CONVERT ? LCKF_CONVERT:0));
if (status) {
/* Return an LVM-sensible error for this.
* Forcing EIO makes the upper level return this text
@@ -372,71 +389,86 @@ static int do_activate_lv(char *resource, unsigned char lock_flags, int mode)
}
/* If it's suspended then resume it */
if (!lv_info_by_lvid(cmd, resource, &lvi, 0, 0))
return EIO;
if (!lv_info_by_lvid(cmd, resource, 0, &lvi, 0, 0))
goto error;
if (lvi.suspended)
if (!lv_resume(cmd, resource))
return EIO;
if (lvi.suspended) {
critical_section_inc(cmd, "resuming");
if (!lv_resume(cmd, resource, 0)) {
critical_section_dec(cmd, "resumed");
goto error;
}
}
/* Now activate it */
if (!lv_activate(cmd, resource, exclusive))
return EIO;
goto error;
return 0;
error:
if (oldmode == -1 || oldmode != mode)
(void)hold_unlock(resource);
return EIO;
}
/* Resume the LV if it was active */
static int do_resume_lv(char *resource, unsigned char lock_flags)
static int do_resume_lv(char *resource, unsigned char command, unsigned char lock_flags)
{
int oldmode;
int oldmode, origin_only, exclusive, revert;
/* Is it open ? */
oldmode = get_current_lock(resource);
if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) {
if (oldmode == -1 && (command & LCK_CLUSTER_VG)) {
DEBUGLOG("do_resume_lv, lock not already held\n");
return 0; /* We don't need to do anything */
}
origin_only = (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0;
exclusive = (oldmode == LCK_EXCL) ? 1 : 0;
revert = (lock_flags & LCK_REVERT_MODE) ? 1 : 0;
if (!lv_resume_if_active(cmd, resource))
if (!lv_resume_if_active(cmd, resource, origin_only, exclusive, revert))
return EIO;
return 0;
}
/* Suspend the device if active */
static int do_suspend_lv(char *resource, unsigned char lock_flags)
static int do_suspend_lv(char *resource, unsigned char command, unsigned char lock_flags)
{
int oldmode;
struct lvinfo lvi;
unsigned origin_only = (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0;
unsigned exclusive;
/* Is it open ? */
oldmode = get_current_lock(resource);
if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) {
if (oldmode == -1 && (command & LCK_CLUSTER_VG)) {
DEBUGLOG("do_suspend_lv, lock not already held\n");
return 0; /* Not active, so it's OK */
}
exclusive = (oldmode == LCK_EXCL) ? 1 : 0;
/* Only suspend it if it exists */
if (!lv_info_by_lvid(cmd, resource, &lvi, 0, 0))
if (!lv_info_by_lvid(cmd, resource, origin_only, &lvi, 0, 0))
return EIO;
if (lvi.exists &&
!lv_suspend_if_active(cmd, resource, origin_only, exclusive))
return EIO;
if (lvi.exists) {
if (!lv_suspend_if_active(cmd, resource)) {
return EIO;
}
}
return 0;
}
static int do_deactivate_lv(char *resource, unsigned char lock_flags)
static int do_deactivate_lv(char *resource, unsigned char command, unsigned char lock_flags)
{
int oldmode;
int status;
/* Is it open ? */
oldmode = get_current_lock(resource);
if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) {
if (oldmode == -1 && (command & LCK_CLUSTER_VG)) {
DEBUGLOG("do_deactivate_lock, lock not already held\n");
return 0; /* We don't need to do anything */
}
@@ -444,7 +476,7 @@ static int do_deactivate_lv(char *resource, unsigned char lock_flags)
if (!lv_deactivate(cmd, resource))
return EIO;
if (lock_flags & LCK_CLUSTER_VG) {
if (command & LCK_CLUSTER_VG) {
status = hold_unlock(resource);
if (status)
return errno;
@@ -460,12 +492,11 @@ const char *do_lock_query(char *resource)
mode = get_current_lock(resource);
switch (mode) {
case LKM_NLMODE: type = "NL"; break;
case LKM_CRMODE: type = "CR"; break;
case LKM_CWMODE: type = "CW"; break;
case LKM_PRMODE: type = "PR"; break;
case LKM_PWMODE: type = "PW"; break;
case LKM_EXMODE: type = "EX"; break;
case LCK_NULL: type = "NL"; break;
case LCK_READ: type = "CR"; break;
case LCK_PREAD:type = "PR"; break;
case LCK_WRITE:type = "PW"; break;
case LCK_EXCL: type = "EX"; break;
}
DEBUGLOG("do_lock_query: resource '%s', mode %i (%s)\n", resource, mode, type ?: "?");
@@ -479,8 +510,8 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
{
int status = 0;
DEBUGLOG("do_lock_lv: resource '%s', cmd = %s, flags = %s, memlock = %d\n",
resource, decode_locking_cmd(command), decode_flags(lock_flags), memlock());
DEBUGLOG("do_lock_lv: resource '%s', cmd = %s, flags = %s, critical_section = %d\n",
resource, decode_locking_cmd(command), decode_flags(lock_flags), critical_section());
if (!cmd->config_valid || config_files_changed(cmd)) {
/* Reinitialise various settings inc. logging, filters */
@@ -494,8 +525,14 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
if (lock_flags & LCK_MIRROR_NOSYNC_MODE)
init_mirror_in_sync(1);
if (!(lock_flags & LCK_DMEVENTD_MONITOR_MODE))
init_dmeventd_monitor(0);
if (lock_flags & LCK_DMEVENTD_MONITOR_IGNORE)
init_dmeventd_monitor(DMEVENTD_MONITOR_IGNORE);
else {
if (lock_flags & LCK_DMEVENTD_MONITOR_MODE)
init_dmeventd_monitor(1);
else
init_dmeventd_monitor(0);
}
cmd->partial_activation = (lock_flags & LCK_PARTIAL_MODE) ? 1 : 0;
@@ -504,28 +541,24 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
switch (command & LCK_MASK) {
case LCK_LV_EXCLUSIVE:
status = do_activate_lv(resource, lock_flags, LKM_EXMODE);
status = do_activate_lv(resource, command, lock_flags, LCK_EXCL);
break;
case LCK_LV_SUSPEND:
status = do_suspend_lv(resource, lock_flags);
if (!status)
suspended++;
status = do_suspend_lv(resource, command, lock_flags);
break;
case LCK_UNLOCK:
case LCK_LV_RESUME: /* if active */
status = do_resume_lv(resource, lock_flags);
if (!status)
suspended--;
status = do_resume_lv(resource, command, lock_flags);
break;
case LCK_LV_ACTIVATE:
status = do_activate_lv(resource, lock_flags, LKM_CRMODE);
status = do_activate_lv(resource, command, lock_flags, LCK_READ);
break;
case LCK_LV_DEACTIVATE:
status = do_deactivate_lv(resource, lock_flags);
status = do_deactivate_lv(resource, command, lock_flags);
break;
default:
@@ -537,16 +570,13 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
if (lock_flags & LCK_MIRROR_NOSYNC_MODE)
init_mirror_in_sync(0);
if (!(lock_flags & LCK_DMEVENTD_MONITOR_MODE))
init_dmeventd_monitor(DEFAULT_DMEVENTD_MONITOR);
cmd->partial_activation = 0;
/* clean the pool for another command */
dm_pool_empty(cmd->mem);
pthread_mutex_unlock(&lvm_lock);
DEBUGLOG("Command return is %d, memlock is %d\n", status, memlock());
DEBUGLOG("Command return is %d, critical_section is %d\n", status, critical_section());
return status;
}
@@ -556,14 +586,14 @@ int pre_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
/* Nearly all the stuff happens cluster-wide. Apart from SUSPEND. Here we get the
lock out on this node (because we are the node modifying the metadata)
before suspending cluster-wide.
LKF_CONVERT is used always, local node is going to modify metadata
LCKF_CONVERT is used always, local node is going to modify metadata
*/
if ((command & (LCK_SCOPE_MASK | LCK_TYPE_MASK)) == LCK_LV_SUSPEND &&
(lock_flags & LCK_CLUSTER_VG)) {
(command & LCK_CLUSTER_VG)) {
DEBUGLOG("pre_lock_lv: resource '%s', cmd = %s, flags = %s\n",
resource, decode_locking_cmd(command), decode_flags(lock_flags));
if (hold_lock(resource, LKM_PWMODE, LKF_NOQUEUE | LKF_CONVERT))
if (hold_lock(resource, LCK_WRITE, LCKF_NOQUEUE | LCKF_CONVERT))
return errno;
}
return 0;
@@ -574,10 +604,11 @@ int post_lock_lv(unsigned char command, unsigned char lock_flags,
char *resource)
{
int status;
unsigned origin_only = (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0;
/* Opposite of above, done on resume after a metadata update */
if ((command & (LCK_SCOPE_MASK | LCK_TYPE_MASK)) == LCK_LV_RESUME &&
(lock_flags & LCK_CLUSTER_VG)) {
(command & LCK_CLUSTER_VG)) {
int oldmode;
DEBUGLOG
@@ -586,22 +617,20 @@ int post_lock_lv(unsigned char command, unsigned char lock_flags,
/* If the lock state is PW then restore it to what it was */
oldmode = get_current_lock(resource);
if (oldmode == LKM_PWMODE) {
if (oldmode == LCK_WRITE) {
struct lvinfo lvi;
pthread_mutex_lock(&lvm_lock);
status = lv_info_by_lvid(cmd, resource, &lvi, 0, 0);
status = lv_info_by_lvid(cmd, resource, origin_only, &lvi, 0, 0);
pthread_mutex_unlock(&lvm_lock);
if (!status)
return EIO;
if (lvi.exists) {
if (hold_lock(resource, LKM_CRMODE, LKF_CONVERT))
if (hold_lock(resource, LCK_READ, LCKF_CONVERT))
return errno;
} else {
if (hold_unlock(resource))
return errno;
}
} else if (hold_unlock(resource))
return errno;
}
}
return 0;
@@ -617,7 +646,7 @@ int do_check_lvm1(const char *vgname)
return status == 1 ? 0 : EBUSY;
}
int do_refresh_cache()
int do_refresh_cache(void)
{
DEBUGLOG("Refreshing context\n");
log_notice("Refreshing context");
@@ -639,46 +668,6 @@ int do_refresh_cache()
return 0;
}
/* Only called at gulm startup. Drop any leftover VG or P_orphan locks
that might be hanging around if we died for any reason
*/
static void drop_vg_locks()
{
char vg[128];
char line[255];
FILE *vgs =
popen
("lvm pvs --config 'log{command_names=0 prefix=\"\"}' --nolocking --noheadings -o vg_name", "r");
sync_unlock("P_" VG_ORPHANS, LCK_EXCL);
sync_unlock("P_" VG_GLOBAL, LCK_EXCL);
if (!vgs)
return;
while (fgets(line, sizeof(line), vgs)) {
char *vgend;
char *vgstart;
if (line[strlen(line)-1] == '\n')
line[strlen(line)-1] = '\0';
vgstart = line + strspn(line, " ");
vgend = vgstart + strcspn(vgstart, " ");
*vgend = '\0';
if (strncmp(vgstart, "WARNING:", 8) == 0)
continue;
sprintf(vg, "V_%s", vgstart);
sync_unlock(vg, LCK_EXCL);
}
if (fclose(vgs))
DEBUGLOG("vgs fclose failed: %s\n", strerror(errno));
}
/*
* Handle VG lock - drop metadata or update lvmcache state
*/
@@ -687,15 +676,6 @@ void do_lock_vg(unsigned char command, unsigned char lock_flags, char *resource)
uint32_t lock_cmd = command;
char *vgname = resource + 2;
DEBUGLOG("do_lock_vg: resource '%s', cmd = %s, flags = %s, memlock = %d\n",
resource, decode_locking_cmd(command), decode_flags(lock_flags), memlock());
/* P_#global causes a full cache refresh */
if (!strcmp(resource, "P_" VG_GLOBAL)) {
do_refresh_cache();
return;
}
lock_cmd &= (LCK_SCOPE_MASK | LCK_TYPE_MASK | LCK_HOLD);
/*
@@ -704,6 +684,15 @@ void do_lock_vg(unsigned char command, unsigned char lock_flags, char *resource)
if (strncmp(resource, "P_#", 3) && !strncmp(resource, "P_", 2))
lock_cmd |= LCK_CACHE;
DEBUGLOG("do_lock_vg: resource '%s', cmd = %s, flags = %s, critical_section = %d\n",
resource, decode_full_locking_cmd(lock_cmd), decode_flags(lock_flags), critical_section());
/* P_#global causes a full cache refresh */
if (!strcmp(resource, "P_" VG_GLOBAL)) {
do_refresh_cache();
return;
}
pthread_mutex_lock(&lvm_lock);
switch (lock_cmd) {
case LCK_VG_COMMIT:
@@ -727,21 +716,29 @@ void do_lock_vg(unsigned char command, unsigned char lock_flags, char *resource)
* but this may not be the case...
* I suppose this also comes in handy if clvmd crashes, not that it would!
*/
static void *get_initial_state()
static int get_initial_state(struct dm_hash_table *excl_uuid)
{
int lock_mode;
char lv[64], vg[64], flags[25], vg_flags[25];
char uuid[65];
char line[255];
FILE *lvs =
popen
("lvm lvs --config 'log{command_names=0 prefix=\"\"}' --nolocking --noheadings -o vg_uuid,lv_uuid,lv_attr,vg_attr",
"r");
char *lvs_cmd;
const char *lvm_binary = getenv("LVM_BINARY") ? : LVM_PATH;
FILE *lvs;
if (!lvs)
return NULL;
if (dm_asprintf(&lvs_cmd, "%s lvs --config 'log{command_names=0 prefix=\"\"}' "
"--nolocking --noheadings -o vg_uuid,lv_uuid,lv_attr,vg_attr",
lvm_binary) < 0)
return_0;
/* FIXME: Maybe link and use liblvm2cmd directly instead of fork */
if (!(lvs = popen(lvs_cmd, "r"))) {
dm_free(lvs_cmd);
return 0;
}
while (fgets(line, sizeof(line), lvs)) {
if (sscanf(line, "%s %s %s %s\n", vg, lv, flags, vg_flags) == 4) {
if (sscanf(line, "%64s %64s %25s %25s\n", vg, lv, flags, vg_flags) == 4) {
/* States: s:suspended a:active S:dropped snapshot I:invalid snapshot */
if (strlen(vg) == 38 && /* is is a valid UUID ? */
@@ -764,14 +761,23 @@ static void *get_initial_state()
memcpy(&uuid[58], &lv[32], 6);
uuid[64] = '\0';
/* Look for this lock in the list of EX locks
we were passed on the command-line */
lock_mode = (dm_hash_lookup(excl_uuid, uuid)) ?
LCK_EXCL : LCK_READ;
DEBUGLOG("getting initial lock for %s\n", uuid);
hold_lock(uuid, LKM_CRMODE, LKF_NOQUEUE);
if (hold_lock(uuid, lock_mode, LCKF_NOQUEUE))
DEBUGLOG("Failed to hold lock %s\n", uuid);
}
}
}
if (fclose(lvs))
DEBUGLOG("lvs fclose failed: %s\n", strerror(errno));
return NULL;
dm_free(lvs_cmd);
return 1;
}
static void lvm2_log_fn(int level, const char *file, int line, int dm_errno,
@@ -798,7 +804,7 @@ static void lvm2_log_fn(int level, const char *file, int line, int dm_errno,
}
/* This checks some basic cluster-LVM configuration stuff */
static void check_config()
static void check_config(void)
{
int locking_type;
@@ -827,27 +833,68 @@ void lvm_do_backup(const char *vgname)
struct volume_group * vg;
int consistent = 0;
DEBUGLOG("Triggering backup of VG metadata for %s. suspended=%d\n", vgname, suspended);
DEBUGLOG("Triggering backup of VG metadata for %s.\n", vgname);
pthread_mutex_lock(&lvm_lock);
vg = vg_read_internal(cmd, vgname, NULL /*vgid*/, &consistent);
vg = vg_read_internal(cmd, vgname, NULL /*vgid*/, 1, &consistent);
if (vg && consistent)
check_current_backup(vg);
else
log_error("Error backing up metadata, can't find VG for group %s", vgname);
vg_release(vg);
release_vg(vg);
dm_pool_empty(cmd->mem);
pthread_mutex_unlock(&lvm_lock);
}
/* Called to initialise the LVM context of the daemon */
int init_lvm(int using_gulm)
struct dm_hash_node *get_next_excl_lock(struct dm_hash_node *v, char **name)
{
if (!(cmd = create_toolcontext(1, NULL))) {
struct lv_info *lvi;
*name = NULL;
if (!v)
v = dm_hash_get_first(lv_hash);
do {
if (v) {
lvi = dm_hash_get_data(lv_hash, v);
DEBUGLOG("Looking for EX locks. found %x mode %d\n", lvi->lock_id, lvi->lock_mode);
if (lvi->lock_mode == LCK_EXCL) {
*name = dm_hash_get_key(lv_hash, v);
}
v = dm_hash_get_next(lv_hash, v);
}
} while (v && !*name);
if (*name)
DEBUGLOG("returning EXclusive UUID %s\n", *name);
return v;
}
void lvm_do_fs_unlock(void)
{
pthread_mutex_lock(&lvm_lock);
DEBUGLOG("Syncing device names\n");
fs_unlock();
pthread_mutex_unlock(&lvm_lock);
}
/* Called to initialise the LVM context of the daemon */
int init_clvm(struct dm_hash_table *excl_uuid)
{
/* Use LOG_DAEMON for syslog messages instead of LOG_USER */
init_syslog(LOG_DAEMON);
openlog("clvmd", LOG_PID, LOG_DAEMON);
/* Initialise already held locks */
if (!get_initial_state(excl_uuid))
log_error("Cannot load initial lock states.");
if (!(cmd = create_toolcontext(1, NULL, 0, 1))) {
log_error("Failed to allocate command context");
return 0;
}
@@ -857,30 +904,24 @@ int init_lvm(int using_gulm)
return 0;
}
/* Use LOG_DAEMON for syslog messages instead of LOG_USER */
init_syslog(LOG_DAEMON);
openlog("clvmd", LOG_PID, LOG_DAEMON);
cmd->cmd_line = "clvmd";
/* Check lvm.conf is setup for cluster-LVM */
check_config();
init_ignore_suspended_devices(1);
/* Remove any non-LV locks that may have been left around */
if (using_gulm)
drop_vg_locks();
get_initial_state();
/* Trap log messages so we can pass them back to the user */
init_log_fn(lvm2_log_fn);
memlock_inc_daemon(cmd);
return 1;
}
void destroy_lvm(void)
{
if (cmd)
if (cmd) {
memlock_dec_daemon(cmd);
destroy_toolcontext(cmd);
}
cmd = NULL;
}

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -27,15 +27,15 @@ extern int post_lock_lv(unsigned char lock_cmd, unsigned char lock_flags,
char *resource);
extern int do_check_lvm1(const char *vgname);
extern int do_refresh_cache(void);
extern int init_lvm(int using_gulm);
extern int init_clvm(struct dm_hash_table *excl_uuid);
extern void destroy_lvm(void);
extern void init_lvhash(void);
extern void destroy_lvhash(void);
extern void lvm_do_backup(const char *vgname);
extern int hold_unlock(char *resource);
extern int hold_lock(char *resource, int mode, int flags);
extern char *get_last_lvm_error(void);
extern void do_lock_vg(unsigned char command, unsigned char lock_flags,
char *resource);
extern struct dm_hash_node *get_next_excl_lock(struct dm_hash_node *v, char **name);
void lvm_do_fs_unlock(void);
#endif

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -13,27 +13,21 @@
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* FIXME Remove duplicated functions from this file. */
/*
* Tell all clvmds in a cluster to refresh their toolcontext
* Send a command to a running clvmd from the command-line
*/
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include <configure.h>
#include <stddef.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <errno.h>
#include <unistd.h>
#include <libdevmapper.h>
#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include "clvmd-common.h"
#include "clvm.h"
#include "refresh_clvmd.h"
#include <stddef.h>
#include <sys/socket.h>
#include <sys/un.h>
typedef struct lvm_response {
char node[255];
char *response;
@@ -83,12 +77,12 @@ static int _open_local_sock(void)
}
/* Send a request and return the status */
static int _send_request(const char *inbuf, int inlen, char **retbuf)
static int _send_request(const char *inbuf, int inlen, char **retbuf, int no_response)
{
char outbuf[PIPE_BUF];
struct clvm_header *outheader = (struct clvm_header *) outbuf;
int len;
int off;
unsigned off;
int buflen;
int err;
@@ -100,6 +94,8 @@ static int _send_request(const char *inbuf, int inlen, char **retbuf)
fprintf(stderr, "Error writing data to clvmd: %s", strerror(errno));
return 0;
}
if (no_response)
return 1;
/* Get the response */
reread:
@@ -155,36 +151,38 @@ static int _send_request(const char *inbuf, int inlen, char **retbuf)
/* Build the structure header and parse-out wildcard node names */
static void _build_header(struct clvm_header *head, int cmd, const char *node,
int len)
unsigned int len)
{
head->cmd = cmd;
head->status = 0;
head->flags = 0;
head->xid = 0;
head->clientid = 0;
head->arglen = len;
if (len)
/* 1 byte is used from struct clvm_header.args[1], so -> len - 1 */
head->arglen = len - 1;
else {
head->arglen = 0;
*head->args = '\0';
}
if (node) {
/*
* Allow a couple of special node names:
* "*" for all nodes,
* "." for the local node only
*/
if (strcmp(node, "*") == 0) {
head->node[0] = '\0';
} else if (strcmp(node, ".") == 0) {
head->node[0] = '\0';
head->flags = CLVMD_FLAG_LOCAL;
} else
strcpy(head->node, node);
} else
/*
* Translate special node names.
*/
if (!node || !strcmp(node, NODE_ALL))
head->node[0] = '\0';
else if (!strcmp(node, NODE_LOCAL)) {
head->node[0] = '\0';
head->flags = CLVMD_FLAG_LOCAL;
} else
strcpy(head->node, node);
}
/*
* Send a message to a(or all) node(s) in the cluster and wait for replies
*/
static int _cluster_request(char cmd, const char *node, void *data, int len,
lvm_response_t ** response, int *num)
lvm_response_t ** response, int *num, int no_response)
{
char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1];
char *inptr;
@@ -204,11 +202,12 @@ static int _cluster_request(char cmd, const char *node, void *data, int len,
return 0;
_build_header(head, cmd, node, len);
memcpy(head->node + strlen(head->node) + 1, data, len);
if (len)
memcpy(head->node + strlen(head->node) + 1, data, len);
status = _send_request(outbuf, sizeof(struct clvm_header) +
strlen(head->node) + len, &retbuf);
if (!status)
strlen(head->node) + len, &retbuf, no_response);
if (!status || no_response)
goto out;
/* Count the number of responses we got */
@@ -287,16 +286,16 @@ static int _cluster_free_request(lvm_response_t * response, int num)
return 1;
}
int refresh_clvmd()
int refresh_clvmd(int all_nodes)
{
int num_responses;
char args[1]; // No args really.
lvm_response_t *response;
lvm_response_t *response = NULL;
int saved_errno;
int status;
int i;
status = _cluster_request(CLVMD_CMD_REFRESH, "*", args, 0, &response, &num_responses);
status = _cluster_request(CLVMD_CMD_REFRESH, all_nodes ? NODE_ALL : NODE_LOCAL, args, 0, &response, &num_responses, 0);
/* If any nodes were down then display them and return an error */
for (i = 0; i < num_responses; i++) {
@@ -323,23 +322,42 @@ int refresh_clvmd()
return status;
}
int restart_clvmd(int all_nodes)
{
int dummy, status;
status = _cluster_request(CLVMD_CMD_RESTART, all_nodes ? NODE_ALL : NODE_LOCAL, NULL, 0, NULL, &dummy, 1);
/*
* FIXME: we cannot receive response, clvmd re-exec before it.
* but also should not close socket too early (the whole rq is dropped then).
* FIXME: This should be handled this way:
* - client waits for RESTART ack (and socket close)
* - server restarts
* - client checks that server is ready again (VERSION command?)
*/
usleep(500000);
return status;
}
int debug_clvmd(int level, int clusterwide)
{
int num_responses;
char args[1];
const char *nodes;
lvm_response_t *response;
lvm_response_t *response = NULL;
int saved_errno;
int status;
int i;
args[0] = level;
if (clusterwide)
nodes = "*";
nodes = NODE_ALL;
else
nodes = ".";
nodes = NODE_LOCAL;
status = _cluster_request(CLVMD_CMD_SET_DEBUG, nodes, args, 1, &response, &num_responses);
status = _cluster_request(CLVMD_CMD_SET_DEBUG, nodes, args, 1, &response, &num_responses, 0);
/* If any nodes were down then display them and return an error */
for (i = 0; i < num_responses; i++) {

View File

@@ -13,6 +13,7 @@
*/
int refresh_clvmd(void);
int refresh_clvmd(int all_nodes);
int restart_clvmd(int all_nodes);
int debug_clvmd(int level, int clusterwide);

View File

@@ -1,511 +0,0 @@
/*
* Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* This provides the inter-clvmd communications for a system without CMAN.
* There is a listening TCP socket which accepts new connections in the
* normal way.
* It can also make outgoing connnections to the other clvmd nodes.
*/
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include <configure.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <syslog.h>
#include <netdb.h>
#include <assert.h>
#include <libdevmapper.h>
#include "clvm.h"
#include "clvmd-comms.h"
#include "clvmd.h"
#include "clvmd-gulm.h"
#define DEFAULT_TCP_PORT 21064
static int listen_fd = -1;
static int tcp_port;
struct dm_hash_table *sock_hash;
static int get_our_ip_address(char *addr, int *family);
static int read_from_tcpsock(struct local_client *fd, char *buf, int len, char *csid,
struct local_client **new_client);
/* Called by init_cluster() to open up the listening socket */
int init_comms(unsigned short port)
{
struct sockaddr_in6 addr;
sock_hash = dm_hash_create(100);
tcp_port = port ? : DEFAULT_TCP_PORT;
listen_fd = socket(AF_INET6, SOCK_STREAM, 0);
if (listen_fd < 0)
{
return -1;
}
else
{
int one = 1;
setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(int));
setsockopt(listen_fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
}
memset(&addr, 0, sizeof(addr)); // Bind to INADDR_ANY
addr.sin6_family = AF_INET6;
addr.sin6_port = htons(tcp_port);
if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
{
DEBUGLOG("Can't bind to port: %s\n", strerror(errno));
syslog(LOG_ERR, "Can't bind to port %d, is clvmd already running ?", tcp_port);
close(listen_fd);
return -1;
}
listen(listen_fd, 5);
/* Set Close-on-exec */
fcntl(listen_fd, F_SETFD, 1);
return 0;
}
void tcp_remove_client(const char *c_csid)
{
struct local_client *client;
char csid[GULM_MAX_CSID_LEN];
unsigned int i;
memcpy(csid, c_csid, sizeof csid);
DEBUGLOG("tcp_remove_client\n");
/* Don't actually close the socket here - that's the
job of clvmd.c whch will do the job when it notices the
other end has gone. We just need to remove the client(s) from
the hash table so we don't try to use it for sending any more */
for (i = 0; i < 2; i++)
{
client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
if (client)
{
dm_hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
client->removeme = 1;
close(client->fd);
}
/* Look for a mangled one too, on the 2nd iteration. */
csid[0] ^= 0x80;
}
}
int alloc_client(int fd, const char *c_csid, struct local_client **new_client)
{
struct local_client *client;
char csid[GULM_MAX_CSID_LEN];
memcpy(csid, c_csid, sizeof csid);
DEBUGLOG("alloc_client %d csid = %s\n", fd, print_csid(csid));
/* Create a local_client and return it */
client = malloc(sizeof(struct local_client));
if (!client)
{
DEBUGLOG("malloc failed\n");
return -1;
}
memset(client, 0, sizeof(struct local_client));
client->fd = fd;
client->type = CLUSTER_DATA_SOCK;
client->callback = read_from_tcpsock;
if (new_client)
*new_client = client;
/* Add to our list of node sockets */
if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
{
DEBUGLOG("alloc_client mangling CSID for second connection\n");
/* This is a duplicate connection but we can't close it because
the other end may already have started sending.
So, we mangle the IP address and keep it, all sending will
go out of the main FD
*/
csid[0] ^= 0x80;
client->bits.net.flags = 1; /* indicate mangled CSID */
/* If it still exists then kill the connection as we should only
ever have one incoming connection from each node */
if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
{
DEBUGLOG("Multiple incoming connections from node\n");
syslog(LOG_ERR, " Bogus incoming connection from %d.%d.%d.%d\n", csid[0],csid[1],csid[2],csid[3]);
free(client);
errno = ECONNREFUSED;
return -1;
}
}
dm_hash_insert_binary(sock_hash, csid, GULM_MAX_CSID_LEN, client);
return 0;
}
int get_main_gulm_cluster_fd()
{
return listen_fd;
}
/* Read on main comms (listen) socket, accept it */
int cluster_fd_gulm_callback(struct local_client *fd, char *buf, int len, const char *csid,
struct local_client **new_client)
{
int newfd;
struct sockaddr_in6 addr;
socklen_t addrlen = sizeof(addr);
int status;
char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN];
DEBUGLOG("cluster_fd_callback\n");
*new_client = NULL;
newfd = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
DEBUGLOG("cluster_fd_callback, newfd=%d (errno=%d)\n", newfd, errno);
if (!newfd)
{
syslog(LOG_ERR, "error in accept: %m");
errno = EAGAIN;
return -1; /* Don't return an error or clvmd will close the listening FD */
}
/* Check that the client is a member of the cluster
and reject if not.
*/
if (gulm_name_from_csid((char *)&addr.sin6_addr, name) < 0)
{
syslog(LOG_ERR, "Got connect from non-cluster node %s\n",
print_csid((char *)&addr.sin6_addr));
DEBUGLOG("Got connect from non-cluster node %s\n",
print_csid((char *)&addr.sin6_addr));
close(newfd);
errno = EAGAIN;
return -1;
}
status = alloc_client(newfd, (char *)&addr.sin6_addr, new_client);
if (status)
{
DEBUGLOG("cluster_fd_callback, alloc_client failed, status = %d\n", status);
close(newfd);
/* See above... */
errno = EAGAIN;
return -1;
}
DEBUGLOG("cluster_fd_callback, returning %d, %p\n", newfd, *new_client);
return newfd;
}
/* Try to get at least 'len' bytes from the socket */
static int really_read(int fd, char *buf, int len)
{
int got, offset;
got = offset = 0;
do {
got = read(fd, buf+offset, len-offset);
DEBUGLOG("really_read. got %d bytes\n", got);
offset += got;
} while (got > 0 && offset < len);
if (got < 0)
return got;
else
return offset;
}
static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid,
struct local_client **new_client)
{
struct sockaddr_in6 addr;
socklen_t slen = sizeof(addr);
struct clvm_header *header = (struct clvm_header *)buf;
int status;
uint32_t arglen;
DEBUGLOG("read_from_tcpsock fd %d\n", client->fd);
*new_client = NULL;
/* Get "csid" */
getpeername(client->fd, (struct sockaddr *)&addr, &slen);
memcpy(csid, &addr.sin6_addr, GULM_MAX_CSID_LEN);
/* Read just the header first, then get the rest if there is any.
* Stream sockets, sigh.
*/
status = really_read(client->fd, buf, sizeof(struct clvm_header));
if (status > 0)
{
int status2;
arglen = ntohl(header->arglen);
/* Get the rest */
if (arglen && arglen < GULM_MAX_CLUSTER_MESSAGE)
{
status2 = really_read(client->fd, buf+status, arglen);
if (status2 > 0)
status += status2;
else
status = status2;
}
}
DEBUGLOG("read_from_tcpsock, status = %d(errno = %d)\n", status, errno);
/* Remove it from the hash table if there's an error, clvmd will
remove the socket from its lists and free the client struct */
if (status == 0 ||
(status < 0 && errno != EAGAIN && errno != EINTR))
{
char remcsid[GULM_MAX_CSID_LEN];
memcpy(remcsid, csid, GULM_MAX_CSID_LEN);
close(client->fd);
/* If the csid was mangled, then make sure we remove the right entry */
if (client->bits.net.flags)
remcsid[0] ^= 0x80;
dm_hash_remove_binary(sock_hash, remcsid, GULM_MAX_CSID_LEN);
/* Tell cluster manager layer */
add_down_node(remcsid);
}
else {
gulm_add_up_node(csid);
/* Send it back to clvmd */
process_message(client, buf, status, csid);
}
return status;
}
int gulm_connect_csid(const char *csid, struct local_client **newclient)
{
int fd;
struct sockaddr_in6 addr;
int status;
int one = 1;
DEBUGLOG("Connecting socket\n");
fd = socket(PF_INET6, SOCK_STREAM, 0);
if (fd < 0)
{
syslog(LOG_ERR, "Unable to create new socket: %m");
return -1;
}
addr.sin6_family = AF_INET6;
memcpy(&addr.sin6_addr, csid, GULM_MAX_CSID_LEN);
addr.sin6_port = htons(tcp_port);
DEBUGLOG("Connecting socket %d\n", fd);
if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in6)) < 0)
{
/* "Connection refused" is "normal" because clvmd may not yet be running
* on that node.
*/
if (errno != ECONNREFUSED)
{
syslog(LOG_ERR, "Unable to connect to remote node: %m");
}
DEBUGLOG("Unable to connect to remote node: %s\n", strerror(errno));
close(fd);
return -1;
}
/* Set Close-on-exec */
fcntl(fd, F_SETFD, 1);
setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
status = alloc_client(fd, csid, newclient);
if (status)
close(fd);
else
add_client(*newclient);
/* If we can connect to it, it must be running a clvmd */
gulm_add_up_node(csid);
return status;
}
/* Send a message to a known CSID */
static int tcp_send_message(void *buf, int msglen, const char *csid, const char *errtext)
{
int status;
struct local_client *client;
char ourcsid[GULM_MAX_CSID_LEN];
assert(csid);
DEBUGLOG("tcp_send_message, csid = %s, msglen = %d\n", print_csid(csid), msglen);
/* Don't connect to ourself */
get_our_gulm_csid(ourcsid);
if (memcmp(csid, ourcsid, GULM_MAX_CSID_LEN) == 0)
return msglen;
client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
if (!client)
{
status = gulm_connect_csid(csid, &client);
if (status)
return -1;
}
DEBUGLOG("tcp_send_message, fd = %d\n", client->fd);
return write(client->fd, buf, msglen);
}
int gulm_cluster_send_message(void *buf, int msglen, const char *csid, const char *errtext)
{
int status=0;
DEBUGLOG("cluster send message, csid = %p, msglen = %d\n", csid, msglen);
/* If csid is NULL then send to all known (not just connected) nodes */
if (!csid)
{
void *context = NULL;
char loop_csid[GULM_MAX_CSID_LEN];
/* Loop round all gulm-known nodes */
while (get_next_node_csid(&context, loop_csid))
{
status = tcp_send_message(buf, msglen, loop_csid, errtext);
if (status == 0 ||
(status < 0 && (errno == EAGAIN || errno == EINTR)))
break;
}
}
else
{
status = tcp_send_message(buf, msglen, csid, errtext);
}
return status;
}
/* To get our own IP address we get the locally bound address of the
socket that's talking to GULM in the assumption(eek) that it will
be on the "right" network in a multi-homed system */
static int get_our_ip_address(char *addr, int *family)
{
struct utsname info;
uname(&info);
get_ip_address(info.nodename, addr);
return 0;
}
/* Public version of above for those that don't care what protocol
we're using */
void get_our_gulm_csid(char *csid)
{
static char our_csid[GULM_MAX_CSID_LEN];
static int got_csid = 0;
if (!got_csid)
{
int family;
memset(our_csid, 0, sizeof(our_csid));
if (get_our_ip_address(our_csid, &family))
{
got_csid = 1;
}
}
memcpy(csid, our_csid, GULM_MAX_CSID_LEN);
}
static void map_v4_to_v6(struct in_addr *ip4, struct in6_addr *ip6)
{
ip6->s6_addr32[0] = 0;
ip6->s6_addr32[1] = 0;
ip6->s6_addr32[2] = htonl(0xffff);
ip6->s6_addr32[3] = ip4->s_addr;
}
/* Get someone else's IP address from DNS */
int get_ip_address(const char *node, char *addr)
{
struct hostent *he;
memset(addr, 0, GULM_MAX_CSID_LEN);
// TODO: what do we do about multi-homed hosts ???
// CCSs ip_interfaces solved this but some bugger removed it.
/* Try IPv6 first. The man page for gethostbyname implies that
it will lookup ip6 & ip4 names, but it seems not to */
he = gethostbyname2(node, AF_INET6);
if (he)
{
memcpy(addr, he->h_addr_list[0],
he->h_length);
}
else
{
he = gethostbyname2(node, AF_INET);
if (!he)
return -1;
map_v4_to_v6((struct in_addr *)he->h_addr_list[0], (struct in6_addr *)addr);
}
return 0;
}
char *print_csid(const char *csid)
{
static char buf[128];
int *icsid = (int *)csid;
sprintf(buf, "[%x.%x.%x.%x]",
icsid[0],icsid[1],icsid[2],icsid[3]);
return buf;
}

View File

@@ -1,13 +0,0 @@
#include <netinet/in.h>
#define GULM_MAX_CLUSTER_MESSAGE 1600
#define GULM_MAX_CSID_LEN sizeof(struct in6_addr)
#define GULM_MAX_CLUSTER_MEMBER_NAME_LEN 128
extern int init_comms(unsigned short);
extern char *print_csid(const char *);
int get_main_gulm_cluster_fd(void);
int cluster_fd_gulm_callback(struct local_client *fd, char *buf, int len, const char *csid, struct local_client **new_client);
int gulm_cluster_send_message(void *buf, int msglen, const char *csid, const char *errtext);
void get_our_gulm_csid(char *csid);
int gulm_connect_csid(const char *csid, struct local_client **newclient);

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2009 Red Hat, Inc. All rights reserved.
# Copyright (C) 2009-2010 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -14,7 +14,6 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
CPG_LIBS = @CPG_LIBS@
CPG_CFLAGS = @CPG_CFLAGS@
@@ -25,16 +24,15 @@ SOURCES = clogd.c cluster.c compat.c functions.c link_mon.c local.c logging.c
TARGETS = cmirrord
include ../../make.tmpl
include $(top_builddir)/make.tmpl
LIBS += -ldevmapper
LMLIBS += $(CPG_LIBS) $(SACKPT_LIBS)
CFLAGS += $(CPG_CFLAGS) $(SACKPT_CFLAGS)
cmirrord: $(OBJECTS) $(top_builddir)/lib/liblvm-internal.a
$(CC) $(CFLAGS) $(LDFLAGS) -o cmirrord $(OBJECTS) \
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) \
$(LVMLIBS) $(LMLIBS) $(LIBS)
install: $(TARGETS)
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) cmirrord \
$(usrsbindir)/cmirrord
$(INSTALL_PROGRAM) -D cmirrord $(usrsbindir)/cmirrord

View File

@@ -18,6 +18,7 @@
#include <errno.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
@@ -31,7 +32,7 @@ static void daemonize(void);
static void init_all(void);
static void cleanup_all(void);
int main(int argc __attribute((unused)), char *argv[] __attribute((unused)))
int main(int argc __attribute__((unused)), char *argv[] __attribute__((unused)))
{
daemonize();
@@ -59,54 +60,11 @@ int main(int argc __attribute((unused)), char *argv[] __attribute((unused)))
* @sig: the signal
*
*/
static void parent_exit_handler(int sig __attribute((unused)))
static void parent_exit_handler(int sig __attribute__((unused)))
{
exit_now = 1;
}
/*
* create_lockfile - create and lock a lock file
* @lockfile: location of lock file
*
* Returns: 0 on success, -1 otherwise
*/
static int create_lockfile(const char *lockfile)
{
int fd;
struct flock lock;
char buffer[50];
if((fd = open(lockfile, O_CREAT | O_WRONLY,
(S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))) < 0)
return -errno;
lock.l_type = F_WRLCK;
lock.l_start = 0;
lock.l_whence = SEEK_SET;
lock.l_len = 0;
if (fcntl(fd, F_SETLK, &lock) < 0) {
close(fd);
return -errno;
}
if (ftruncate(fd, 0) < 0) {
close(fd);
return -errno;
}
sprintf(buffer, "%d\n", getpid());
/* FIXME Handle other non-error returns without aborting */
if (write(fd, buffer, strlen(buffer)) < strlen(buffer)){
close(fd);
unlink(lockfile);
return -errno;
}
return 0;
}
static void sig_handler(int sig)
{
/* FIXME Races - don't touch signal_mask here. */
@@ -161,6 +119,12 @@ static void process_signals(void)
}
}
static void remove_lockfile(void)
{
if (unlink(CMIRRORD_PIDFILE))
LOG_ERROR("Unable to remove \"" CMIRRORD_PIDFILE "\" %s", strerror(errno));
}
/*
* daemonize
*
@@ -170,6 +134,12 @@ static void daemonize(void)
{
int pid;
int status;
int devnull;
if ((devnull = open("/dev/null", O_RDWR)) == -1) {
LOG_ERROR("Can't open /dev/null: %s", strerror(errno));
exit(EXIT_FAILURE);
}
signal(SIGTERM, &parent_exit_handler);
@@ -218,15 +188,24 @@ static void daemonize(void)
chdir("/");
umask(0);
close(0); close(1); close(2);
open("/dev/null", O_RDONLY); /* reopen stdin */
open("/dev/null", O_WRONLY); /* reopen stdout */
open("/dev/null", O_WRONLY); /* reopen stderr */
if (close(0) || close(1) || close(2)) {
LOG_ERROR("Failed to close terminal FDs");
exit(EXIT_FAILURE);
}
if ((dup2(devnull, 0) < 0) || /* reopen stdin */
(dup2(devnull, 1) < 0) || /* reopen stdout */
(dup2(devnull, 2) < 0)) /* reopen stderr */
exit(EXIT_FAILURE);
LOG_OPEN("cmirrord", LOG_PID, LOG_DAEMON);
if (create_lockfile(CMIRRORD_PIDFILE))
(void) dm_prepare_selinux_context(CMIRRORD_PIDFILE, S_IFREG);
if (dm_create_lockfile(CMIRRORD_PIDFILE) == 0)
exit(EXIT_LOCKFILE);
(void) dm_prepare_selinux_context(NULL, 0);
atexit(remove_lockfile);
/* FIXME Replace with sigaction. (deprecated) */
signal(SIGINT, &sig_handler);

View File

@@ -20,10 +20,12 @@
#include <corosync/cpg.h>
#include <errno.h>
#include <openais/saAis.h>
#include <openais/saCkpt.h>
#include <signal.h>
#include <unistd.h>
#if CMIRROR_HAS_CHECKPOINT
#include <openais/saAis.h>
#include <openais/saCkpt.h>
#endif
/* Open AIS error codes */
#define str_ais_error(x) \
@@ -62,13 +64,13 @@
RQ_TYPE((x) & ~DM_ULOG_RESPONSE)
static uint32_t my_cluster_id = 0xDEAD;
#if CMIRROR_HAS_CHECKPOINT
static SaCkptHandleT ckpt_handle = 0;
static SaCkptCallbacksT callbacks = { 0, 0 };
static SaVersionT version = { 'B', 1, 1 };
#endif
#define DEBUGGING_HISTORY 100
//static char debugging[DEBUGGING_HISTORY][128];
//static int idx = 0;
#define LOG_SPRINT(cc, f, arg...) do { \
cc->idx++; \
cc->idx = cc->idx % DEBUGGING_HISTORY; \
@@ -77,6 +79,7 @@ static SaVersionT version = { 'B', 1, 1 };
static int log_resp_rec = 0;
#define RECOVERING_REGION_SECTION_SIZE 64
struct checkpoint_data {
uint32_t requester;
char uuid[CPG_MAX_NAME_LENGTH];
@@ -86,7 +89,7 @@ struct checkpoint_data {
char *clean_bits;
char *recovering_region;
struct checkpoint_data *next;
};
};
#define INVALID 0
#define VALID 1
@@ -128,8 +131,7 @@ static struct dm_list clog_cpg_list;
int cluster_send(struct clog_request *rq)
{
int r;
int count=0;
int found;
int found = 0;
struct iovec iov;
struct clog_cpg *entry;
@@ -165,7 +167,10 @@ int cluster_send(struct clog_request *rq)
if (entry->cpg_state != VALID)
return -EINVAL;
#if CMIRROR_HAS_CHECKPOINT
do {
int count = 0;
r = cpg_mcast_joined(entry->handle, CPG_TYPE_AGREED, &iov, 1);
if (r != SA_AIS_ERR_TRY_AGAIN)
break;
@@ -189,12 +194,14 @@ int cluster_send(struct clog_request *rq)
str_ais_error(r));
usleep(1000);
} while (1);
if (r == CPG_OK)
#else
r = cpg_mcast_joined(entry->handle, CPG_TYPE_AGREED, &iov, 1);
#endif
if (r == CS_OK)
return 0;
/* error codes found in openais/cpg.h */
LOG_ERROR("cpg_mcast_joined error: %s", str_ais_error(r));
LOG_ERROR("cpg_mcast_joined error: %d", r);
rq->u_rq.error = -EBADE;
return -EBADE;
@@ -215,7 +222,7 @@ static struct clog_request *get_matching_rq(struct clog_request *rq,
}
static char rq_buffer[DM_ULOG_REQUEST_SIZE];
static int handle_cluster_request(struct clog_cpg *entry __attribute((unused)),
static int handle_cluster_request(struct clog_cpg *entry __attribute__((unused)),
struct clog_request *rq, int server)
{
int r = 0;
@@ -419,6 +426,7 @@ static void free_checkpoint(struct checkpoint_data *cp)
free(cp);
}
#if CMIRROR_HAS_CHECKPOINT
static int export_checkpoint(struct checkpoint_data *cp)
{
SaCkptCheckpointCreationAttributesT attr;
@@ -587,7 +595,54 @@ rr_create_retry:
return 0;
}
static int import_checkpoint(struct clog_cpg *entry, int no_read)
#else
static int export_checkpoint(struct checkpoint_data *cp)
{
int r, rq_size;
struct clog_request *rq;
rq_size = sizeof(*rq);
rq_size += RECOVERING_REGION_SECTION_SIZE;
rq_size += cp->bitmap_size * 2; /* clean|sync_bits */
rq = malloc(rq_size);
if (!rq) {
LOG_ERROR("export_checkpoint: "
"Unable to allocate transfer structs");
return -ENOMEM;
}
memset(rq, 0, rq_size);
dm_list_init(&rq->u.list);
rq->u_rq.request_type = DM_ULOG_CHECKPOINT_READY;
rq->originator = cp->requester;
strncpy(rq->u_rq.uuid, cp->uuid, CPG_MAX_NAME_LENGTH);
rq->u_rq.seq = my_cluster_id;
rq->u_rq.data_size = rq_size - sizeof(*rq);
/* Sync bits */
memcpy(rq->u_rq.data, cp->sync_bits, cp->bitmap_size);
/* Clean bits */
memcpy(rq->u_rq.data + cp->bitmap_size, cp->clean_bits, cp->bitmap_size);
/* Recovering region */
memcpy(rq->u_rq.data + (cp->bitmap_size * 2), cp->recovering_region,
strlen(cp->recovering_region));
r = cluster_send(rq);
if (r)
LOG_ERROR("Failed to send checkpoint ready notice: %s",
strerror(-r));
free(rq);
return 0;
}
#endif /* CMIRROR_HAS_CHECKPOINT */
#if CMIRROR_HAS_CHECKPOINT
static int import_checkpoint(struct clog_cpg *entry, int no_read,
struct clog_request *rq __attribute__((unused)))
{
int rtn = 0;
SaCkptCheckpointHandleT h;
@@ -619,6 +674,7 @@ open_retry:
if (rv != SA_AIS_OK) {
LOG_ERROR("[%s] Failed to open checkpoint: %s",
SHORT_UUID(entry->name.value), str_ais_error(rv));
free(bitmap);
return -EIO; /* FIXME: better error */
}
@@ -647,6 +703,7 @@ init_retry:
if (rv != SA_AIS_OK) {
LOG_ERROR("[%s] Sync checkpoint section creation failed: %s",
SHORT_UUID(entry->name.value), str_ais_error(rv));
free(bitmap);
return -EIO; /* FIXME: better error */
}
@@ -740,6 +797,32 @@ no_read:
return rtn;
}
#else
static int import_checkpoint(struct clog_cpg *entry, int no_read,
struct clog_request *rq)
{
int bitmap_size;
bitmap_size = (rq->u_rq.data_size - RECOVERING_REGION_SECTION_SIZE) / 2;
if (bitmap_size < 0) {
LOG_ERROR("Checkpoint has invalid payload size.");
return -EINVAL;
}
if (pull_state(entry->name.value, entry->luid, "sync_bits",
rq->u_rq.data, bitmap_size) ||
pull_state(entry->name.value, entry->luid, "clean_bits",
rq->u_rq.data + bitmap_size, bitmap_size) ||
pull_state(entry->name.value, entry->luid, "recovering_region",
rq->u_rq.data + (bitmap_size * 2),
RECOVERING_REGION_SECTION_SIZE)) {
LOG_ERROR("Error loading bitmap state from checkpoint.");
return -EIO;
}
return 0;
}
#endif /* CMIRROR_HAS_CHECKPOINT */
static void do_checkpoints(struct clog_cpg *entry, int leaving)
{
struct checkpoint_data *cp;
@@ -828,9 +911,9 @@ static int resend_requests(struct clog_cpg *entry)
rq->u_rq.data_size = 0;
kernel_send(&rq->u_rq);
break;
default:
/*
* If an action or a response is required, then
@@ -855,15 +938,15 @@ static int resend_requests(struct clog_cpg *entry)
return r;
}
static int do_cluster_work(void *data __attribute((unused)))
static int do_cluster_work(void *data __attribute__((unused)))
{
int r = SA_AIS_OK;
struct clog_cpg *entry;
int r = CS_OK;
struct clog_cpg *entry, *tmp;
dm_list_iterate_items(entry, &clog_cpg_list) {
r = cpg_dispatch(entry->handle, CPG_DISPATCH_ALL);
if (r != SA_AIS_OK)
LOG_ERROR("cpg_dispatch failed: %s", str_ais_error(r));
dm_list_iterate_items_safe(entry, tmp, &clog_cpg_list) {
r = cpg_dispatch(entry->handle, CS_DISPATCH_ALL);
if (r != CS_OK)
LOG_ERROR("cpg_dispatch failed: %d", r);
if (entry->free_me) {
free(entry);
@@ -874,7 +957,7 @@ static int do_cluster_work(void *data __attribute((unused)))
resend_requests(entry);
}
return (r == SA_AIS_OK) ? 0 : -1; /* FIXME: good error number? */
return (r == CS_OK) ? 0 : -1; /* FIXME: good error number? */
}
static int flush_startup_list(struct clog_cpg *entry)
@@ -927,8 +1010,8 @@ static int flush_startup_list(struct clog_cpg *entry)
return 0;
}
static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gname __attribute((unused)),
uint32_t nodeid, uint32_t pid __attribute((unused)),
static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gname __attribute__((unused)),
uint32_t nodeid, uint32_t pid __attribute__((unused)),
void *msg, size_t msg_len)
{
int i;
@@ -939,16 +1022,19 @@ static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gna
struct clog_request *tmp_rq;
struct clog_cpg *match;
if (clog_request_from_network(rq, msg_len) < 0)
/* Error message comes from 'clog_request_from_network' */
return;
match = find_clog_cpg(handle);
if (!match) {
LOG_ERROR("Unable to find clog_cpg for cluster message");
return;
}
/*
* Perform necessary endian and version compatibility conversions
*/
if (clog_request_from_network(rq, msg_len) < 0)
/* Any error messages come from 'clog_request_from_network' */
return;
if ((nodeid == my_cluster_id) &&
!(rq->u_rq.request_type & DM_ULOG_RESPONSE) &&
(rq->u_rq.request_type != DM_ULOG_RESUME) &&
@@ -967,7 +1053,7 @@ static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gna
}
memcpy(tmp_rq, rq, sizeof(*rq) + rq->u_rq.data_size);
dm_list_init(&tmp_rq->u.list);
dm_list_add( &match->working_list, &tmp_rq->u.list);
dm_list_add(&match->working_list, &tmp_rq->u.list);
}
if (rq->u_rq.request_type == DM_ULOG_POSTSUSPEND) {
@@ -1020,7 +1106,8 @@ static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gna
/* Redundant checkpoints ignored if match->valid */
LOG_SPRINT(match, "[%s] CHECKPOINT_READY notification from %u",
SHORT_UUID(rq->u_rq.uuid), nodeid);
if (import_checkpoint(match, (match->state != INVALID))) {
if (import_checkpoint(match,
(match->state != INVALID), rq)) {
LOG_SPRINT(match,
"[%s] Failed to import checkpoint from %u",
SHORT_UUID(rq->u_rq.uuid), nodeid);
@@ -1144,11 +1231,11 @@ out:
_RQ_TYPE(rq->u_rq.request_type),
rq->originator, (response) ? "YES" : "NO");
else
LOG_SPRINT(match, "SEQ#=%u, UUID=%s, TYPE=%s, ORIG=%u, RESP=%s, RSPR=%u",
LOG_SPRINT(match, "SEQ#=%u, UUID=%s, TYPE=%s, ORIG=%u, RESP=%s, RSPR=%u, error=%d",
rq->u_rq.seq, SHORT_UUID(rq->u_rq.uuid),
_RQ_TYPE(rq->u_rq.request_type),
rq->originator, (response) ? "YES" : "NO",
nodeid);
nodeid, rq->u_rq.error);
}
}
@@ -1268,7 +1355,7 @@ static void cpg_leave_callback(struct clog_cpg *match,
match->free_me = 1;
match->lowest_id = 0xDEAD;
match->state = INVALID;
}
}
/* Remove any pending checkpoints for the leaving node. */
for (p_cp = NULL, c_cp = match->checkpoint_list;
@@ -1324,7 +1411,7 @@ static void cpg_leave_callback(struct clog_cpg *match,
left->nodeid);
return;
}
match->lowest_id = member_list[0].nodeid;
for (i = 0; i < member_list_entries; i++)
if (match->lowest_id > member_list[i].nodeid)
@@ -1366,7 +1453,7 @@ static void cpg_leave_callback(struct clog_cpg *match,
}
}
static void cpg_config_callback(cpg_handle_t handle, const struct cpg_name *gname __attribute((unused)),
static void cpg_config_callback(cpg_handle_t handle, const struct cpg_name *gname __attribute__((unused)),
const struct cpg_address *member_list,
size_t member_list_entries,
const struct cpg_address *left_list,
@@ -1413,6 +1500,7 @@ cpg_callbacks_t cpg_callbacks = {
*/
static int remove_checkpoint(struct clog_cpg *entry)
{
#if CMIRROR_HAS_CHECKPOINT
int len;
SaNameT name;
SaAisErrorT rv;
@@ -1442,7 +1530,7 @@ unlink_retry:
usleep(1000);
goto unlink_retry;
}
if (rv != SA_AIS_OK) {
LOG_ERROR("[%s] Failed to unlink checkpoint: %s",
SHORT_UUID(entry->name.value), str_ais_error(rv));
@@ -1452,6 +1540,10 @@ unlink_retry:
saCkptCheckpointClose(h);
return 1;
#else
/* No checkpoint to remove, so 'success' */
return 1;
#endif
}
int create_cluster_cpg(char *uuid, uint64_t luid)
@@ -1493,14 +1585,14 @@ int create_cluster_cpg(char *uuid, uint64_t luid)
SHORT_UUID(new->name.value));
r = cpg_initialize(&new->handle, &cpg_callbacks);
if (r != SA_AIS_OK) {
if (r != CS_OK) {
LOG_ERROR("cpg_initialize failed: Cannot join cluster");
free(new);
return -EPERM;
}
r = cpg_join(new->handle, &new->name);
if (r != SA_AIS_OK) {
if (r != CS_OK) {
LOG_ERROR("cpg_join failed: Cannot join cluster");
free(new);
return -EPERM;
@@ -1541,7 +1633,7 @@ static int _destroy_cluster_cpg(struct clog_cpg *del)
{
int r;
int state;
LOG_COND(log_resend_requests, "[%s] I am leaving.2.....",
SHORT_UUID(del->name.value));
@@ -1573,7 +1665,7 @@ static int _destroy_cluster_cpg(struct clog_cpg *del)
abort_startup(del);
r = cpg_leave(del->handle, &del->name);
if (r != CPG_OK)
if (r != CS_OK)
LOG_ERROR("Error leaving CPG!");
return 0;
}
@@ -1591,24 +1683,27 @@ int destroy_cluster_cpg(char *uuid)
int init_cluster(void)
{
#if CMIRROR_HAS_CHECKPOINT
SaAisErrorT rv;
dm_list_init(&clog_cpg_list);
rv = saCkptInitialize(&ckpt_handle, &callbacks, &version);
if (rv != SA_AIS_OK)
return EXIT_CLUSTER_CKPT_INIT;
#endif
dm_list_init(&clog_cpg_list);
return 0;
}
void cleanup_cluster(void)
{
#if CMIRROR_HAS_CHECKPOINT
SaAisErrorT err;
err = saCkptFinalize(ckpt_handle);
if (err != SA_AIS_OK)
LOG_ERROR("Failed to finalize checkpoint handle");
#endif
}
void cluster_debug(void)

View File

@@ -21,7 +21,7 @@
*/
#define COMPAT_OFFSET 256
static void v5_data_endian_switch(struct clog_request *rq, int to_network __attribute((unused)))
static void v5_data_endian_switch(struct clog_request *rq, int to_network __attribute__((unused)))
{
int i, end;
int64_t *pi64;

View File

@@ -15,7 +15,6 @@
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/kdev_t.h>
#include <signal.h>
#include <sys/stat.h>
#include <time.h>
@@ -55,6 +54,7 @@ struct log_c {
time_t delay; /* limits how fast a resume can happen after suspend */
int touched;
int in_sync; /* An in-sync that stays set until suspend/resume */
uint32_t region_size;
uint32_t region_count;
uint64_t sync_count;
@@ -107,7 +107,7 @@ static DM_LIST_INIT(log_pending_list);
static int log_test_bit(dm_bitset_t bs, int bit)
{
return dm_bit(bs, bit);
return dm_bit(bs, bit) ? 1 : 0;
}
static void log_set_bit(struct log_c *lc, dm_bitset_t bs, int bit)
@@ -291,7 +291,7 @@ static int write_log(struct log_c *lc)
}
/* FIXME Rewrite this function taking advantage of the udev changes (where in use) to improve its efficiency! */
static int find_disk_path(char *major_minor_str, char *path_rtn, int *unlink_path __attribute((unused)))
static int find_disk_path(char *major_minor_str, char *path_rtn, int *unlink_path __attribute__((unused)))
{
int r;
DIR *dp;
@@ -329,7 +329,10 @@ static int find_disk_path(char *major_minor_str, char *path_rtn, int *unlink_pat
*/
sprintf(path_rtn, "/dev/mapper/%s", dep->d_name);
stat(path_rtn, &statbuf);
if (stat(path_rtn, &statbuf) < 0) {
LOG_DBG("Unable to stat %s", path_rtn);
continue;
}
if (S_ISBLK(statbuf.st_mode) &&
(major(statbuf.st_rdev) == major) &&
(minor(statbuf.st_rdev) == minor)) {
@@ -366,14 +369,13 @@ static int _clog_ctr(char *uuid, uint64_t luid,
uint64_t region_size;
uint64_t region_count;
struct log_c *lc = NULL;
struct log_c *duplicate;
enum sync log_sync = DEFAULTSYNC;
uint32_t block_on_error = 0;
int disk_log = 0;
int disk_log;
char disk_path[128];
int unlink_path = 0;
size_t page_size;
long page_size;
int pages;
/* If core log request, then argv[0] will be region_size */
@@ -429,13 +431,12 @@ static int _clog_ctr(char *uuid, uint64_t luid,
block_on_error = 1;
}
lc = malloc(sizeof(*lc));
lc = dm_zalloc(sizeof(*lc));
if (!lc) {
LOG_ERROR("Unable to allocate cluster log context");
r = -ENOMEM;
goto fail;
}
memset(lc, 0, sizeof(*lc));
lc->region_size = region_size;
lc->region_count = region_count;
@@ -449,11 +450,11 @@ static int _clog_ctr(char *uuid, uint64_t luid,
strncpy(lc->uuid, uuid, DM_UUID_LEN);
lc->luid = luid;
if ((duplicate = get_log(lc->uuid, lc->luid)) ||
(duplicate = get_pending_log(lc->uuid, lc->luid))) {
if (get_log(lc->uuid, lc->luid) ||
get_pending_log(lc->uuid, lc->luid)) {
LOG_ERROR("[%s/%" PRIu64 "u] Log already exists, unable to create.",
SHORT_UUID(lc->uuid), lc->luid);
free(lc);
dm_free(lc);
return -EINVAL;
}
@@ -478,7 +479,12 @@ static int _clog_ctr(char *uuid, uint64_t luid,
lc->sync_count = (log_sync == NOSYNC) ? region_count : 0;
if (disk_log) {
page_size = sysconf(_SC_PAGESIZE);
if ((page_size = sysconf(_SC_PAGESIZE)) < 0) {
LOG_ERROR("Unable to read pagesize: %s",
strerror(errno));
r = errno;
goto fail;
}
pages = *(lc->clean_bits) / page_size;
pages += *(lc->clean_bits) % page_size ? 1 : 0;
pages += 1; /* for header */
@@ -491,7 +497,10 @@ static int _clog_ctr(char *uuid, uint64_t luid,
goto fail;
}
if (unlink_path)
unlink(disk_path);
if (unlink(disk_path) < 0) {
LOG_DBG("Warning: Unable to unlink log device, %s: %s",
disk_path, strerror(errno));
}
lc->disk_fd = r;
lc->disk_size = pages * page_size;
@@ -511,15 +520,13 @@ static int _clog_ctr(char *uuid, uint64_t luid,
return 0;
fail:
if (lc) {
if (lc->clean_bits)
free(lc->clean_bits);
if (lc->sync_bits)
free(lc->sync_bits);
if (lc->disk_buffer)
free(lc->disk_buffer);
if (lc->disk_fd >= 0)
close(lc->disk_fd);
free(lc);
if (lc->disk_fd >= 0 && close(lc->disk_fd))
LOG_ERROR("Close device error, %s: %s",
disk_path, strerror(errno));
free(lc->disk_buffer);
dm_free(lc->sync_bits);
dm_free(lc->clean_bits);
dm_free(lc);
}
return r;
}
@@ -590,7 +597,10 @@ static int clog_ctr(struct dm_ulog_request *rq)
/* We join the CPG when we resume */
/* No returning data */
rq->data_size = 0;
if ((rq->version > 1) && !strcmp(argv[0], "clustered-disk"))
rq->data_size = sprintf(rq->data, "%s", argv[1]) + 1;
else
rq->data_size = 0;
if (r) {
LOG_ERROR("Failed to create cluster log (%s)", rq->uuid);
@@ -634,9 +644,9 @@ static int clog_dtr(struct dm_ulog_request *rq)
close(lc->disk_fd);
if (lc->disk_buffer)
free(lc->disk_buffer);
free(lc->clean_bits);
free(lc->sync_bits);
free(lc);
dm_free(lc->clean_bits);
dm_free(lc->sync_bits);
dm_free(lc);
return 0;
}
@@ -721,6 +731,7 @@ static int clog_resume(struct dm_ulog_request *rq)
if (!lc)
return -EINVAL;
lc->in_sync = 0;
switch (lc->resume_override) {
case 1000:
LOG_ERROR("[%s] Additional resume issued before suspend",
@@ -773,7 +784,7 @@ static int clog_resume(struct dm_ulog_request *rq)
else if (lc->disk_nr_regions > lc->region_count)
LOG_DBG("[%s] Mirror has shrunk, updating log bits",
SHORT_UUID(lc->uuid));
break;
break;
case -EINVAL:
LOG_DBG("[%s] (Re)initializing mirror log - resync issued.",
SHORT_UUID(lc->uuid));
@@ -826,7 +837,7 @@ out:
lc->sync_search = 0;
lc->state = LOG_RESUMED;
lc->recovery_halted = 0;
return rq->error;
}
@@ -964,6 +975,42 @@ static int clog_in_sync(struct dm_ulog_request *rq)
return -EINVAL;
*rtn = log_test_bit(lc->sync_bits, region);
/*
* If the mirror was successfully recovered, we want to always
* force every machine to write to all devices - otherwise,
* corruption will occur. Here's how:
* Node1 suffers a failure and marks a region out-of-sync
* Node2 attempts a write, gets by is_remote_recovering,
* and queries the sync status of the region - finding
* it out-of-sync.
* Node2 thinks the write should be a nosync write, but it
* hasn't suffered the drive failure that Node1 has yet.
* It then issues a generic_make_request directly to
* the primary image only - which is exactly the device
* that has suffered the failure.
* Node2 suffers a lost write - which completely bypasses the
* mirror layer because it had gone through generic_m_r.
* The file system will likely explode at this point due to
* I/O errors. If it wasn't the primary that failed, it is
* easily possible in this case to issue writes to just one
* of the remaining images - also leaving the mirror inconsistent.
*
* We let in_sync() return 1 in a cluster regardless of what is
* in the bitmap once recovery has successfully completed on a
* mirror. This ensures the mirroring code will continue to
* attempt to write to all mirror images. The worst that can
* happen for reads is that additional read attempts may be
* taken.
*
* Futher investigation may be required to determine if there are
* similar possible outcomes when the mirror is in the process of
* recovering. In that case, lc->in_sync would not have been set
* yet.
*/
if (!*rtn && lc->in_sync)
*rtn = 1;
if (*rtn)
LOG_DBG("[%s] Region is in-sync: %llu",
SHORT_UUID(lc->uuid), (unsigned long long)region);
@@ -985,7 +1032,7 @@ static int clog_flush(struct dm_ulog_request *rq, int server)
{
int r = 0;
struct log_c *lc = get_log(rq->uuid, rq->luid);
if (!lc)
return -EINVAL;
@@ -1231,6 +1278,7 @@ static int clog_get_resync_work(struct dm_ulog_request *rq, uint32_t originator)
LOG_SPRINT(lc, "GET - SEQ#=%u, UUID=%s, nodeid = %u:: "
"Resync work complete.",
rq->seq, SHORT_UUID(lc->uuid), originator);
lc->sync_search = lc->region_count + 1;
return 0;
}
@@ -1283,7 +1331,7 @@ static int clog_set_region_sync(struct dm_ulog_request *rq, uint32_t originator)
lc->skip_bit_warning = lc->region_count;
if (pkg->region > (lc->skip_bit_warning + 5)) {
LOG_ERROR("*** Region #%llu skipped during recovery ***",
LOG_SPRINT(lc, "*** Region #%llu skipped during recovery ***",
(unsigned long long)lc->skip_bit_warning);
lc->skip_bit_warning = lc->region_count;
#ifdef DEBUG
@@ -1325,6 +1373,9 @@ static int clog_set_region_sync(struct dm_ulog_request *rq, uint32_t originator)
"(lc->sync_count > lc->region_count) - this is bad",
rq->seq, SHORT_UUID(lc->uuid), originator);
if (lc->sync_count == lc->region_count)
lc->in_sync = 1;
rq->data_size = 0;
return 0;
}
@@ -1370,17 +1421,23 @@ static int clog_get_sync_count(struct dm_ulog_request *rq, uint32_t originator)
return 0;
}
static int core_status_info(struct log_c *lc __attribute((unused)), struct dm_ulog_request *rq)
static int core_status_info(struct log_c *lc __attribute__((unused)), struct dm_ulog_request *rq)
{
int r;
char *data = (char *)rq->data;
rq->data_size = sprintf(data, "1 clustered-core");
r = sprintf(data, "1 clustered-core");
if (r < 0)
return r;
rq->data_size = r;
return 0;
}
static int disk_status_info(struct log_c *lc, struct dm_ulog_request *rq)
{
int r;
char *data = (char *)rq->data;
struct stat statbuf;
@@ -1389,9 +1446,13 @@ static int disk_status_info(struct log_c *lc, struct dm_ulog_request *rq)
return -errno;
}
rq->data_size = sprintf(data, "3 clustered-disk %d:%d %c",
major(statbuf.st_rdev), minor(statbuf.st_rdev),
(lc->log_dev_failed) ? 'D' : 'A');
r = sprintf(data, "3 clustered-disk %d:%d %c",
major(statbuf.st_rdev), minor(statbuf.st_rdev),
(lc->log_dev_failed) ? 'D' : 'A');
if (r < 0)
return r;
rq->data_size = r;
return 0;
}
@@ -1422,18 +1483,24 @@ static int clog_status_info(struct dm_ulog_request *rq)
static int core_status_table(struct log_c *lc, struct dm_ulog_request *rq)
{
int r;
char *data = (char *)rq->data;
rq->data_size = sprintf(data, "clustered-core %u %s%s ",
lc->region_size,
(lc->sync == DEFAULTSYNC) ? "" :
(lc->sync == NOSYNC) ? "nosync " : "sync ",
(lc->block_on_error) ? "block_on_error" : "");
r = sprintf(data, "clustered-core %u %s%s ",
lc->region_size,
(lc->sync == DEFAULTSYNC) ? "" :
(lc->sync == NOSYNC) ? "nosync " : "sync ",
(lc->block_on_error) ? "block_on_error" : "");
if (r < 0)
return r;
rq->data_size = r;
return 0;
}
static int disk_status_table(struct log_c *lc, struct dm_ulog_request *rq)
{
int r;
char *data = (char *)rq->data;
struct stat statbuf;
@@ -1442,12 +1509,16 @@ static int disk_status_table(struct log_c *lc, struct dm_ulog_request *rq)
return -errno;
}
rq->data_size = sprintf(data, "clustered-disk %d:%d %u %s%s ",
major(statbuf.st_rdev), minor(statbuf.st_rdev),
lc->region_size,
(lc->sync == DEFAULTSYNC) ? "" :
(lc->sync == NOSYNC) ? "nosync " : "sync ",
(lc->block_on_error) ? "block_on_error" : "");
r = sprintf(data, "clustered-disk %d:%d %u %s%s ",
major(statbuf.st_rdev), minor(statbuf.st_rdev),
lc->region_size,
(lc->sync == DEFAULTSYNC) ? "" :
(lc->sync == NOSYNC) ? "nosync " : "sync ",
(lc->block_on_error) ? "block_on_error" : "");
if (r < 0)
return r;
rq->data_size = r;
return 0;
}
@@ -1543,7 +1614,7 @@ out:
rq->data_size = sizeof(*pkg);
return 0;
return 0;
}
@@ -1556,6 +1627,11 @@ out:
* from this function. However, an inability to successfully
* perform the request will fill in the 'rq->error' field.
*
* 'rq' (or more correctly, rq->u_rq.data) should be of sufficient
* size to hold any returning data. Currently, local.c uses 2kiB
* to hold 'rq' - leaving ~1.5kiB for return data... more than
* enough for all the implemented functions here.
*
* Returns: 0 on success, -EXXX on error
*/
int do_request(struct clog_request *rq, int server)
@@ -1743,8 +1819,10 @@ int pull_state(const char *uuid, uint64_t luid,
int bitset_size;
struct log_c *lc;
if (!buf)
if (!buf) {
LOG_ERROR("pull_state: buf == NULL");
return -EINVAL;
}
lc = get_log(uuid, luid);
if (!lc) {
@@ -1753,8 +1831,11 @@ int pull_state(const char *uuid, uint64_t luid,
}
if (!strncmp(which, "recovering_region", 17)) {
sscanf(buf, "%llu %u", (unsigned long long *)&lc->recovering_region,
&lc->recoverer);
if (sscanf(buf, "%llu %u", (unsigned long long *)&lc->recovering_region,
&lc->recoverer) != 2) {
LOG_ERROR("cannot parse recovering region from: %s", buf);
return -EINVAL;
}
LOG_SPRINT(lc, "CKPT INIT - SEQ#=X, UUID=%s, nodeid = X:: "
"recovering_region=%llu, recoverer=%u",
SHORT_UUID(lc->uuid),

View File

@@ -58,7 +58,7 @@ int links_register(int fd, const char *name, int (*callback)(void *data), void *
free(lc);
return -ENOMEM;
}
pfds = tmp;
free_pfds = used_pfds + 1;
}
@@ -125,7 +125,7 @@ int links_monitor(void)
for (i = 0; i < used_pfds; i++)
if (pfds[i].revents & POLLIN) {
LOG_DBG("Data ready on %d", pfds[i].fd);
/* FIXME: Add this back return 1;*/
r++;
}

View File

@@ -214,7 +214,7 @@ static int kernel_send_helper(void *data, uint16_t out_size)
*
* Returns: 0 on success, -EXXX on failure
*/
static int do_local_work(void *data __attribute((unused)))
static int do_local_work(void *data __attribute__((unused)))
{
int r;
struct clog_request *rq;
@@ -237,7 +237,6 @@ static int do_local_work(void *data __attribute((unused)))
case DM_ULOG_GET_REGION_SIZE:
case DM_ULOG_IN_SYNC:
case DM_ULOG_GET_SYNC_COUNT:
case DM_ULOG_STATUS_INFO:
case DM_ULOG_STATUS_TABLE:
case DM_ULOG_PRESUSPEND:
/* We do not specify ourselves as server here */
@@ -249,7 +248,7 @@ static int do_local_work(void *data __attribute((unused)))
if (r)
LOG_ERROR("Failed to respond to kernel [%s]",
RQ_TYPE(u_rq->request_type));
break;
case DM_ULOG_RESUME:
/*
@@ -273,6 +272,7 @@ static int do_local_work(void *data __attribute((unused)))
case DM_ULOG_MARK_REGION:
case DM_ULOG_GET_RESYNC_WORK:
case DM_ULOG_SET_REGION_SYNC:
case DM_ULOG_STATUS_INFO:
case DM_ULOG_IS_REMOTE_RECOVERING:
case DM_ULOG_POSTSUSPEND:
r = cluster_send(rq);

View File

@@ -16,7 +16,7 @@
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include <configure.h>
#include "configure.h"
#include <stdio.h>
#include <stdint.h>
#include <syslog.h>

View File

@@ -1,19 +1,4 @@
dm_event_handler_create
dm_event_handler_destroy
dm_event_handler_set_dso
dm_event_handler_set_dev_name
dm_event_handler_set_uuid
dm_event_handler_set_major
dm_event_handler_set_minor
dm_event_handler_set_event_mask
dm_event_handler_get_dso
dm_event_handler_get_devname
dm_event_handler_get_uuid
dm_event_handler_get_major
dm_event_handler_get_minor
dm_event_handler_get_event_mask
dm_event_register_handler
dm_event_unregister_handler
dm_event_get_registered_device
dm_event_handler_set_timeout
dm_event_handler_get_timeout
init_fifos
fini_fifos
daemon_talk
dm_event_get_version

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
# Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
#
# This file is part of the device-mapper userspace tools.
#
@@ -14,9 +14,10 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
SOURCES = libdevmapper-event.c
SOURCES2 = dmeventd.c
TARGETS = dmeventd
.PHONY: install_lib_dynamic install_lib_static install_include \
@@ -26,78 +27,77 @@ TARGETS = dmeventd
INSTALL_DMEVENTD_TARGETS = install_dmeventd_dynamic
INSTALL_LIB_TARGETS = install_lib_dynamic
LIB_NAME = libdevmapper-event
ifeq ("@STATIC_LINK@", "yes")
LIB_STATIC = libdevmapper-event.a
TARGETS += dmeventd.static
LIB_STATIC = $(LIB_NAME).a
TARGETS += $(LIB_STATIC) dmeventd.static
INSTALL_DMEVENTD_TARGETS += install_dmeventd_static
INSTALL_LIB_TARGETS += install_lib_static
endif
LIB_VERSION = $(LIB_VERSION_DM)
LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX)
ifeq ("@LIB_SUFFIX@","dylib")
LIB_SHARED = libdevmapper-event.dylib
else
LIB_SHARED = libdevmapper-event.so
VERSIONED_SHLIB = $(LIB_SHARED).$(LIB_VERSION)
endif
CLEAN_TARGETS = dmeventd.o dmeventd.static
CLEAN_TARGETS = dmeventd.static $(LIB_NAME).a
ifneq ($(MAKECMDGOALS),device-mapper)
SUBDIRS+=plugins
endif
include ../../make.tmpl
CFLOW_LIST = $(SOURCES)
CFLOW_LIST_TARGET = $(LIB_NAME).cflow
CFLOW_TARGET = dmeventd
EXPORTED_HEADER = $(srcdir)/libdevmapper-event.h
EXPORTED_FN_PREFIX = dm_event
include $(top_builddir)/make.tmpl
all: device-mapper
device-mapper: $(TARGETS)
LIBS += -ldl
LVMLIBS += -ldevmapper-event -lpthread -ldevmapper
LIBS += -ldevmapper
LVMLIBS += -ldevmapper-event $(PTHREAD_LIBS)
$(VERSIONED_SHLIB): $(LIB_SHARED)
$(RM) -f $@
$(LN_S) $(LIB_SHARED) $@
dmeventd: $(LIB_SHARED) $(VERSIONED_SHLIB) dmeventd.o
$(CC) $(CFLAGS) $(LDFLAGS) -L. -o $@ dmeventd.o \
$(LVMLIBS) $(LIBS) -rdynamic
dmeventd: $(LIB_SHARED) dmeventd.o
$(CC) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) -L. -o $@ dmeventd.o \
$(DL_LIBS) $(LVMLIBS) $(LIBS) -rdynamic
dmeventd.static: $(LIB_STATIC) dmeventd.o $(interfacebuilddir)/libdevmapper.a
$(CC) $(CFLAGS) $(LDFLAGS) -static -L. -L$(interfacebuilddir) -o $@ \
dmeventd.o $(LVMLIBS) $(LIBS) -rdynamic
$(CC) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) -static -L. -L$(interfacebuilddir) -o $@ \
dmeventd.o $(DL_LIBS) $(LVMLIBS) $(LIBS) $(STATIC_LIBS)
ifeq ("@PKGCONFIG@", "yes")
INSTALL_LIB_TARGETS += install_pkgconfig
endif
install_include:
$(INSTALL) -D $(OWNER) $(GROUP) -m 444 $(srcdir)/libdevmapper-event.h \
$(includedir)/libdevmapper-event.h
ifneq ("$(CFLOW_CMD)", "")
CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES))
-include $(top_builddir)/libdm/libdevmapper.cflow
-include $(top_builddir)/lib/liblvm-internal.cflow
-include $(top_builddir)/lib/liblvm2cmd.cflow
-include $(top_builddir)/daemons/dmeventd/$(LIB_NAME).cflow
-include $(top_builddir)/daemons/dmeventd/plugins/mirror/$(LIB_NAME)-lvm2mirror.cflow
endif
install_pkgconfig:
$(INSTALL) -D $(OWNER) $(GROUP) -m 444 libdevmapper-event.pc \
$(usrlibdir)/pkgconfig/devmapper-event.pc
install_include: $(srcdir)/libdevmapper-event.h
$(INSTALL_DATA) -D $< $(includedir)/$(<F)
install_lib_dynamic: libdevmapper-event.$(LIB_SUFFIX)
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< \
$(libdir)/libdevmapper-event.$(LIB_SUFFIX).$(LIB_VERSION)
$(LN_S) -f libdevmapper-event.$(LIB_SUFFIX).$(LIB_VERSION) \
$(libdir)/libdevmapper-event.$(LIB_SUFFIX)
install_pkgconfig: libdevmapper-event.pc
$(INSTALL_DATA) -D $< $(pkgconfigdir)/devmapper-event.pc
install_lib_static: libdevmapper-event.a
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< \
$(libdir)/libdevmapper-event.a.$(LIB_VERSION)
$(LN_S) -f libdevmapper-event.a.$(LIB_VERSION) $(libdir)/libdevmapper-event.a
install_lib_dynamic: install_lib_shared
install_lib_static: $(LIB_STATIC)
$(INSTALL_DATA) -D $< $(usrlibdir)/$(<F)
install_lib: $(INSTALL_LIB_TARGETS)
install_dmeventd_dynamic: dmeventd
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< $(sbindir)/$<
$(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
install_dmeventd_static: dmeventd.static
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< $(sbindir)/$<
$(INSTALL_PROGRAM) -D $< $(staticdir)/$(<F)
install_dmeventd: $(INSTALL_DMEVENTD_TARGETS)
@@ -105,10 +105,4 @@ install: install_include install_lib install_dmeventd
install_device-mapper: install_include install_lib install_dmeventd
.PHONY: distclean_lib distclean
distclean_lib:
$(RM) libdevmapper-event.pc
distclean: distclean_lib
DISTCLEAN_TARGETS += libdevmapper-event.pc

File diff suppressed because it is too large Load Diff

View File

@@ -17,11 +17,8 @@
/* FIXME This stuff must be configurable. */
#define DM_EVENT_DAEMON "/sbin/dmeventd"
#define DM_EVENT_LOCKFILE "/var/lock/dmeventd"
#define DM_EVENT_FIFO_CLIENT "/var/run/dmeventd-client"
#define DM_EVENT_FIFO_SERVER "/var/run/dmeventd-server"
#define DM_EVENT_PIDFILE "/var/run/dmeventd.pid"
#define DM_EVENT_DEFAULT_TIMEOUT 10
@@ -35,6 +32,8 @@ enum dm_event_command {
DM_EVENT_CMD_SET_TIMEOUT,
DM_EVENT_CMD_GET_TIMEOUT,
DM_EVENT_CMD_HELLO,
DM_EVENT_CMD_DIE,
DM_EVENT_CMD_GET_STATUS,
};
/* Message passed between client and daemon. */
@@ -56,11 +55,21 @@ struct dm_event_fifos {
/* EXIT_SUCCESS 0 -- stdlib.h */
/* EXIT_FAILURE 1 -- stdlib.h */
#define EXIT_LOCKFILE_INUSE 2
/* EXIT_LOCKFILE_INUSE 2 -- obsoleted */
#define EXIT_DESC_CLOSE_FAILURE 3
#define EXIT_DESC_OPEN_FAILURE 4
#define EXIT_OPEN_PID_FAILURE 5
/* EXIT_OPEN_PID_FAILURE 5 -- obsoleted */
#define EXIT_FIFO_FAILURE 6
#define EXIT_CHDIR_FAILURE 7
/* Implemented in libdevmapper-event.c, but not part of public API. */
// FIXME misuse of bitmask as enum
int daemon_talk(struct dm_event_fifos *fifos,
struct dm_event_daemon_message *msg, int cmd,
const char *dso_name, const char *dev_name,
enum dm_event_mask evmask, uint32_t timeout);
int init_fifos(struct dm_event_fifos *fifos);
void fini_fifos(struct dm_event_fifos *fifos);
int dm_event_get_version(struct dm_event_fifos *fifos, int *version);
#endif /* __DMEVENTD_DOT_H__ */

View File

@@ -35,6 +35,8 @@ static int _sequence_nr = 0;
struct dm_event_handler {
char *dso;
char *dmeventd_path;
char *dev_name;
char *uuid;
@@ -47,10 +49,8 @@ struct dm_event_handler {
static void _dm_event_handler_clear_dev_info(struct dm_event_handler *dmevh)
{
if (dmevh->dev_name)
dm_free(dmevh->dev_name);
if (dmevh->uuid)
dm_free(dmevh->uuid);
dm_free(dmevh->dev_name);
dm_free(dmevh->uuid);
dmevh->dev_name = dmevh->uuid = NULL;
dmevh->major = dmevh->minor = 0;
}
@@ -62,6 +62,7 @@ struct dm_event_handler *dm_event_handler_create(void)
if (!(dmevh = dm_malloc(sizeof(*dmevh))))
return NULL;
dmevh->dmeventd_path = NULL;
dmevh->dso = dmevh->dev_name = dmevh->uuid = NULL;
dmevh->major = dmevh->minor = 0;
dmevh->mask = 0;
@@ -73,17 +74,30 @@ struct dm_event_handler *dm_event_handler_create(void)
void dm_event_handler_destroy(struct dm_event_handler *dmevh)
{
_dm_event_handler_clear_dev_info(dmevh);
if (dmevh->dso)
dm_free(dmevh->dso);
dm_free(dmevh->dso);
dm_free(dmevh->dmeventd_path);
dm_free(dmevh);
}
int dm_event_handler_set_dmeventd_path(struct dm_event_handler *dmevh, const char *dmeventd_path)
{
if (!dmeventd_path) /* noop */
return 0;
dm_free(dmevh->dmeventd_path);
dmevh->dmeventd_path = dm_strdup(dmeventd_path);
if (!dmevh->dmeventd_path)
return -ENOMEM;
return 0;
}
int dm_event_handler_set_dso(struct dm_event_handler *dmevh, const char *path)
{
if (!path) /* noop */
return 0;
if (dmevh->dso)
dm_free(dmevh->dso);
dm_free(dmevh->dso);
dmevh->dso = dm_strdup(path);
if (!dmevh->dso)
@@ -113,7 +127,7 @@ int dm_event_handler_set_uuid(struct dm_event_handler *dmevh, const char *uuid)
_dm_event_handler_clear_dev_info(dmevh);
dmevh->uuid = dm_strdup(uuid);
if (!dmevh->dev_name)
if (!dmevh->uuid)
return -ENOMEM;
return 0;
}
@@ -216,8 +230,8 @@ static int _daemon_read(struct dm_event_fifos *fifos,
fd_set fds;
struct timeval tval = { 0, 0 };
size_t size = 2 * sizeof(uint32_t); /* status + size */
char *buf = alloca(size);
int header = 1;
uint32_t *header = alloca(size);
char *buf = (char *)header;
while (bytes < size) {
for (i = 0, ret = 0; (i < 20) && (ret < 1); i++) {
@@ -231,6 +245,10 @@ static int _daemon_read(struct dm_event_fifos *fifos,
log_error("Unable to read from event server");
return 0;
}
if ((ret == 0) && (i > 4) && !bytes) {
log_error("No input from event server.");
return 0;
}
}
if (ret < 1) {
log_error("Unable to read from event server.");
@@ -248,9 +266,9 @@ static int _daemon_read(struct dm_event_fifos *fifos,
}
bytes += ret;
if (bytes == 2 * sizeof(uint32_t) && header) {
msg->cmd = ntohl(*((uint32_t *)buf));
msg->size = ntohl(*((uint32_t *)buf + 1));
if (header && (bytes == 2 * sizeof(uint32_t))) {
msg->cmd = ntohl(header[0]);
msg->size = ntohl(header[1]);
buf = msg->data = dm_malloc(msg->size);
size = msg->size;
bytes = 0;
@@ -259,11 +277,9 @@ static int _daemon_read(struct dm_event_fifos *fifos,
}
if (bytes != size) {
if (msg->data)
dm_free(msg->data);
dm_free(msg->data);
msg->data = NULL;
}
return bytes == size;
}
@@ -276,12 +292,13 @@ static int _daemon_write(struct dm_event_fifos *fifos,
fd_set fds;
size_t size = 2 * sizeof(uint32_t) + msg->size;
char *buf = alloca(size);
uint32_t *header = alloca(size);
char *buf = (char *)header;
char drainbuf[128];
struct timeval tval = { 0, 0 };
*((uint32_t *)buf) = htonl(msg->cmd);
*((uint32_t *)buf + 1) = htonl(msg->size);
header[0] = htonl(msg->cmd);
header[1] = htonl(msg->size);
memcpy(buf + 2 * sizeof(uint32_t), msg->data, msg->size);
/* drain the answer fifo */
@@ -296,7 +313,7 @@ static int _daemon_write(struct dm_event_fifos *fifos,
}
if (ret == 0)
break;
read(fifos->server, drainbuf, 127);
ret = read(fifos->server, drainbuf, 127);
}
while (bytes < size) {
@@ -311,8 +328,7 @@ static int _daemon_write(struct dm_event_fifos *fifos,
}
} while (ret < 1);
ret = write(fifos->client, ((char *) buf) + bytes,
size - bytes);
ret = write(fifos->client, buf + bytes, size - bytes);
if (ret < 0) {
if ((errno == EINTR) || (errno == EAGAIN))
continue;
@@ -328,13 +344,13 @@ static int _daemon_write(struct dm_event_fifos *fifos,
return bytes == size;
}
static int _daemon_talk(struct dm_event_fifos *fifos,
struct dm_event_daemon_message *msg, int cmd,
const char *dso_name, const char *dev_name,
enum dm_event_mask evmask, uint32_t timeout)
int daemon_talk(struct dm_event_fifos *fifos,
struct dm_event_daemon_message *msg, int cmd,
const char *dso_name, const char *dev_name,
enum dm_event_mask evmask, uint32_t timeout)
{
const char *dso = dso_name ? dso_name : "";
const char *dev = dev_name ? dev_name : "";
const char *dso = dso_name ? dso_name : "-";
const char *dev = dev_name ? dev_name : "-";
const char *fmt = "%d:%d %s %s %u %" PRIu32;
int msg_size;
memset(msg, 0, sizeof(*msg));
@@ -366,8 +382,7 @@ static int _daemon_talk(struct dm_event_fifos *fifos,
do {
if (msg->data)
dm_free(msg->data);
dm_free(msg->data);
msg->data = 0;
if (!_daemon_read(fifos, msg)) {
@@ -393,13 +408,13 @@ static int _daemon_talk(struct dm_event_fifos *fifos,
*
* Returns: 1 on success, 0 otherwise
*/
static int _start_daemon(struct dm_event_fifos *fifos)
static int _start_daemon(char *dmeventd_path, struct dm_event_fifos *fifos)
{
int pid, ret = 0;
int status;
struct stat statbuf;
char dmeventdpath[] = DMEVENTD_PATH; /* const type for execvp */
char * const args[] = { dmeventdpath, NULL };
char default_dmeventd_path[] = DMEVENTD_PATH;
char *args[] = { dmeventd_path ? : default_dmeventd_path, NULL };
if (stat(fifos->client_path, &statbuf))
goto start_server;
@@ -413,33 +428,31 @@ static int _start_daemon(struct dm_event_fifos *fifos)
fifos->client = open(fifos->client_path, O_WRONLY | O_NONBLOCK);
if (fifos->client >= 0) {
/* server is running and listening */
close(fifos->client);
if (close(fifos->client))
log_sys_error("close", fifos->client_path);
return 1;
} else if (errno != ENXIO) {
/* problem */
log_error("%s: Can't open client fifo %s: %s",
__func__, fifos->client_path, strerror(errno));
stack;
log_sys_error("open", fifos->client_path);
return 0;
}
start_server:
/* server is not running */
if (!strncmp(DMEVENTD_PATH, "/", 1) && stat(DMEVENTD_PATH, &statbuf)) {
log_error("Unable to find dmeventd.");
return_0;
if ((args[0][0] == '/') && stat(args[0], &statbuf)) {
log_sys_error("stat", args[0]);
return 0;
}
pid = fork();
if (pid < 0)
log_error("Unable to fork.");
log_sys_error("fork", "");
else if (!pid) {
execvp(args[0], args);
log_error("Unable to exec dmeventd: %s", strerror(errno));
_exit(EXIT_FAILURE);
} else {
if (waitpid(pid, &status, 0) < 0)
@@ -454,56 +467,65 @@ static int _start_daemon(struct dm_event_fifos *fifos)
return ret;
}
/* Initialize client. */
static int _init_client(struct dm_event_fifos *fifos)
int init_fifos(struct dm_event_fifos *fifos)
{
/* FIXME? Is fifo the most suitable method? Why not share
comms/daemon code with something else e.g. multipath? */
/* init fifos */
memset(fifos, 0, sizeof(*fifos));
/* FIXME Make these either configurable or depend directly on dmeventd_path */
fifos->client_path = DM_EVENT_FIFO_CLIENT;
fifos->server_path = DM_EVENT_FIFO_SERVER;
if (!_start_daemon(fifos)) {
stack;
return 0;
}
/* Open the fifo used to read from the daemon. */
if ((fifos->server = open(fifos->server_path, O_RDWR)) < 0) {
log_error("%s: open server fifo %s",
__func__, fifos->server_path);
stack;
log_sys_error("open", fifos->server_path);
return 0;
}
/* Lock out anyone else trying to do communication with the daemon. */
if (flock(fifos->server, LOCK_EX) < 0) {
log_error("%s: flock %s", __func__, fifos->server_path);
close(fifos->server);
log_sys_error("flock", fifos->server_path);
if (close(fifos->server))
log_sys_error("close", fifos->server_path);
return 0;
}
/* if ((fifos->client = open(fifos->client_path, O_WRONLY | O_NONBLOCK)) < 0) {*/
if ((fifos->client = open(fifos->client_path, O_RDWR | O_NONBLOCK)) < 0) {
log_error("%s: Can't open client fifo %s: %s",
__func__, fifos->client_path, strerror(errno));
close(fifos->server);
stack;
log_sys_error("open", fifos->client_path);
if (close(fifos->server))
log_sys_error("close", fifos->server_path);
return 0;
}
return 1;
}
static void _dtr_client(struct dm_event_fifos *fifos)
/* Initialize client. */
static int _init_client(char *dmeventd_path, struct dm_event_fifos *fifos)
{
/* init fifos */
memset(fifos, 0, sizeof(*fifos));
/* FIXME Make these either configurable or depend directly on dmeventd_path */
fifos->client_path = DM_EVENT_FIFO_CLIENT;
fifos->server_path = DM_EVENT_FIFO_SERVER;
if (!_start_daemon(dmeventd_path, fifos))
return_0;
return init_fifos(fifos);
}
void fini_fifos(struct dm_event_fifos *fifos)
{
if (flock(fifos->server, LOCK_UN))
log_error("flock unlock %s", fifos->server_path);
close(fifos->client);
close(fifos->server);
if (close(fifos->client))
log_sys_error("close", fifos->client_path);
if (close(fifos->server))
log_sys_error("close", fifos->server_path);
}
/* Get uuid of a device */
@@ -517,62 +539,73 @@ static struct dm_task *_get_device_info(const struct dm_event_handler *dmevh)
return NULL;
}
if (dmevh->uuid)
dm_task_set_uuid(dmt, dmevh->uuid);
else if (dmevh->dev_name)
dm_task_set_name(dmt, dmevh->dev_name);
else if (dmevh->major && dmevh->minor) {
dm_task_set_major(dmt, dmevh->major);
dm_task_set_minor(dmt, dmevh->minor);
}
if (dmevh->uuid) {
if (!dm_task_set_uuid(dmt, dmevh->uuid))
goto_bad;
} else if (dmevh->dev_name) {
if (!dm_task_set_name(dmt, dmevh->dev_name))
goto_bad;
} else if (dmevh->major && dmevh->minor) {
if (!dm_task_set_major(dmt, dmevh->major) ||
!dm_task_set_minor(dmt, dmevh->minor))
goto_bad;
}
/* FIXME Add name or uuid or devno to messages */
if (!dm_task_run(dmt)) {
log_error("_get_device_info: dm_task_run() failed");
goto failed;
goto bad;
}
if (!dm_task_get_info(dmt, &info)) {
log_error("_get_device_info: failed to get info for device");
goto failed;
goto bad;
}
if (!info.exists) {
log_error("_get_device_info: device not found");
goto failed;
log_error("_get_device_info: %s%s%s%.0d%s%.0d%s%s: device not found",
dmevh->uuid ? : "",
(!dmevh->uuid && dmevh->dev_name) ? dmevh->dev_name : "",
(!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? "(" : "",
(!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? dmevh->major : 0,
(!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? ":" : "",
(!dmevh->uuid && !dmevh->dev_name && dmevh->minor > 0) ? dmevh->minor : 0,
(!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) && dmevh->minor == 0 ? "0" : "",
(!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? ") " : "");
goto bad;
}
return dmt;
failed:
bad:
dm_task_destroy(dmt);
return NULL;
}
/* Handle the event (de)registration call and return negative error codes. */
static int _do_event(int cmd, struct dm_event_daemon_message *msg,
static int _do_event(int cmd, char *dmeventd_path, struct dm_event_daemon_message *msg,
const char *dso_name, const char *dev_name,
enum dm_event_mask evmask, uint32_t timeout)
{
int ret;
struct dm_event_fifos fifos;
if (!_init_client(&fifos)) {
if (!_init_client(dmeventd_path, &fifos)) {
stack;
return -ESRCH;
}
ret = _daemon_talk(&fifos, msg, DM_EVENT_CMD_HELLO, 0, 0, 0, 0);
ret = daemon_talk(&fifos, msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0);
if (msg->data)
dm_free(msg->data);
dm_free(msg->data);
msg->data = 0;
if (!ret)
ret = _daemon_talk(&fifos, msg, cmd, dso_name, dev_name, evmask, timeout);
ret = daemon_talk(&fifos, msg, cmd, dso_name, dev_name, evmask, timeout);
/* what is the opposite of init? */
_dtr_client(&fifos);
fini_fifos(&fifos);
return ret;
}
@@ -592,7 +625,7 @@ int dm_event_register_handler(const struct dm_event_handler *dmevh)
uuid = dm_task_get_uuid(dmt);
if ((err = _do_event(DM_EVENT_CMD_REGISTER_FOR_EVENT, &msg,
if ((err = _do_event(DM_EVENT_CMD_REGISTER_FOR_EVENT, dmevh->dmeventd_path, &msg,
dmevh->dso, uuid, dmevh->mask, dmevh->timeout)) < 0) {
log_error("%s: event registration failed: %s",
dm_task_get_name(dmt),
@@ -600,8 +633,7 @@ int dm_event_register_handler(const struct dm_event_handler *dmevh)
ret = 0;
}
if (msg.data)
dm_free(msg.data);
dm_free(msg.data);
dm_task_destroy(dmt);
@@ -622,7 +654,7 @@ int dm_event_unregister_handler(const struct dm_event_handler *dmevh)
uuid = dm_task_get_uuid(dmt);
if ((err = _do_event(DM_EVENT_CMD_UNREGISTER_FOR_EVENT, &msg,
if ((err = _do_event(DM_EVENT_CMD_UNREGISTER_FOR_EVENT, dmevh->dmeventd_path, &msg,
dmevh->dso, uuid, dmevh->mask, dmevh->timeout)) < 0) {
log_error("%s: event deregistration failed: %s",
dm_task_get_name(dmt),
@@ -630,8 +662,7 @@ int dm_event_unregister_handler(const struct dm_event_handler *dmevh)
ret = 0;
}
if (msg.data)
dm_free(msg.data);
dm_free(msg.data);
dm_task_destroy(dmt);
@@ -688,6 +719,7 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next)
enum dm_event_mask reply_mask = 0;
struct dm_task *dmt = NULL;
struct dm_event_daemon_message msg = { 0, 0, NULL };
struct dm_info info;
if (!(dmt = _get_device_info(dmevh))) {
stack;
@@ -696,26 +728,29 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next)
uuid = dm_task_get_uuid(dmt);
if (!(ret = _do_event(next ? DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE :
DM_EVENT_CMD_GET_REGISTERED_DEVICE,
&msg, dmevh->dso, uuid, dmevh->mask, 0))) {
/* FIXME this will probably horribly break if we get
ill-formatted reply */
ret = _parse_message(&msg, &reply_dso, &reply_uuid, &reply_mask);
} else {
if (_do_event(next ? DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE :
DM_EVENT_CMD_GET_REGISTERED_DEVICE, dmevh->dmeventd_path,
&msg, dmevh->dso, uuid, dmevh->mask, 0)) {
log_debug("%s: device not registered.", dm_task_get_name(dmt));
ret = -ENOENT;
goto fail;
}
/* FIXME this will probably horribly break if we get
ill-formatted reply */
ret = _parse_message(&msg, &reply_dso, &reply_uuid, &reply_mask);
dm_task_destroy(dmt);
dmt = NULL;
if (msg.data) {
dm_free(msg.data);
msg.data = NULL;
}
dm_free(msg.data);
msg.data = NULL;
_dm_event_handler_clear_dev_info(dmevh);
if (!reply_uuid) {
ret = -ENXIO; /* dmeventd probably gave us bogus uuid back */
goto fail;
}
dmevh->uuid = dm_strdup(reply_uuid);
if (!dmevh->uuid) {
ret = -ENOMEM;
@@ -730,15 +765,11 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next)
dm_event_handler_set_dso(dmevh, reply_dso);
dm_event_handler_set_event_mask(dmevh, reply_mask);
if (reply_dso) {
dm_free(reply_dso);
reply_dso = NULL;
}
dm_free(reply_dso);
reply_dso = NULL;
if (reply_uuid) {
dm_free(reply_uuid);
reply_uuid = NULL;
}
dm_free(reply_uuid);
reply_uuid = NULL;
dmevh->dev_name = dm_strdup(dm_task_get_name(dmt));
if (!dmevh->dev_name) {
@@ -746,7 +777,6 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next)
goto fail;
}
struct dm_info info;
if (!dm_task_get_info(dmt, &info)) {
ret = -1;
goto fail;
@@ -760,18 +790,45 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next)
return ret;
fail:
if (msg.data)
dm_free(msg.data);
if (reply_dso)
dm_free(reply_dso);
if (reply_uuid)
dm_free(reply_uuid);
dm_free(msg.data);
dm_free(reply_dso);
dm_free(reply_uuid);
_dm_event_handler_clear_dev_info(dmevh);
if (dmt)
dm_task_destroy(dmt);
return ret;
}
/*
* You can (and have to) call this at the stage of the protocol where
* daemon_talk(fifos, &msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0)
*
* would be normally sent. This call will parse the version reply from
* dmeventd, in addition to above call. It is not safe to call this at any
* other place in the protocol.
*
* This is an internal function, not exposed in the public API.
*/
int dm_event_get_version(struct dm_event_fifos *fifos, int *version) {
char *p;
struct dm_event_daemon_message msg = { 0, 0, NULL };
if (daemon_talk(fifos, &msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0))
return 0;
p = msg.data;
*version = 0;
p = strchr(p, ' '); /* Message ID */
if (!p) return 0;
p = strchr(p + 1, ' '); /* HELLO */
if (!p) return 0;
p = strchr(p + 1, ' '); /* HELLO, once more */
if (p)
*version = atoi(p);
return 1;
}
#if 0 /* left out for now */
static char *_skip_string(char *src, const int delimiter)

View File

@@ -46,6 +46,7 @@ enum dm_event_mask {
};
#define DM_EVENT_ALL_ERRORS DM_EVENT_ERROR_MASK
#define DM_EVENT_PROTOCOL_VERSION 1
struct dm_event_handler;
@@ -55,12 +56,17 @@ void dm_event_handler_destroy(struct dm_event_handler *dmevh);
/*
* Path of shared library to handle events.
*
* All of dso, device_name and uuid strings are duplicated, you do not
* need to keep the pointers valid after the call succeeds. Thes may
* return -ENOMEM though.
* All of dmeventd, dso, device_name and uuid strings are duplicated so
* you do not need to keep the pointers valid after the call succeeds.
* They may return -ENOMEM though.
*/
int dm_event_handler_set_dso(struct dm_event_handler *dmevh, const char *path);
/*
* Path of dmeventd binary.
*/
int dm_event_handler_set_dmeventd_path(struct dm_event_handler *dmevh, const char *dmeventd_path);
/*
* Identify the device to monitor by exactly one of device_name, uuid or
* device number. String arguments are duplicated, see above.
@@ -76,6 +82,7 @@ void dm_event_handler_set_timeout(struct dm_event_handler *dmevh, int timeout);
/*
* Specify mask for events to monitor.
*/
// FIXME misuse of bitmask as enum
void dm_event_handler_set_event_mask(struct dm_event_handler *dmevh,
enum dm_event_mask evmask);
@@ -85,6 +92,7 @@ const char *dm_event_handler_get_uuid(const struct dm_event_handler *dmevh);
int dm_event_handler_get_major(const struct dm_event_handler *dmevh);
int dm_event_handler_get_minor(const struct dm_event_handler *dmevh);
int dm_event_handler_get_timeout(const struct dm_event_handler *dmevh);
// FIXME misuse of bitmask as enum
enum dm_event_mask dm_event_handler_get_event_mask(const struct dm_event_handler *dmevh);
/* FIXME Review interface (what about this next thing?) */
@@ -98,6 +106,7 @@ int dm_event_unregister_handler(const struct dm_event_handler *dmevh);
/* Prototypes for DSO interface, see dmeventd.c, struct dso_data for
detailed descriptions. */
// FIXME misuse of bitmask as enum
void process_event(struct dm_task *dmt, enum dm_event_mask evmask, void **user);
int register_device(const char *device_name, const char *uuid, int major, int minor, void **user);
int unregister_device(const char *device_name, const char *uuid, int major,

View File

@@ -5,8 +5,7 @@ includedir=@includedir@
Name: devmapper-event
Description: device-mapper event library
Version: @DM_LIB_VERSION@
Requires: devmapper
Version: @DM_LIB_PATCHLEVEL@
Cflags: -I${includedir}
Libs: -L${libdir} -ldevmapper-event
Libs.private: -lpthread -ldl
Requires.private: devmapper

View File

@@ -1,6 +1,6 @@
#
# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
# Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
# Copyright (C) 2004-2005, 2011 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -15,12 +15,32 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
SUBDIRS += lvm2 mirror snapshot
SUBDIRS += lvm2
ifneq ("@MIRRORS@", "none")
SUBDIRS += mirror
endif
ifneq ("@SNAPSHOTS@", "none")
SUBDIRS += snapshot
endif
ifneq ("@RAID@", "none")
SUBDIRS += raid
endif
ifneq ("@THIN@", "none")
SUBDIRS += thin
endif
ifeq ($(MAKECMDGOALS),distclean)
SUBDIRS = lvm2 mirror snapshot raid thin
endif
include $(top_builddir)/make.tmpl
mirror: lvm2
snapshot: lvm2
include ../../../make.tmpl
mirror: lvm2
raid: lvm2
thin: lvm2

View File

@@ -1,6 +1,7 @@
init_lvm
fini_lvm
lock_lvm
unlock_lvm
lvm_pool
lvm_handle
dmeventd_lvm2_init
dmeventd_lvm2_exit
dmeventd_lvm2_lock
dmeventd_lvm2_unlock
dmeventd_lvm2_pool
dmeventd_lvm2_run
dmeventd_lvm2_command

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2010 Red Hat, Inc. All rights reserved.
# Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -14,26 +14,18 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
CLDFLAGS += -L${top_builddir}/tools
CLDFLAGS += -L$(top_builddir)/tools
SOURCES = dmeventd_lvm.c
ifeq ("@LIB_SUFFIX@","dylib")
LIB_SHARED = libdevmapper-event-lvm2.dylib
else
LIB_SHARED = libdevmapper-event-lvm2.so
endif
LIB_SHARED = libdevmapper-event-lvm2.$(LIB_SUFFIX)
LIB_VERSION = $(LIB_VERSION_LVM)
include $(top_builddir)/make.tmpl
LIBS += -ldevmapper @LIB_PTHREAD@ @LVM2CMD_LIB@
LIBS += @LVM2CMD_LIB@ -ldevmapper $(PTHREAD_LIBS) $(DAEMON_LIBS)
install_lvm2: libdevmapper-event-lvm2.$(LIB_SUFFIX)
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< \
$(libdir)/$<.$(LIB_VERSION)
$(LN_S) -f $<.$(LIB_VERSION) $(libdir)/$<
install_lvm2: install_lib_shared
install: install_lvm2

View File

@@ -13,6 +13,7 @@
*/
#include "lib.h"
#include "log.h"
#include "lvm2cmd.h"
#include "errors.h"
@@ -22,6 +23,8 @@
#include <pthread.h>
#include <syslog.h>
extern int dmeventd_debug;
/*
* register_device() is called first and performs initialisation.
* Only one device may be registered or unregistered at a time.
@@ -40,27 +43,46 @@ static void *_lvm_handle = NULL;
*/
static pthread_mutex_t _event_mutex = PTHREAD_MUTEX_INITIALIZER;
/* FIXME Remove this: Pass messages back to dmeventd core for processing. */
/*
* FIXME Do not pass things directly to syslog, rather use the existing logging
* facilities to sort logging ... however that mechanism needs to be somehow
* configurable and we don't have that option yet
*/
static void _temporary_log_fn(int level,
const char *file __attribute((unused)),
int line __attribute((unused)),
int dm_errno __attribute((unused)),
const char *format)
const char *file __attribute__((unused)),
int line __attribute__((unused)),
int dm_errno __attribute__((unused)),
const char *message)
{
level &= ~_LOG_STDERR;
level &= ~(_LOG_STDERR | _LOG_ONCE);
if (!strncmp(format, "WARNING: ", 9) && (level < 5))
syslog(LOG_CRIT, "%s", format);
else
syslog(LOG_DEBUG, "%s", format);
switch (level) {
case _LOG_DEBUG:
if (dmeventd_debug >= 3)
syslog(LOG_DEBUG, "%s", message);
break;
case _LOG_INFO:
if (dmeventd_debug >= 2)
syslog(LOG_INFO, "%s", message);
break;
case _LOG_NOTICE:
if (dmeventd_debug >= 1)
syslog(LOG_NOTICE, "%s", message);
break;
case _LOG_WARN:
syslog(LOG_WARNING, "%s", message);
break;
case _LOG_ERR:
syslog(LOG_ERR, "%s", message);
break;
default:
syslog(LOG_CRIT, "%s", message);
}
}
void dmeventd_lvm2_lock(void)
{
if (pthread_mutex_trylock(&_event_mutex)) {
syslog(LOG_NOTICE, "Another thread is handling an event. Waiting...");
pthread_mutex_lock(&_event_mutex);
}
pthread_mutex_lock(&_event_mutex);
}
void dmeventd_lvm2_unlock(void)
@@ -88,6 +110,7 @@ int dmeventd_lvm2_init(void)
_mem_pool = NULL;
goto out;
}
lvm2_disable_dmeventd_monitoring(_lvm_handle);
/* FIXME Temporary: move to dmeventd core */
lvm2_run(_lvm_handle, "_memlock_inc");
}
@@ -105,9 +128,9 @@ void dmeventd_lvm2_exit(void)
pthread_mutex_lock(&_register_mutex);
if (!--_register_count) {
lvm2_run(_lvm_handle, "_memlock_dec");
dm_pool_destroy(_mem_pool);
_mem_pool = NULL;
lvm2_run(_lvm_handle, "_memlock_dec");
lvm2_exit(_lvm_handle);
_lvm_handle = NULL;
}
@@ -125,3 +148,31 @@ int dmeventd_lvm2_run(const char *cmdline)
return lvm2_run(_lvm_handle, cmdline);
}
int dmeventd_lvm2_command(struct dm_pool *mem, char *buffer, size_t size,
const char *cmd, const char *device)
{
char *vg = NULL, *lv = NULL, *layer;
int r;
if (!dm_split_lvm_name(mem, device, &vg, &lv, &layer)) {
syslog(LOG_ERR, "Unable to determine VG name from %s.\n",
device);
return 0;
}
/* strip off the mirror component designations */
layer = strstr(lv, "_mlog");
if (layer)
*layer = '\0';
r = dm_snprintf(buffer, size, "%s %s/%s", cmd, vg, lv);
dm_pool_free(mem, vg);
if (r < 0) {
syslog(LOG_ERR, "Unable to form LVM command. (too long).\n");
return 0;
}
return 1;
}

View File

@@ -36,4 +36,7 @@ void dmeventd_lvm2_unlock(void);
struct dm_pool *dmeventd_lvm2_pool(void);
int dmeventd_lvm2_command(struct dm_pool *mem, char *buffer, size_t size,
const char *cmd, const char *device);
#endif /* _DMEVENTD_LVMWRAP_H */

View File

@@ -1,6 +1,6 @@
#
# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
# Copyright (C) 2004-2005, 2008-2010 Red Hat, Inc. All rights reserved.
# Copyright (C) 2004-2005, 2008-2011 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -15,28 +15,23 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
INCLUDES += -I${top_srcdir}/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2
CLDFLAGS += -L${top_builddir}/tools -L${top_builddir}/daemons/dmeventd/plugins/lvm2
INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2
CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2
SOURCES = dmeventd_mirror.c
ifeq ("@LIB_SUFFIX@","dylib")
LIB_SHARED = libdevmapper-event-lvm2mirror.dylib
else
LIB_SHARED = libdevmapper-event-lvm2mirror.so
endif
LIB_NAME = libdevmapper-event-lvm2mirror
LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX)
LIB_VERSION = $(LIB_VERSION_LVM)
CFLOW_LIST = $(SOURCES)
CFLOW_LIST_TARGET = $(LIB_NAME).cflow
include $(top_builddir)/make.tmpl
LIBS += -ldevmapper @LIB_PTHREAD@ @LVM2CMD_LIB@ -ldevmapper-event-lvm2
LIBS += -ldevmapper-event-lvm2 -ldevmapper $(DAEMON_LIBS)
install_lvm2: libdevmapper-event-lvm2mirror.$(LIB_SUFFIX)
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< \
$(libdir)/$<.$(LIB_VERSION)
$(LN_S) -f $<.$(LIB_VERSION) $(libdir)/$<
install_lvm2: install_dm_plugin
install: install_lvm2

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved.
* Copyright (C) 2005-2012 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -18,8 +18,7 @@
#include "errors.h"
#include "libdevmapper-event.h"
#include "dmeventd_lvm.h"
#include <pthread.h>
#include "defaults.h"
#include <syslog.h> /* FIXME Replace syslog with multilog */
/* FIXME Missing openlog? */
@@ -42,17 +41,17 @@ static int _process_status_code(const char status_code, const char *dev_name,
* U => Unclassified failure (bug)
*/
if (status_code == 'F') {
syslog(LOG_ERR, "%s device %s flush failed.\n",
syslog(LOG_ERR, "%s device %s flush failed.",
dev_type, dev_name);
r = ME_FAILURE;
} else if (status_code == 'S')
syslog(LOG_ERR, "%s device %s sync failed.\n",
syslog(LOG_ERR, "%s device %s sync failed.",
dev_type, dev_name);
else if (status_code == 'R')
syslog(LOG_ERR, "%s device %s read failed.\n",
syslog(LOG_ERR, "%s device %s read failed.",
dev_type, dev_name);
else if (status_code != 'A') {
syslog(LOG_ERR, "%s device %s has failed (%c).\n",
syslog(LOG_ERR, "%s device %s has failed (%c).",
dev_type, dev_name, status_code);
r = ME_FAILURE;
}
@@ -83,7 +82,8 @@ static int _get_mirror_event(char *params)
if (!dm_split_words(params, 1, 0, &p))
goto out_parse;
if (!(num_devs = atoi(p)))
if (!(num_devs = atoi(p)) ||
(num_devs > DEFAULT_MIRROR_MAX_IMAGES) || (num_devs < 0))
goto out_parse;
p += strlen(p) + 1;
@@ -92,6 +92,7 @@ static int _get_mirror_event(char *params)
if (!args || dm_split_words(p, num_devs + 7, 0, args) < num_devs + 5)
goto out_parse;
/* FIXME: Code differs from lib/mirror/mirrored.c */
dev_status_str = args[2 + num_devs];
log_argc = atoi(args[3 + num_devs]);
log_status_str = args[3 + num_devs + log_argc];
@@ -121,13 +122,11 @@ static int _get_mirror_event(char *params)
goto out_parse;
out:
if (args)
dm_free(args);
dm_free(args);
return r;
out_parse:
if (args)
dm_free(args);
dm_free(args);
syslog(LOG_ERR, "Unable to parse mirror status string.");
return ME_IGNORE;
}
@@ -137,34 +136,23 @@ static int _remove_failed_devices(const char *device)
int r;
#define CMD_SIZE 256 /* FIXME Use system restriction */
char cmd_str[CMD_SIZE];
char *vg = NULL, *lv = NULL, *layer = NULL;
if (strlen(device) > 200) /* FIXME Use real restriction */
return -ENAMETOOLONG; /* FIXME These return code distinctions are not used so remove them! */
if (!dm_split_lvm_name(dmeventd_lvm2_pool(), device, &vg, &lv, &layer)) {
syslog(LOG_ERR, "Unable to determine VG name from %s",
device);
return -ENOMEM; /* FIXME Replace with generic error return - reason for failure has already got logged */
}
/* FIXME Is any sanity-checking required on %s? */
if (CMD_SIZE <= snprintf(cmd_str, CMD_SIZE, "lvconvert --config devices{ignore_suspended_devices=1} --repair --use-policies %s/%s", vg, lv)) {
/* this error should be caught above, but doesn't hurt to check again */
syslog(LOG_ERR, "Unable to form LVM command: Device name too long");
if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str),
"lvconvert --config devices{ignore_suspended_devices=1} "
"--repair --use-policies", device))
return -ENAMETOOLONG; /* FIXME Replace with generic error return - reason for failure has already got logged */
}
r = dmeventd_lvm2_run(cmd_str);
syslog(LOG_INFO, "Repair of mirrored LV %s/%s %s.", vg, lv, (r == ECMD_PROCESSED) ? "finished successfully" : "failed");
syslog(LOG_INFO, "Repair of mirrored device %s %s.", device,
(r == ECMD_PROCESSED) ? "finished successfully" : "failed");
return (r == ECMD_PROCESSED) ? 0 : -1;
}
void process_event(struct dm_task *dmt,
enum dm_event_mask event __attribute((unused)),
void **unused __attribute((unused)))
enum dm_event_mask event __attribute__((unused)),
void **unused __attribute__((unused)))
{
void *next = NULL;
uint64_t start, length;
@@ -179,12 +167,12 @@ void process_event(struct dm_task *dmt,
&target_type, &params);
if (!target_type) {
syslog(LOG_INFO, "%s mapping lost.\n", device);
syslog(LOG_INFO, "%s mapping lost.", device);
continue;
}
if (strcmp(target_type, "mirror")) {
syslog(LOG_INFO, "%s has unmirrored portion.\n", device);
syslog(LOG_INFO, "%s has unmirrored portion.", device);
continue;
}
@@ -194,13 +182,13 @@ void process_event(struct dm_task *dmt,
_part_ of the device is in sync
Also, this is not an error
*/
syslog(LOG_NOTICE, "%s is now in-sync\n", device);
syslog(LOG_NOTICE, "%s is now in-sync.", device);
break;
case ME_FAILURE:
syslog(LOG_ERR, "Device failure in %s\n", device);
syslog(LOG_ERR, "Device failure in %s.", device);
if (_remove_failed_devices(device))
/* FIXME Why are all the error return codes unused? Get rid of them? */
syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
syslog(LOG_ERR, "Failed to remove faulty devices in %s.",
device);
/* Should check before warning user that device is now linear
else
@@ -212,7 +200,7 @@ void process_event(struct dm_task *dmt,
break;
default:
/* FIXME Provide value then! */
syslog(LOG_INFO, "Unknown event received.\n");
syslog(LOG_INFO, "Unknown event received.");
}
} while (next);
@@ -220,23 +208,28 @@ void process_event(struct dm_task *dmt,
}
int register_device(const char *device,
const char *uuid __attribute((unused)),
int major __attribute((unused)),
int minor __attribute((unused)),
void **unused __attribute((unused)))
const char *uuid __attribute__((unused)),
int major __attribute__((unused)),
int minor __attribute__((unused)),
void **unused __attribute__((unused)))
{
syslog(LOG_INFO, "Monitoring mirror device %s for events", device);
return dmeventd_lvm2_init();
if (!dmeventd_lvm2_init())
return 0;
syslog(LOG_INFO, "Monitoring mirror device %s for events.", device);
return 1;
}
int unregister_device(const char *device,
const char *uuid __attribute((unused)),
int major __attribute((unused)),
int minor __attribute((unused)),
void **unused __attribute((unused)))
const char *uuid __attribute__((unused)),
int major __attribute__((unused)),
int minor __attribute__((unused)),
void **unused __attribute__((unused)))
{
syslog(LOG_INFO, "No longer monitoring mirror device %s for events\n",
syslog(LOG_INFO, "No longer monitoring mirror device %s for events.",
device);
dmeventd_lvm2_exit();
return 1;
}

View File

@@ -0,0 +1,3 @@
process_event
register_device
unregister_device

View File

@@ -0,0 +1,36 @@
#
# Copyright (C) 2011 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
# of the GNU General Public License v.2.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2
CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2
SOURCES = dmeventd_raid.c
LIB_NAME = libdevmapper-event-lvm2raid
LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX)
LIB_VERSION = $(LIB_VERSION_LVM)
CFLOW_LIST = $(SOURCES)
CFLOW_LIST_TARGET = $(LIB_NAME).cflow
include $(top_builddir)/make.tmpl
LIBS += -ldevmapper-event-lvm2 -ldevmapper
install_lvm2: install_dm_plugin
install: install_lvm2

View File

@@ -0,0 +1,172 @@
/*
* Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "lib.h"
#include "lvm2cmd.h"
#include "errors.h"
#include "libdevmapper-event.h"
#include "dmeventd_lvm.h"
#include <syslog.h> /* FIXME Replace syslog with multilog */
/* FIXME Missing openlog? */
/* FIXME Replace most syslogs with log_error() style messages and add complete context. */
/* FIXME Reformat to 80 char lines. */
/*
* run_repair is a close copy to
* plugins/mirror/dmeventd_mirror.c:_remove_failed_devices()
*/
static int run_repair(const char *device)
{
int r;
#define CMD_SIZE 256 /* FIXME Use system restriction */
char cmd_str[CMD_SIZE];
if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str),
"lvconvert --config devices{ignore_suspended_devices=1} "
"--repair --use-policies", device))
return -1;
r = dmeventd_lvm2_run(cmd_str);
if (r != ECMD_PROCESSED)
syslog(LOG_INFO, "Repair of RAID device %s failed.", device);
return (r == ECMD_PROCESSED) ? 0 : -1;
}
static int _process_raid_event(char *params, const char *device)
{
int i, n, failure = 0;
char *p, *a[4];
char *raid_type;
char *num_devices;
char *health_chars;
char *resync_ratio;
/*
* RAID parms: <raid_type> <#raid_disks> \
* <health chars> <resync ratio>
*/
if (!dm_split_words(params, 4, 0, a)) {
syslog(LOG_ERR, "Failed to process status line for %s\n",
device);
return -EINVAL;
}
raid_type = a[0];
num_devices = a[1];
health_chars = a[2];
resync_ratio = a[3];
if (!(n = atoi(num_devices))) {
syslog(LOG_ERR, "Failed to parse number of devices for %s: %s",
device, num_devices);
return -EINVAL;
}
for (i = 0; i < n; i++) {
switch (health_chars[i]) {
case 'A':
/* Device is 'A'live and well */
case 'a':
/* Device is 'a'live, but not yet in-sync */
break;
case 'D':
syslog(LOG_ERR,
"Device #%d of %s array, %s, has failed.",
i, raid_type, device);
failure++;
break;
default:
/* Unhandled character returned from kernel */
break;
}
if (failure)
return run_repair(device);
}
p = strstr(resync_ratio, "/");
if (!p) {
syslog(LOG_ERR, "Failed to parse resync_ratio for %s: %s",
device, resync_ratio);
return -EINVAL;
}
p[0] = '\0';
syslog(LOG_INFO, "%s array, %s, is %s in-sync.",
raid_type, device, strcmp(resync_ratio, p+1) ? "not" : "now");
return 0;
}
void process_event(struct dm_task *dmt,
enum dm_event_mask event __attribute__((unused)),
void **unused __attribute__((unused)))
{
void *next = NULL;
uint64_t start, length;
char *target_type = NULL;
char *params;
const char *device = dm_task_get_name(dmt);
dmeventd_lvm2_lock();
do {
next = dm_get_next_target(dmt, next, &start, &length,
&target_type, &params);
if (!target_type) {
syslog(LOG_INFO, "%s mapping lost.", device);
continue;
}
if (strcmp(target_type, "raid")) {
syslog(LOG_INFO, "%s has non-raid portion.", device);
continue;
}
if (_process_raid_event(params, device))
syslog(LOG_ERR, "Failed to process event for %s",
device);
} while (next);
dmeventd_lvm2_unlock();
}
int register_device(const char *device,
const char *uuid __attribute__((unused)),
int major __attribute__((unused)),
int minor __attribute__((unused)),
void **unused __attribute__((unused)))
{
if (!dmeventd_lvm2_init())
return 0;
syslog(LOG_INFO, "Monitoring RAID device %s for events.", device);
return 1;
}
int unregister_device(const char *device,
const char *uuid __attribute__((unused)),
int major __attribute__((unused)),
int minor __attribute__((unused)),
void **unused __attribute__((unused)))
{
syslog(LOG_INFO, "No longer monitoring RAID device %s for events.",
device);
dmeventd_lvm2_exit();
return 1;
}

View File

@@ -1,6 +1,6 @@
#
# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
#
# This file is part of the LVM2.
#
@@ -15,28 +15,19 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
INCLUDES += -I${top_srcdir}/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2
CLDFLAGS += -L${top_builddir}/tools -L${top_builddir}/daemons/dmeventd/plugins/lvm2
INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2
CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2
SOURCES = dmeventd_snapshot.c
ifeq ("@LIB_SUFFIX@","dylib")
LIB_SHARED = libdevmapper-event-lvm2snapshot.dylib
else
LIB_SHARED = libdevmapper-event-lvm2snapshot.so
endif
LIB_SHARED = libdevmapper-event-lvm2snapshot.$(LIB_SUFFIX)
LIB_VERSION = $(LIB_VERSION_LVM)
include $(top_builddir)/make.tmpl
LIBS += -ldevmapper @LIB_PTHREAD@ @LVM2CMD_LIB@ -ldevmapper-event-lvm2
LIBS += -ldevmapper-event-lvm2 -ldevmapper $(DAEMON_LIBS)
install_lvm2: libdevmapper-event-lvm2snapshot.$(LIB_SUFFIX)
$(INSTALL) -D $(OWNER) $(GROUP) -m 555 $(STRIP) $< \
$(libdir)/$<.$(LIB_VERSION)
$(LN_S) -f $<.$(LIB_VERSION) $(libdir)/$<
install_lvm2: install_dm_plugin
install: install_lvm2

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2007-2008 Red Hat, Inc. All rights reserved.
* Copyright (C) 2007-2011 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -21,14 +21,18 @@
#include "lvm-string.h"
#include <pthread.h>
#include <sys/wait.h>
#include <syslog.h> /* FIXME Replace syslog with multilog */
/* FIXME Missing openlog? */
/* First warning when snapshot is 80% full. */
#define WARNING_THRESH 80
/* Further warnings at 85%, 90% and 95% fullness. */
#define WARNING_STEP 5
/* Run a check every 5%. */
#define CHECK_STEP 5
/* Do not bother checking snapshots less than 50% full. */
#define CHECK_MINIMUM 50
#define UMOUNT_COMMAND "/bin/umount"
struct snap_status {
int invalid;
@@ -36,9 +40,15 @@ struct snap_status {
int max;
};
struct dso_state {
int percent_check;
int known_size;
char cmd_str[1024];
};
/* FIXME possibly reconcile this with target_percent when we gain
access to regular LVM library here. */
static void _parse_snapshot_params(char *params, struct snap_status *stat)
static void _parse_snapshot_params(char *params, struct snap_status *status)
{
char *p;
/*
@@ -46,10 +56,10 @@ static void _parse_snapshot_params(char *params, struct snap_status *stat)
* Invalid -- snapshot invalidated
* Unknown -- status unknown
*/
stat->used = stat->max = 0;
status->used = status->max = 0;
if (!strncmp(params, "Invalid", 7)) {
stat->invalid = 1;
status->invalid = 1;
return;
}
@@ -66,24 +76,110 @@ static void _parse_snapshot_params(char *params, struct snap_status *stat)
*p = '\0';
p++;
stat->used = atoi(params);
stat->max = atoi(p);
status->used = atoi(params);
status->max = atoi(p);
}
static int _run(const char *cmd, ...)
{
va_list ap;
int argc = 1; /* for argv[0], i.e. cmd */
int i = 0;
const char **argv;
pid_t pid = fork();
int status;
if (pid == 0) { /* child */
va_start(ap, cmd);
while (va_arg(ap, const char *))
++ argc;
va_end(ap);
/* + 1 for the terminating NULL */
argv = alloca(sizeof(const char *) * (argc + 1));
argv[0] = cmd;
va_start(ap, cmd);
while ((argv[++i] = va_arg(ap, const char *)));
va_end(ap);
execvp(cmd, (char **)argv);
syslog(LOG_ERR, "Failed to execute %s: %s.\n", cmd, strerror(errno));
exit(127);
}
if (pid > 0) { /* parent */
if (waitpid(pid, &status, 0) != pid)
return 0; /* waitpid failed */
if (!WIFEXITED(status) || WEXITSTATUS(status))
return 0; /* the child failed */
}
if (pid < 0)
return 0; /* fork failed */
return 1; /* all good */
}
static int _extend(const char *cmd)
{
return dmeventd_lvm2_run(cmd) == ECMD_PROCESSED;
}
static void _umount(const char *device, int major, int minor)
{
FILE *mounts;
char buffer[4096];
char *words[3];
struct stat st;
if (!(mounts = fopen("/proc/mounts", "r"))) {
syslog(LOG_ERR, "Could not read /proc/mounts. Not umounting %s.\n", device);
return;
}
while (!feof(mounts)) {
/* read a line of /proc/mounts */
if (!fgets(buffer, sizeof(buffer), mounts))
break; /* eof, likely */
/* words[0] is the mount point and words[1] is the device path */
if (dm_split_words(buffer, 3, 0, words) < 2)
continue;
/* find the major/minor of the device */
if (stat(words[0], &st))
continue; /* can't stat, skip this one */
if (S_ISBLK(st.st_mode) &&
major(st.st_rdev) == major &&
minor(st.st_rdev) == minor) {
syslog(LOG_ERR, "Unmounting invalid snapshot %s from %s.\n", device, words[1]);
if (!_run(UMOUNT_COMMAND, "-fl", words[1], NULL))
syslog(LOG_ERR, "Failed to umount snapshot %s from %s: %s.\n",
device, words[1], strerror(errno));
}
}
if (fclose(mounts))
syslog(LOG_ERR, "Failed to close /proc/mounts.\n");
}
void process_event(struct dm_task *dmt,
enum dm_event_mask event __attribute((unused)),
enum dm_event_mask event __attribute__((unused)),
void **private)
{
void *next = NULL;
uint64_t start, length;
char *target_type = NULL;
char *params;
struct snap_status stat = { 0 };
struct snap_status status = { 0 };
const char *device = dm_task_get_name(dmt);
int percent, *percent_warning = (int*)private;
int percent;
struct dso_state *state = *private;
/* No longer monitoring, waiting for remove */
if (!*percent_warning)
if (!state->percent_check)
return;
dmeventd_lvm2_lock();
@@ -92,49 +188,96 @@ void process_event(struct dm_task *dmt,
if (!target_type)
goto out;
_parse_snapshot_params(params, &stat);
_parse_snapshot_params(params, &status);
if (status.invalid) {
struct dm_info info;
if (dm_task_get_info(dmt, &info)) {
dmeventd_lvm2_unlock();
_umount(device, info.major, info.minor);
return;
} /* else; too bad, but this is best-effort thing... */
}
/* Snapshot size had changed. Clear the threshold. */
if (state->known_size != status.max) {
state->percent_check = CHECK_MINIMUM;
state->known_size = status.max;
}
/*
* If the snapshot has been invalidated or we failed to parse
* the status string. Report the full status string to syslog.
*/
if (stat.invalid || !stat.max) {
if (status.invalid || !status.max) {
syslog(LOG_ERR, "Snapshot %s changed state to: %s\n", device, params);
*percent_warning = 0;
state->percent_check = 0;
goto out;
}
percent = 100 * stat.used / stat.max;
if (percent >= *percent_warning) {
syslog(LOG_WARNING, "Snapshot %s is now %i%% full.\n", device, percent);
/* Print warning on the next multiple of WARNING_STEP. */
*percent_warning = (percent / WARNING_STEP) * WARNING_STEP + WARNING_STEP;
percent = 100 * status.used / status.max;
if (percent >= state->percent_check) {
/* Usage has raised more than CHECK_STEP since the last
time. Run actions. */
state->percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP;
if (percent >= WARNING_THRESH) /* Print a warning to syslog. */
syslog(LOG_WARNING, "Snapshot %s is now %i%% full.\n", device, percent);
/* Try to extend the snapshot, in accord with user-set policies */
if (!_extend(state->cmd_str))
syslog(LOG_ERR, "Failed to extend snapshot %s.\n", device);
}
out:
dmeventd_lvm2_unlock();
}
int register_device(const char *device,
const char *uuid __attribute((unused)),
int major __attribute((unused)),
int minor __attribute((unused)),
const char *uuid __attribute__((unused)),
int major __attribute__((unused)),
int minor __attribute__((unused)),
void **private)
{
int *percent_warning = (int*)private;
struct dso_state *state;
*percent_warning = WARNING_THRESH; /* Print warning if snapshot is full */
if (!dmeventd_lvm2_init())
goto out;
if (!(state = dm_zalloc(sizeof(*state))))
goto bad;
if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(),
state->cmd_str, sizeof(state->cmd_str),
"lvextend --use-policies", device))
goto bad;
state->percent_check = CHECK_MINIMUM;
state->known_size = 0;
*private = state;
syslog(LOG_INFO, "Monitoring snapshot %s\n", device);
return dmeventd_lvm2_init();
return 1;
bad:
dm_free(state);
dmeventd_lvm2_exit();
out:
syslog(LOG_ERR, "Failed to monitor snapshot %s.\n", device);
return 0;
}
int unregister_device(const char *device,
const char *uuid __attribute((unused)),
int major __attribute((unused)),
int minor __attribute((unused)),
void **unused __attribute((unused)))
const char *uuid __attribute__((unused)),
int major __attribute__((unused)),
int minor __attribute__((unused)),
void **private)
{
syslog(LOG_INFO, "No longer monitoring snapshot %s\n",
device);
struct dso_state *state = *private;
syslog(LOG_INFO, "No longer monitoring snapshot %s\n", device);
dm_free(state);
dmeventd_lvm2_exit();
return 1;
}

View File

@@ -0,0 +1,3 @@
process_event
register_device
unregister_device

View File

@@ -0,0 +1,36 @@
#
# Copyright (C) 2011 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
# of the GNU General Public License v.2.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2
CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2
SOURCES = dmeventd_thin.c
LIB_NAME = libdevmapper-event-lvm2thin
LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX)
LIB_VERSION = $(LIB_VERSION_LVM)
CFLOW_LIST = $(SOURCES)
CFLOW_LIST_TARGET = $(LIB_NAME).cflow
include $(top_builddir)/make.tmpl
LIBS += -ldevmapper-event-lvm2 -ldevmapper
install_lvm2: install_dm_plugin
install: install_lvm2

View File

@@ -0,0 +1,288 @@
/*
* Copyright (C) 2011 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "lib.h"
#include "lvm2cmd.h"
#include "errors.h"
#include "libdevmapper-event.h"
#include "dmeventd_lvm.h"
#include "lvm-string.h"
#include <sys/wait.h>
#include <syslog.h> /* FIXME Replace syslog with multilog */
/* FIXME Missing openlog? */
/* First warning when thin is 80% full. */
#define WARNING_THRESH 80
/* Run a check every 5%. */
#define CHECK_STEP 5
/* Do not bother checking thins less than 50% full. */
#define CHECK_MINIMUM 50
#define UMOUNT_COMMAND "/bin/umount"
#define THIN_DEBUG 0
struct dso_state {
struct dm_pool *mem;
int metadata_percent_check;
int data_percent_check;
uint64_t known_metadata_size;
uint64_t known_data_size;
char cmd_str[1024];
};
static int _extend(struct dso_state *state)
{
#if THIN_DEBUG
syslog(LOG_INFO, "dmeventd executes: %s.\n", state->cmd_str);
#endif
return (dmeventd_lvm2_run(state->cmd_str) == ECMD_PROCESSED);
}
#if 0
static int _run(const char *cmd, ...)
{
va_list ap;
int argc = 1; /* for argv[0], i.e. cmd */
int i = 0;
const char **argv;
pid_t pid = fork();
int status;
if (pid == 0) { /* child */
va_start(ap, cmd);
while (va_arg(ap, const char *))
++argc;
va_end(ap);
/* + 1 for the terminating NULL */
argv = alloca(sizeof(const char *) * (argc + 1));
argv[0] = cmd;
va_start(ap, cmd);
while ((argv[++i] = va_arg(ap, const char *)));
va_end(ap);
execvp(cmd, (char **)argv);
syslog(LOG_ERR, "Failed to execute %s: %s.\n", cmd, strerror(errno));
exit(127);
}
if (pid > 0) { /* parent */
if (waitpid(pid, &status, 0) != pid)
return 0; /* waitpid failed */
if (!WIFEXITED(status) || WEXITSTATUS(status))
return 0; /* the child failed */
}
if (pid < 0)
return 0; /* fork failed */
return 1; /* all good */
}
/* FIXME: all thin pool users needs to be here */
static void _umount(const char *device, int major, int minor)
{
FILE *mounts;
char buffer[4096];
char *words[3];
struct stat st;
if (!(mounts = fopen("/proc/mounts", "r"))) {
syslog(LOG_ERR, "Could not read /proc/mounts. Not umounting %s.\n", device);
return;
}
while (!feof(mounts)) {
/* read a line of /proc/mounts */
if (!fgets(buffer, sizeof(buffer), mounts))
break; /* eof, likely */
/* words[0] is the mount point and words[1] is the device path */
dm_split_words(buffer, 3, 0, words);
/* find the major/minor of the device */
if (stat(words[0], &st))
continue; /* can't stat, skip this one */
if (S_ISBLK(st.st_mode) &&
(int) major(st.st_rdev) == major &&
(int) minor(st.st_rdev) == minor) {
syslog(LOG_ERR, "Unmounting invalid thin %s from %s.\n", device, words[1]);
if (!_run(UMOUNT_COMMAND, "-fl", words[1], NULL))
syslog(LOG_ERR, "Failed to umount thin %s from %s: %s.\n",
device, words[1], strerror(errno));
}
}
if (fclose(mounts))
syslog(LOG_ERR, "Failed to close /proc/mounts.\n");
}
#endif
void process_event(struct dm_task *dmt,
enum dm_event_mask event __attribute__((unused)),
void **private)
{
const char *device = dm_task_get_name(dmt);
int percent;
struct dso_state *state = *private;
struct dm_status_thin_pool *tps = NULL;
void *next = NULL;
uint64_t start, length;
char *target_type = NULL;
char *params;
#if 0
/* No longer monitoring, waiting for remove */
if (!state->meta_percent_check && !state->data_percent_check)
return;
#endif
dmeventd_lvm2_lock();
dm_get_next_target(dmt, next, &start, &length, &target_type, &params);
if (!target_type || (strcmp(target_type, "thin-pool") != 0)) {
syslog(LOG_ERR, "Invalid target type.\n");
goto out;
}
if (!dm_get_status_thin_pool(state->mem, params, &tps)) {
syslog(LOG_ERR, "Failed to parse status.\n");
#if 0
/* FIXME hmm what we should do? */
struct dm_info info;
if (dm_task_get_info(dmt, &info)) {
dmeventd_lvm2_unlock();
_umount(device, info.major, info.minor);
} /* else; too bad, but this is best-effort thing... */
#endif
goto out;
}
#if THIN_DEBUG
syslog(LOG_INFO, "%p: Got status %" PRIu64 " / %" PRIu64
" %" PRIu64 " / %" PRIu64 ".\n", state,
tps->used_metadata_blocks, tps->total_metadata_blocks,
tps->used_data_blocks, tps->total_data_blocks);
#endif
/* Thin pool size had changed. Clear the threshold. */
if (state->known_metadata_size != tps->total_metadata_blocks) {
state->metadata_percent_check = CHECK_MINIMUM;
state->known_metadata_size = tps->total_metadata_blocks;
}
if (state->known_data_size != tps->total_data_blocks) {
state->data_percent_check = CHECK_MINIMUM;
state->known_data_size = tps->total_data_blocks;
}
percent = 100 * tps->used_metadata_blocks / tps->total_metadata_blocks;
if (percent >= state->metadata_percent_check) {
/*
* Usage has raised more than CHECK_STEP since the last
* time. Run actions.
*/
state->metadata_percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP;
/* FIXME: extension of metadata needs to be written! */
if (percent >= WARNING_THRESH) /* Print a warning to syslog. */
syslog(LOG_WARNING, "Thin metadata %s is now %i%% full.\n",
device, percent);
/* Try to extend the metadata, in accord with user-set policies */
if (!_extend(state))
syslog(LOG_ERR, "Failed to extend thin metadata %s.\n",
device);
/* FIXME: hmm READ-ONLY switch should happen in error path */
}
percent = 100 * tps->used_data_blocks / tps->total_data_blocks;
if (percent >= state->data_percent_check) {
/*
* Usage has raised more than CHECK_STEP since
* the last time. Run actions.
*/
state->data_percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP;
if (percent >= WARNING_THRESH) /* Print a warning to syslog. */
syslog(LOG_WARNING, "Thin %s is now %i%% full.\n", device, percent);
/* Try to extend the thin data, in accord with user-set policies */
if (!_extend(state))
syslog(LOG_ERR, "Failed to extend thin %s.\n", device);
/* FIXME: hmm READ-ONLY switch should happen in error path */
}
out:
if (tps)
dm_pool_free(state->mem, tps);
dmeventd_lvm2_unlock();
}
int register_device(const char *device,
const char *uuid __attribute__((unused)),
int major __attribute__((unused)),
int minor __attribute__((unused)),
void **private)
{
struct dm_pool *statemem = NULL;
struct dso_state *state;
if (!dmeventd_lvm2_init())
goto bad;
if (!(statemem = dm_pool_create("thin_pool_state", 2048)) ||
!(state = dm_pool_zalloc(statemem, sizeof(*state))) ||
!dmeventd_lvm2_command(statemem, state->cmd_str,
sizeof(state->cmd_str),
"lvextend --use-policies",
device)) {
if (statemem)
dm_pool_destroy(statemem);
dmeventd_lvm2_exit();
goto bad;
}
state->mem = statemem;
state->metadata_percent_check = CHECK_MINIMUM;
state->data_percent_check = CHECK_MINIMUM;
*private = state;
syslog(LOG_INFO, "Monitoring thin %s.\n", device);
return 1;
bad:
syslog(LOG_ERR, "Failed to monitor thin %s.\n", device);
return 0;
}
int unregister_device(const char *device,
const char *uuid __attribute__((unused)),
int major __attribute__((unused)),
int minor __attribute__((unused)),
void **private)
{
struct dso_state *state = *private;
syslog(LOG_INFO, "No longer monitoring thin %s.\n", device);
dm_pool_destroy(state->mem);
dmeventd_lvm2_exit();
return 1;
}

View File

@@ -0,0 +1,59 @@
#
# Copyright (C) 2011-2012 Red Hat, Inc.
#
# This file is part of LVM2.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
# of the GNU Lesser General Public License v.2.1.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
SOURCES = lvmetad-core.c
SOURCES2 = testclient.c
TARGETS = lvmetad lvmetad-testclient
.PHONY: install_lvmetad
CFLOW_LIST = $(SOURCES)
CFLOW_LIST_TARGET = $(LIB_NAME).cflow
CFLOW_TARGET = lvmetad
include $(top_builddir)/make.tmpl
INCLUDES += -I$(top_srcdir)/libdaemon/server
LVMLIBS = -ldaemonserver $(LVMINTERNAL_LIBS) -ldevmapper
LIBS += $(PTHREAD_LIBS)
LDFLAGS += -L$(top_builddir)/libdaemon/server
CLDFLAGS += -L$(top_builddir)/libdaemon/server
lvmetad: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
$(top_builddir)/libdaemon/server/libdaemonserver.a
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) \
$(DL_LIBS) $(LVMLIBS) $(LIBS) -rdynamic
# TODO: No idea. No idea how to test either.
#ifneq ("$(CFLOW_CMD)", "")
#CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES))
#-include $(top_builddir)/libdm/libdevmapper.cflow
#-include $(top_builddir)/lib/liblvm-internal.cflow
#-include $(top_builddir)/lib/liblvm2cmd.cflow
#-include $(top_builddir)/daemons/dmeventd/$(LIB_NAME).cflow
#-include $(top_builddir)/daemons/dmeventd/plugins/mirror/$(LIB_NAME)-lvm2mirror.cflow
#endif
install_lvmetad: lvmetad
$(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
install_lvm2: install_lvmetad
install: install_lvm2

View File

@@ -0,0 +1,81 @@
/*
* Copyright (C) 2011-2012 Red Hat, Inc.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _LVM_LVMETAD_CLIENT_H
#define _LVM_LVMETAD_CLIENT_H
#include "daemon-client.h"
struct volume_group;
/* Different types of replies we may get from lvmetad. */
typedef struct {
daemon_reply r;
const char **uuids; /* NULL terminated array */
} lvmetad_uuidlist;
typedef struct {
daemon_reply r;
struct dm_config_tree *cft;
} lvmetad_vg;
/* Get a list of VG UUIDs that match a given VG name. */
lvmetad_uuidlist lvmetad_lookup_vgname(daemon_handle h, const char *name);
/* Get the metadata of a single VG, identified by UUID. */
lvmetad_vg lvmetad_get_vg(daemon_handle h, const char *uuid);
/*
* Add and remove PVs on demand. Udev-driven systems will use this interface
* instead of scanning.
*/
daemon_reply lvmetad_add_pv(daemon_handle h, const char *pv_uuid, const char *mda_content);
daemon_reply lvmetad_remove_pv(daemon_handle h, const char *pv_uuid);
/* Trigger a full disk scan, throwing away all caches. XXX do we eventually want
* this? Probably not yet, anyway.
* daemon_reply lvmetad_rescan(daemon_handle h);
*/
/*
* Update the version of metadata of a volume group. The VG has to be locked for
* writing for this, and the VG metadata here has to match whatever has been
* written to the disk (under this lock). This initially avoids the requirement
* for lvmetad to write to disk (in later revisions, lvmetad_supersede_vg may
* also do the writing, or we probably add another function to do that).
*/
daemon_reply lvmetad_supersede_vg(daemon_handle h, struct volume_group *vg);
/* Wrappers to open/close connection */
static inline daemon_handle lvmetad_open(const char *socket)
{
daemon_info lvmetad_info = {
.path = "lvmetad",
.socket = socket ?: DEFAULT_RUN_DIR "/lvmetad.socket",
.protocol = "lvmetad",
.protocol_version = 1,
.autostart = 0
};
return daemon_open(lvmetad_info);
}
static inline void lvmetad_close(daemon_handle h)
{
return daemon_close(h);
}
#endif

File diff suppressed because it is too large Load Diff

16
daemons/lvmetad/test.sh Executable file
View File

@@ -0,0 +1,16 @@
#!/bin/bash
export LD_LIBRARY_PATH="$1"
test -n "$2" && {
rm -f /var/run/lvmetad.{socket,pid}
chmod +rx lvmetad
valgrind ./lvmetad -f &
PID=$!
sleep 1
./testclient
kill $PID
exit 0
}
sudo ./test.sh "$1" .

View File

@@ -0,0 +1,127 @@
#include "lvmetad-client.h"
#include "label.h"
#include "lvmcache.h"
#include "metadata.h"
const char *uuid1 = "abcd-efgh";
const char *uuid2 = "bbcd-efgh";
const char *vgid = "yada-yada";
const char *uuid3 = "cbcd-efgh";
const char *metadata2 = "{\n"
"id = \"yada-yada\"\n"
"seqno = 15\n"
"status = [\"READ\", \"WRITE\"]\n"
"flags = []\n"
"extent_size = 8192\n"
"physical_volumes {\n"
" pv0 {\n"
" id = \"abcd-efgh\"\n"
" }\n"
" pv1 {\n"
" id = \"bbcd-efgh\"\n"
" }\n"
" pv2 {\n"
" id = \"cbcd-efgh\"\n"
" }\n"
"}\n"
"}\n";
void _handle_reply(daemon_reply reply) {
const char *repl = daemon_reply_str(reply, "response", NULL);
const char *status = daemon_reply_str(reply, "status", NULL);
const char *vgid = daemon_reply_str(reply, "vgid", NULL);
fprintf(stderr, "[C] REPLY: %s\n", repl);
if (!strcmp(repl, "failed"))
fprintf(stderr, "[C] REASON: %s\n", daemon_reply_str(reply, "reason", "unknown"));
if (vgid)
fprintf(stderr, "[C] VGID: %s\n", vgid);
if (status)
fprintf(stderr, "[C] STATUS: %s\n", status);
daemon_reply_destroy(reply);
}
void _pv_add(daemon_handle h, const char *uuid, const char *metadata)
{
daemon_reply reply = daemon_send_simple(h, "pv_add", "uuid = %s", uuid,
"metadata = %b", metadata,
NULL);
_handle_reply(reply);
}
int scan(daemon_handle h, char *fn) {
struct device *dev = dev_cache_get(fn, NULL);
struct label *label;
if (!label_read(dev, &label, 0)) {
fprintf(stderr, "[C] no label found on %s\n", fn);
return;
}
char uuid[64];
id_write_format(dev->pvid, uuid, 64);
fprintf(stderr, "[C] found PV: %s\n", uuid);
struct lvmcache_info *info = (struct lvmcache_info *) label->info;
struct physical_volume pv = { 0, };
if (!(info->fmt->ops->pv_read(info->fmt, dev_name(dev), &pv, 0))) {
fprintf(stderr, "[C] Failed to read PV %s", dev_name(dev));
return;
}
struct format_instance_ctx fic;
struct format_instance *fid = info->fmt->ops->create_instance(info->fmt, &fic);
struct metadata_area *mda;
struct volume_group *vg = NULL;
dm_list_iterate_items(mda, &info->mdas) {
struct volume_group *this = mda->ops->vg_read(fid, "", mda);
if (this && !vg || this->seqno > vg->seqno)
vg = this;
}
if (vg) {
char *buf = NULL;
/* TODO. This is not entirely correct, since export_vg_to_buffer
* adds trailing garbage to the buffer. We may need to use
* export_vg_to_config_tree and format the buffer ourselves. It
* does, however, work for now, since the garbage is well
* formatted and has no conflicting keys with the rest of the
* request. */
export_vg_to_buffer(vg, &buf);
daemon_reply reply =
daemon_send_simple(h, "pv_add", "uuid = %s", uuid,
"metadata = %b", strchr(buf, '{'),
NULL);
_handle_reply(reply);
}
}
void _dump_vg(daemon_handle h, const char *uuid)
{
daemon_reply reply = daemon_send_simple(h, "vg_by_uuid", "uuid = %s", uuid, NULL);
fprintf(stderr, "[C] reply buffer: %s\n", reply.buffer);
daemon_reply_destroy(reply);
}
int main(int argc, char **argv) {
daemon_handle h = lvmetad_open();
if (argc > 1) {
int i;
struct cmd_context *cmd = create_toolcontext(0, NULL, 0, 0);
for (i = 1; i < argc; ++i) {
const char *uuid = NULL;
scan(h, argv[i]);
}
destroy_toolcontext(cmd);
return 0;
}
_pv_add(h, uuid1, NULL);
_pv_add(h, uuid2, metadata2);
_dump_vg(h, vgid);
_pv_add(h, uuid3, NULL);
daemon_close(h);
return 0;
}

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2004 Red Hat, Inc. All rights reserved.
# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -14,18 +14,18 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
CONFSRC=example.conf
CONFDEST=lvm.conf
include ../make.tmpl
include $(top_builddir)/make.tmpl
install_lvm2: $(CONFSRC)
@if [ ! -e $(confdir)/$(CONFDEST) ]; then \
echo "Installing $(CONFSRC) as $(confdir)/$(CONFDEST)"; \
@INSTALL@ -D $(OWNER) $(GROUP) -m 644 $< \
$(confdir)/$(CONFDEST); \
echo "$(INSTALL_WDATA) -D $< $(confdir)/$(CONFDEST)"; \
$(INSTALL_WDATA) -D $< $(confdir)/$(CONFDEST); \
fi
install: install_lvm2
DISTCLEAN_TARGETS += $(CONFSRC)

View File

@@ -1,11 +1,14 @@
# This is an example configuration file for the LVM2 system.
# It contains the default settings that would be used if there was no
# /etc/lvm/lvm.conf file.
# @DEFAULT_SYS_DIR@/lvm.conf file.
#
# Refer to 'man lvm.conf' for further information including the file layout.
#
# To put this file in a different directory and override /etc/lvm set
# To put this file in a different directory and override @DEFAULT_SYS_DIR@ set
# the environment variable LVM_SYSTEM_DIR before running the tools.
#
# N.B. Take care that each setting only appears once if uncommenting
# example settings in this file.
# This section allows you to configure which block devices should
@@ -19,6 +22,16 @@ devices {
# to use with LVM2.
scan = [ "/dev" ]
# If set, the cache of block device nodes with all associated symlinks
# will be constructed out of the existing udev database content.
# This avoids using and opening any inapplicable non-block devices or
# subdirectories found in the device directory. This setting is applied
# to udev-managed device directory only, other directories will be scanned
# fully. LVM2 needs to be compiled with udev support for this setting to
# take effect. N.B. Any device node or symlink not managed by udev in
# udev directory will be ignored with this setting on.
obtain_device_list_from_udev = 1
# If several entries in the scanned directories correspond to the
# same block device and the tools need to display a name for device,
# all the pathnames are matched against each item in the following
@@ -38,9 +51,11 @@ devices {
# Be careful if there there are symbolic links or multiple filesystem
# entries for the same device as each name is checked separately against
# the list of patterns. The effect is that if any name matches any 'a'
# pattern, the device is accepted; otherwise if any name matches any 'r'
# pattern it is rejected; otherwise it is accepted.
# the list of patterns. The effect is that if the first pattern in the
# list to match a name is an 'a' pattern for any of the names, the device
# is accepted; otherwise if the first pattern in the list to match a name
# is an 'r' pattern for any of the names it is rejected; otherwise it is
# accepted.
# Don't have more than one filter line active at once: only one gets used.
@@ -66,12 +81,12 @@ devices {
# The results of the filtering are cached on disk to avoid
# rescanning dud devices (which can take a very long time).
# By default this cache is stored in the /etc/lvm/cache directory
# By default this cache is stored in the @DEFAULT_SYS_DIR@/@DEFAULT_CACHE_SUBDIR@ directory
# in a file called '.cache'.
# It is safe to delete the contents: the tools regenerate it.
# (The old setting 'cache' is still respected if neither of
# these new ones is present.)
cache_dir = "/etc/lvm/cache"
cache_dir = "@DEFAULT_SYS_DIR@/@DEFAULT_CACHE_SUBDIR@"
cache_file_prefix = ""
# You can turn off writing this cache file by setting this to 0.
@@ -88,6 +103,11 @@ devices {
# 1 enables; 0 disables.
sysfs_scan = 1
# By default, LVM2 will ignore devices used as component paths
# of device-mapper multipath devices.
# 1 enables; 0 disables.
multipath_component_detection = 1
# By default, LVM2 will ignore devices used as components of
# software RAID (md) devices by looking for md superblocks.
# 1 enables; 0 disables.
@@ -98,6 +118,10 @@ devices {
# 1 enables; 0 disables.
md_chunk_alignment = 1
# Default alignment of the start of a data area in MB. If set to 0,
# a value of 64KB will be used. Set to 1 for 1MiB, 2 for 2MiB, etc.
# default_data_alignment = @DEFAULT_DATA_ALIGNMENT@
# By default, the start of a PV's data area will be a multiple of
# the 'minimum_io_size' or 'optimal_io_size' exposed in sysfs.
# - minimum_io_size - the smallest request the device can perform
@@ -111,9 +135,9 @@ devices {
data_alignment_detection = 1
# Alignment (in KB) of start of data area when creating a new PV.
# If a PV is placed directly upon an md device and md_chunk_alignment or
# data_alignment_detection is enabled this parameter is ignored.
# Set to 0 for the default alignment of 64KB or page size, if larger.
# md_chunk_alignment and data_alignment_detection are disabled if set.
# Set to 0 for the default alignment (see: data_alignment_default)
# or page size, if larger.
data_alignment = 0
# By default, the start of the PV's aligned data area will be shifted by
@@ -122,6 +146,7 @@ devices {
# windows partitioning will have an alignment_offset of 3584 bytes
# (sector 7 is the lowest aligned logical block, the 4KB sectors start
# at LBA -1, and consequently sector 63 is aligned on a 4KB boundary).
# But note that pvcreate --dataalignmentoffset will skip this detection.
# 1 enables; 0 disables.
data_alignment_offset_detection = 1
@@ -130,6 +155,72 @@ devices {
# Set this to 1 to skip such devices. This should only be needed
# in recovery situations.
ignore_suspended_devices = 0
# During each LVM operation errors received from each device are counted.
# If the counter of a particular device exceeds the limit set here, no
# further I/O is sent to that device for the remainder of the respective
# operation. Setting the parameter to 0 disables the counters altogether.
disable_after_error_count = 0
# Allow use of pvcreate --uuid without requiring --restorefile.
require_restorefile_with_uuid = 1
# Minimum size (in KB) of block devices which can be used as PVs.
# In a clustered environment all nodes must use the same value.
# Any value smaller than 512KB is ignored.
# Ignore devices smaller than 2MB such as floppy drives.
pv_min_size = 2048
# The original built-in setting was 512 up to and including version 2.02.84.
# pv_min_size = 512
# Issue discards to a logical volumes's underlying physical volume(s) when
# the logical volume is no longer using the physical volumes' space (e.g.
# lvremove, lvreduce, etc). Discards inform the storage that a region is
# no longer in use. Storage that supports discards advertise the protocol
# specific way discards should be issued by the kernel (TRIM, UNMAP, or
# WRITE SAME with UNMAP bit set). Not all storage will support or benefit
# from discards but SSDs and thinly provisioned LUNs generally do. If set
# to 1, discards will only be issued if both the storage and kernel provide
# support.
# 1 enables; 0 disables.
issue_discards = 0
}
# This section allows you to configure the way in which LVM selects
# free space for its Logical Volumes.
allocation {
# When searching for free space to extend an LV, the "cling"
# allocation policy will choose space on the same PVs as the last
# segment of the existing LV. If there is insufficient space and a
# list of tags is defined here, it will check whether any of them are
# attached to the PVs concerned and then seek to match those PV tags
# between existing extents and new extents.
# Use the special tag "@*" as a wildcard to match any PV tag.
# Example: LVs are mirrored between two sites within a single VG.
# PVs are tagged with either @site1 or @site2 to indicate where
# they are situated.
# cling_tag_list = [ "@site1", "@site2" ]
# cling_tag_list = [ "@*" ]
# Changes made in version 2.02.85 extended the reach of the 'cling'
# policies to detect more situations where data can be grouped
# onto the same disks. Set this to 0 to revert to the previous
# algorithm.
maximise_cling = 1
# Set to 1 to guarantee that mirror logs will always be placed on
# different PVs from the mirror images. This was the default
# until version 2.02.85.
mirror_logs_require_separate_pvs = 0
# Set to 1 to guarantee that thin pool metadata will always
# be placed on different PVs from the pool data.
thin_pool_metadata_require_separate_pvs = 0
}
# This section that allows you to configure the nature of the
@@ -192,7 +283,7 @@ backup {
# Where shall we keep it ?
# Remember to back up this directory regularly!
backup_dir = "/etc/lvm/backup"
backup_dir = "@DEFAULT_SYS_DIR@/@DEFAULT_BACKUP_SUBDIR@"
# Should we maintain an archive of old metadata configurations.
# Use 1 for Yes; 0 for No.
@@ -201,7 +292,7 @@ backup {
# Where should archived files go ?
# Remember to back up this directory regularly!
archive_dir = "/etc/lvm/archive"
archive_dir = "@DEFAULT_SYS_DIR@/@DEFAULT_ARCHIVE_SUBDIR@"
# What is the minimum number of archive files you wish to keep ?
retain_min = 10
@@ -295,7 +386,7 @@ global {
# Local non-LV directory that holds file-based locks while commands are
# in progress. A directory like /tmp that may get wiped on reboot is OK.
locking_dir = "/var/lock/lvm"
locking_dir = "@DEFAULT_LOCK_DIR@"
# Whenever there are competing read-only and read-write access requests for
# a volume group's metadata, instead of always granting the read-only
@@ -320,9 +411,86 @@ global {
# Treat any internal errors as fatal errors, aborting the process that
# encountered the internal error. Please only enable for debugging.
abort_on_internal_errors = 0
# Check whether CRC is matching when parsed VG is used multiple times.
# This is useful to catch unexpected internal cached volume group
# structure modification. Please only enable for debugging.
detect_internal_vg_cache_corruption = 0
# If set to 1, no operations that change on-disk metadata will be permitted.
# Additionally, read-only commands that encounter metadata in need of repair
# will still be allowed to proceed exactly as if the repair had been
# performed (except for the unchanged vg_seqno).
# Inappropriate use could mess up your system, so seek advice first!
metadata_read_only = 0
# 'mirror_segtype_default' defines which segtype will be used when the
# shorthand '-m' option is used for mirroring. The possible options are:
#
# "mirror" - The original RAID1 implementation provided by LVM2/DM. It is
# characterized by a flexible log solution (core, disk, mirrored)
# and by the necessity to block I/O while reconfiguring in the
# event of a failure.
#
# There is an inherent race in the dmeventd failure handling
# logic with snapshots of devices using this type of RAID1 that
# in the worst case could cause a deadlock.
# Ref: https://bugzilla.redhat.com/show_bug.cgi?id=817130#c10
#
# "raid1" - This implementation leverages MD's RAID1 personality through
# device-mapper. It is characterized by a lack of log options.
# (A log is always allocated for every device and they are placed
# on the same device as the image - no separate devices are
# required.) This mirror implementation does not require I/O
# to be blocked in the kernel in the event of a failure.
# This mirror implementation is not cluster-aware and cannot be
# used in a shared (active/active) fashion in a cluster.
#
# Specify the '--type <mirror|raid1>' option to override this default
# setting.
mirror_segtype_default = "mirror"
# The default format for displaying LV names in lvdisplay was changed
# in version 2.02.89 to show the LV name and path separately.
# Previously this was always shown as /dev/vgname/lvname even when that
# was never a valid path in the /dev filesystem.
# Set to 1 to reinstate the previous format.
#
# lvdisplay_shows_full_device_path = 0
# Whether to use (trust) a running instance of lvmetad. If this is set to
# 0, all commands fall back to the usual scanning mechanisms. When set to 1
# *and* when lvmetad is running (it is not auto-started), the volume group
# metadata and PV state flags are obtained from the lvmetad instance and no
# scanning is done by the individual commands. In a setup with lvmetad,
# lvmetad udev rules *must* be set up for LVM to work correctly. Without
# proper udev rules, all changes in block device configuration will be
# *ignored* until a manual 'vgscan' is performed.
use_lvmetad = 0
# Full path of the utility called to check that a thin metadata device
# is in a state that allows it to be used.
# Each time a thin pool needs to be activated or after it is deactivated
# this utility is executed. The activation will only proceed if the utility
# has an exit status of 0.
# Set to "" to skip this check. (Not recommended.)
# The thin tools are available as part of the device-mapper-persistent-data
# package from https://github.com/jthornber/thin-provisioning-tools.
#
thin_check_executable = "@THIN_CHECK_CMD@"
# String with options passed with thin_check command. By default,
# option '-q' is for quiet output.
thin_check_options = [ "-q" ]
}
activation {
# Set to 1 to perform internal checks on the operations issued to
# libdevmapper. Useful for debugging problems with activation.
# Some of the checks may be expensive, so it's best to use this
# only when there seems to be a problem.
checks = 0
# Set to 0 to disable udev synchronisation (if compiled into the binaries).
# Processes will not wait for notification from udev.
# They will continue irrespective of any possible udev processing
@@ -340,6 +508,17 @@ activation {
# while any logical volumes are active.
udev_rules = 1
# Set to 1 for LVM2 to verify operations performed by udev. This turns on
# additional checks (and if necessary, repairs) on entries in the device
# directory after udev has completed processing its events.
# Useful for diagnosing problems with LVM2/udev interactions.
verify_udev_operations = 0
# If set to 1 and if deactivation of an LV fails, perhaps because
# a process run from a quick udev rule temporarily opened the device,
# retry the operation for a few seconds before failing.
retry_deactivation = 1
# How to fill in missing stripes if activating an incomplete volume.
# Using "error" will make inaccessible parts of the device return
# I/O errors on access. You can instead use a device path, in which
@@ -348,8 +527,14 @@ activation {
# or snapshotted volumes is likely to result in data corruption.
missing_stripe_filler = "error"
# The linear target is an optimised version of the striped target
# that only handles a single stripe. Set this to 0 to disable this
# optimisation and always use the striped target.
use_linear_target = 1
# How much stack (in KB) to reserve for use while devices suspended
reserved_stack = 256
# Prior to version 2.02.89 this used to be set to 256KB
reserved_stack = 64
# How much memory (in KB) to reserve for use while devices suspended
reserved_memory = 8192
@@ -365,6 +550,16 @@ activation {
#
# volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ]
# If read_only_volume_list is defined, each LV that is to be activated
# is checked against the list, and if it matches, it as activated
# in read-only mode. (This overrides '--permission rw' stored in the
# metadata.)
# "vgname" and "vgname/lvname" are matched exactly.
# "@tag" matches any tag set in the LV or VG.
# "@*" matches if any tag defined on the host is also set in the LV or VG
#
# read_only_volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ]
# Size (in KB) of each copy operation when mirroring
mirror_region_size = 512
@@ -374,9 +569,31 @@ activation {
# "auto" - Use default value chosen by kernel.
readahead = "auto"
# 'raid_fault_policy' defines how a device failure in a RAID logical
# volume is handled. This includes logical volumes that have the following
# segment types: raid1, raid4, raid5*, and raid6*.
#
# In the event of a failure, the following policies will determine what
# actions are performed during the automated response to failures (when
# dmeventd is monitoring the RAID logical volume) and when 'lvconvert' is
# called manually with the options '--repair' and '--use-policies'.
#
# "warn" - Use the system log to warn the user that a device in the RAID
# logical volume has failed. It is left to the user to run
# 'lvconvert --repair' manually to remove or replace the failed
# device. As long as the number of failed devices does not
# exceed the redundancy of the logical volume (1 device for
# raid4/5, 2 for raid6, etc) the logical volume will remain
# usable.
#
# "allocate" - Attempt to use any extra physical volumes in the volume
# group as spares and replace faulty devices.
#
raid_fault_policy = "warn"
# 'mirror_image_fault_policy' and 'mirror_log_fault_policy' define
# how a device failure affecting a mirror is handled.
# A mirror is composed of mirror images (copies) and a log.
# how a device failure affecting a mirror (of "mirror" segment type) is
# handled. A mirror is composed of mirror images (copies) and a log.
# A disk log ensures that a mirror does not need to be re-synced
# (all copies made the same) every time a machine reboots or crashes.
#
@@ -413,6 +630,71 @@ activation {
mirror_log_fault_policy = "allocate"
mirror_image_fault_policy = "remove"
# 'snapshot_autoextend_threshold' and 'snapshot_autoextend_percent' define
# how to handle automatic snapshot extension. The former defines when the
# snapshot should be extended: when its space usage exceeds this many
# percent. The latter defines how much extra space should be allocated for
# the snapshot, in percent of its current size.
#
# For example, if you set snapshot_autoextend_threshold to 70 and
# snapshot_autoextend_percent to 20, whenever a snapshot exceeds 70% usage,
# it will be extended by another 20%. For a 1G snapshot, using up 700M will
# trigger a resize to 1.2G. When the usage exceeds 840M, the snapshot will
# be extended to 1.44G, and so on.
#
# Setting snapshot_autoextend_threshold to 100 disables automatic
# extensions. The minimum value is 50 (A setting below 50 will be treated
# as 50).
snapshot_autoextend_threshold = 100
snapshot_autoextend_percent = 20
# 'thin_pool_autoextend_threshold' and 'thin_pool_autoextend_percent' define
# how to handle automatic pool extension. The former defines when the
# pool should be extended: when its space usage exceeds this many
# percent. The latter defines how much extra space should be allocated for
# the pool, in percent of its current size.
#
# For example, if you set thin_pool_autoextend_threshold to 70 and
# thin_pool_autoextend_percent to 20, whenever a pool exceeds 70% usage,
# it will be extended by another 20%. For a 1G pool, using up 700M will
# trigger a resize to 1.2G. When the usage exceeds 840M, the pool will
# be extended to 1.44G, and so on.
#
# Setting thin_pool_autoextend_threshold to 100 disables automatic
# extensions. The minimum value is 50 (A setting below 50 will be treated
# as 50).
thin_pool_autoextend_threshold = 100
thin_pool_autoextend_percent = 20
# While activating devices, I/O to devices being (re)configured is
# suspended, and as a precaution against deadlocks, LVM2 needs to pin
# any memory it is using so it is not paged out. Groups of pages that
# are known not to be accessed during activation need not be pinned
# into memory. Each string listed in this setting is compared against
# each line in /proc/self/maps, and the pages corresponding to any
# lines that match are not pinned. On some systems locale-archive was
# found to make up over 80% of the memory used by the process.
# mlock_filter = [ "locale/locale-archive", "gconv/gconv-modules.cache" ]
# Set to 1 to revert to the default behaviour prior to version 2.02.62
# which used mlockall() to pin the whole process's memory while activating
# devices.
use_mlockall = 0
# Monitoring is enabled by default when activating logical volumes.
# Set to 0 to disable monitoring or use the --ignoremonitoring option.
monitoring = 1
# When pvmove or lvconvert must wait for the kernel to finish
# synchronising or merging data, they check and report progress
# at intervals of this number of seconds. The default is 15 seconds.
# If this is set to 0 and there is only one thing to wait for, there
# are no progress reports, but the process is awoken immediately the
# operation is complete.
polling_interval = 15
}
@@ -429,6 +711,19 @@ activation {
# pvmetadatacopies = 1
# Default number of copies of metadata to maintain for each VG.
# If set to a non-zero value, LVM automatically chooses which of
# the available metadata areas to use to achieve the requested
# number of copies of the VG metadata. If you set a value larger
# than the the total number of metadata areas available then
# metadata is stored in them all.
# The default value of 0 ("unmanaged") disables this automatic
# management and allows you to control which metadata areas
# are used at the individual PV level using 'pvchange
# --metadataignore y/n'.
# vgmetadatacopies = 0
# Approximate default size of on-disk metadata areas in sectors.
# You should increase this if you have large volume groups or
# you want to retain a large on-disk history of your metadata changes.
@@ -466,9 +761,22 @@ dmeventd {
# snapshot_library is the library used when monitoring a snapshot device.
#
# "libdevmapper-event-lvm2snapshot.so" monitors the filling of
# snapshots and emits a warning through syslog, when the use of
# snapshot exceedes 80%. The warning is repeated when 85%, 90% and
# 95% of the snapshot are filled.
# snapshots and emits a warning through syslog when the use of
# the snapshot exceeds 80%. The warning is repeated when 85%, 90% and
# 95% of the snapshot is filled.
snapshot_library = "libdevmapper-event-lvm2snapshot.so"
# thin_library is the library used when monitoring a thin device.
#
# "libdevmapper-event-lvm2thin.so" monitors the filling of
# pool and emits a warning through syslog when the use of
# the pool exceeds 80%. The warning is repeated when 85%, 90% and
# 95% of the pool is filled.
thin_library = "libdevmapper-event-lvm2thin.so"
# Full path of the dmeventd binary.
#
# executable = "@DMEVENTD_PATH@"
}

View File

@@ -13,10 +13,11 @@
*/
#include "lvm2cmd.h"
#include <stdio.h>
/* All output gets passed to this function line-by-line */
void test_log_fn(int level, int dm_errno, const char *file, int line,
const char *format)
void test_log_fn(int level, const char *file, int line,
int dm_errno, const char *format)
{
/* Extract and process output here rather than printing it */

76
doc/kernel/crypt.txt Normal file
View File

@@ -0,0 +1,76 @@
dm-crypt
=========
Device-Mapper's "crypt" target provides transparent encryption of block devices
using the kernel crypto API.
Parameters: <cipher> <key> <iv_offset> <device path> \
<offset> [<#opt_params> <opt_params>]
<cipher>
Encryption cipher and an optional IV generation mode.
(In format cipher[:keycount]-chainmode-ivopts:ivmode).
Examples:
des
aes-cbc-essiv:sha256
twofish-ecb
/proc/crypto contains supported crypto modes
<key>
Key used for encryption. It is encoded as a hexadecimal number.
You can only use key sizes that are valid for the selected cipher.
<keycount>
Multi-key compatibility mode. You can define <keycount> keys and
then sectors are encrypted according to their offsets (sector 0 uses key0;
sector 1 uses key1 etc.). <keycount> must be a power of two.
<iv_offset>
The IV offset is a sector count that is added to the sector number
before creating the IV.
<device path>
This is the device that is going to be used as backend and contains the
encrypted data. You can specify it as a path like /dev/xxx or a device
number <major>:<minor>.
<offset>
Starting sector within the device where the encrypted data begins.
<#opt_params>
Number of optional parameters. If there are no optional parameters,
the optional paramaters section can be skipped or #opt_params can be zero.
Otherwise #opt_params is the number of following arguments.
Example of optional parameters section:
1 allow_discards
allow_discards
Block discard requests (a.k.a. TRIM) are passed through the crypt device.
The default is to ignore discard requests.
WARNING: Assess the specific security risks carefully before enabling this
option. For example, allowing discards on encrypted devices may lead to
the leak of information about the ciphertext device (filesystem type,
used space etc.) if the discarded blocks can be located easily on the
device later.
Example scripts
===============
LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
encryption with dm-crypt using the 'cryptsetup' utility, see
http://code.google.com/p/cryptsetup/
[[
#!/bin/sh
# Create a crypt device using dmsetup
dmsetup create crypt1 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0"
]]
[[
#!/bin/sh
# Create a crypt device using cryptsetup and LUKS header with default cipher
cryptsetup luksFormat $1
cryptsetup luksOpen $1 crypt1
]]

26
doc/kernel/delay.txt Normal file
View File

@@ -0,0 +1,26 @@
dm-delay
========
Device-Mapper's "delay" target delays reads and/or writes
and maps them to different devices.
Parameters:
<device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
With separate write parameters, the first set is only used for reads.
Delays are specified in milliseconds.
Example scripts
===============
[[
#!/bin/sh
# Create device delaying rw operation for 500ms
echo "0 `blockdev --getsize $1` delay $1 0 500" | dmsetup create delayed
]]
[[
#!/bin/sh
# Create device delaying only write operation for 500ms and
# splitting reads and writes to different devices $1 $2
echo "0 `blockdev --getsize $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
]]

53
doc/kernel/flakey.txt Normal file
View File

@@ -0,0 +1,53 @@
dm-flakey
=========
This target is the same as the linear target except that it exhibits
unreliable behaviour periodically. It's been found useful in simulating
failing devices for testing purposes.
Starting from the time the table is loaded, the device is available for
<up interval> seconds, then exhibits unreliable behaviour for <down
interval> seconds, and then this cycle repeats.
Also, consider using this in combination with the dm-delay target too,
which can delay reads and writes and/or send them to different
underlying devices.
Table parameters
----------------
<dev path> <offset> <up interval> <down interval> \
[<num_features> [<feature arguments>]]
Mandatory parameters:
<dev path>: Full pathname to the underlying block-device, or a
"major:minor" device-number.
<offset>: Starting sector within the device.
<up interval>: Number of seconds device is available.
<down interval>: Number of seconds device returns errors.
Optional feature parameters:
If no feature parameters are present, during the periods of
unreliability, all I/O returns errors.
drop_writes:
All write I/O is silently ignored.
Read I/O is handled correctly.
corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
During <down interval>, replace <Nth_byte> of the data of
each matching bio with <value>.
<Nth_byte>: The offset of the byte to replace.
Counting starts at 1, to replace the first byte.
<direction>: Either 'r' to corrupt reads or 'w' to corrupt writes.
'w' is incompatible with drop_writes.
<value>: The value (from 0-255) to write.
<flags>: Perform the replacement only if bio->bi_rw has all the
selected flags set.
Examples:
corrupt_bio_byte 32 r 1 0
- replaces the 32nd byte of READ bios with the value 1
corrupt_bio_byte 224 w 0 32
- replaces the 224th byte of REQ_META (=32) bios with the value 0

75
doc/kernel/io.txt Normal file
View File

@@ -0,0 +1,75 @@
dm-io
=====
Dm-io provides synchronous and asynchronous I/O services. There are three
types of I/O services available, and each type has a sync and an async
version.
The user must set up an io_region structure to describe the desired location
of the I/O. Each io_region indicates a block-device along with the starting
sector and size of the region.
struct io_region {
struct block_device *bdev;
sector_t sector;
sector_t count;
};
Dm-io can read from one io_region or write to one or more io_regions. Writes
to multiple regions are specified by an array of io_region structures.
The first I/O service type takes a list of memory pages as the data buffer for
the I/O, along with an offset into the first page.
struct page_list {
struct page_list *next;
struct page *page;
};
int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
struct page_list *pl, unsigned int offset,
unsigned long *error_bits);
int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
struct page_list *pl, unsigned int offset,
io_notify_fn fn, void *context);
The second I/O service type takes an array of bio vectors as the data buffer
for the I/O. This service can be handy if the caller has a pre-assembled bio,
but wants to direct different portions of the bio to different devices.
int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where,
int rw, struct bio_vec *bvec,
unsigned long *error_bits);
int dm_io_async_bvec(unsigned int num_regions, struct io_region *where,
int rw, struct bio_vec *bvec,
io_notify_fn fn, void *context);
The third I/O service type takes a pointer to a vmalloc'd memory buffer as the
data buffer for the I/O. This service can be handy if the caller needs to do
I/O to a large region but doesn't want to allocate a large number of individual
memory pages.
int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
void *data, unsigned long *error_bits);
int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
void *data, io_notify_fn fn, void *context);
Callers of the asynchronous I/O services must include the name of a completion
callback routine and a pointer to some context data for the I/O.
typedef void (*io_notify_fn)(unsigned long error, void *context);
The "error" parameter in this callback, as well as the "*error" parameter in
all of the synchronous versions, is a bitset (instead of a simple error value).
In the case of an write-I/O to multiple regions, this bitset allows dm-io to
indicate success or failure on each individual region.
Before using any of the dm-io services, the user should call dm_io_get()
and specify the number of pages they expect to perform I/O on concurrently.
Dm-io will attempt to resize its mempool to make sure enough pages are
always available in order to avoid unnecessary waiting while performing I/O.
When the user is finished using the dm-io services, they should call
dm_io_put() and specify the same number of pages that were given on the
dm_io_get() call.

47
doc/kernel/kcopyd.txt Normal file
View File

@@ -0,0 +1,47 @@
kcopyd
======
Kcopyd provides the ability to copy a range of sectors from one block-device
to one or more other block-devices, with an asynchronous completion
notification. It is used by dm-snapshot and dm-mirror.
Users of kcopyd must first create a client and indicate how many memory pages
to set aside for their copy jobs. This is done with a call to
kcopyd_client_create().
int kcopyd_client_create(unsigned int num_pages,
struct kcopyd_client **result);
To start a copy job, the user must set up io_region structures to describe
the source and destinations of the copy. Each io_region indicates a
block-device along with the starting sector and size of the region. The source
of the copy is given as one io_region structure, and the destinations of the
copy are given as an array of io_region structures.
struct io_region {
struct block_device *bdev;
sector_t sector;
sector_t count;
};
To start the copy, the user calls kcopyd_copy(), passing in the client
pointer, pointers to the source and destination io_regions, the name of a
completion callback routine, and a pointer to some context data for the copy.
int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
unsigned int num_dests, struct io_region *dests,
unsigned int flags, kcopyd_notify_fn fn, void *context);
typedef void (*kcopyd_notify_fn)(int read_err, unsigned int write_err,
void *context);
When the copy completes, kcopyd will call the user's completion routine,
passing back the user's context pointer. It will also indicate if a read or
write error occurred during the copy.
When a user is done with all their copy jobs, they should call
kcopyd_client_destroy() to delete the kcopyd client, which will release the
associated memory pages.
void kcopyd_client_destroy(struct kcopyd_client *kc);

61
doc/kernel/linear.txt Normal file
View File

@@ -0,0 +1,61 @@
dm-linear
=========
Device-Mapper's "linear" target maps a linear range of the Device-Mapper
device onto a linear range of another device. This is the basic building
block of logical volume managers.
Parameters: <dev path> <offset>
<dev path>: Full pathname to the underlying block-device, or a
"major:minor" device-number.
<offset>: Starting sector within the device.
Example scripts
===============
[[
#!/bin/sh
# Create an identity mapping for a device
echo "0 `blockdev --getsize $1` linear $1 0" | dmsetup create identity
]]
[[
#!/bin/sh
# Join 2 devices together
size1=`blockdev --getsize $1`
size2=`blockdev --getsize $2`
echo "0 $size1 linear $1 0
$size1 $size2 linear $2 0" | dmsetup create joined
]]
[[
#!/usr/bin/perl -w
# Split a device into 4M chunks and then join them together in reverse order.
my $name = "reverse";
my $extent_size = 4 * 1024 * 2;
my $dev = $ARGV[0];
my $table = "";
my $count = 0;
if (!defined($dev)) {
die("Please specify a device.\n");
}
my $dev_size = `blockdev --getsize $dev`;
my $extents = int($dev_size / $extent_size) -
(($dev_size % $extent_size) ? 1 : 0);
while ($extents > 0) {
my $this_start = $count * $extent_size;
$extents--;
$count++;
my $this_offset = $extents * $extent_size;
$table .= "$this_start $extent_size linear $dev $this_offset\n";
}
`echo \"$table\" | dmsetup create $name`;
]]

54
doc/kernel/log.txt Normal file
View File

@@ -0,0 +1,54 @@
Device-Mapper Logging
=====================
The device-mapper logging code is used by some of the device-mapper
RAID targets to track regions of the disk that are not consistent.
A region (or portion of the address space) of the disk may be
inconsistent because a RAID stripe is currently being operated on or
a machine died while the region was being altered. In the case of
mirrors, a region would be considered dirty/inconsistent while you
are writing to it because the writes need to be replicated for all
the legs of the mirror and may not reach the legs at the same time.
Once all writes are complete, the region is considered clean again.
There is a generic logging interface that the device-mapper RAID
implementations use to perform logging operations (see
dm_dirty_log_type in include/linux/dm-dirty-log.h). Various different
logging implementations are available and provide different
capabilities. The list includes:
Type Files
==== =====
disk drivers/md/dm-log.c
core drivers/md/dm-log.c
userspace drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h
The "disk" log type
-------------------
This log implementation commits the log state to disk. This way, the
logging state survives reboots/crashes.
The "core" log type
-------------------
This log implementation keeps the log state in memory. The log state
will not survive a reboot or crash, but there may be a small boost in
performance. This method can also be used if no storage device is
available for storing log state.
The "userspace" log type
------------------------
This log type simply provides a way to export the log API to userspace,
so log implementations can be done there. This is done by forwarding most
logging requests to userspace, where a daemon receives and processes the
request.
The structure used for communication between kernel and userspace are
located in include/linux/dm-log-userspace.h. Due to the frequency,
diversity, and 2-way communication nature of the exchanges between
kernel and userspace, 'connector' is used as the interface for
communication.
There are currently two userspace log implementations that leverage this
framework - "clustered-disk" and "clustered-core". These implementations
provide a cluster-coherent log for shared-storage. Device-mapper mirroring
can be used in a shared-storage environment when the cluster log implementations
are employed.

View File

@@ -0,0 +1,84 @@
Introduction
============
The more-sophisticated device-mapper targets require complex metadata
that is managed in kernel. In late 2010 we were seeing that various
different targets were rolling their own data strutures, for example:
- Mikulas Patocka's multisnap implementation
- Heinz Mauelshagen's thin provisioning target
- Another btree-based caching target posted to dm-devel
- Another multi-snapshot target based on a design of Daniel Phillips
Maintaining these data structures takes a lot of work, so if possible
we'd like to reduce the number.
The persistent-data library is an attempt to provide a re-usable
framework for people who want to store metadata in device-mapper
targets. It's currently used by the thin-provisioning target and an
upcoming hierarchical storage target.
Overview
========
The main documentation is in the header files which can all be found
under drivers/md/persistent-data.
The block manager
-----------------
dm-block-manager.[hc]
This provides access to the data on disk in fixed sized-blocks. There
is a read/write locking interface to prevent concurrent accesses, and
keep data that is being used in the cache.
Clients of persistent-data are unlikely to use this directly.
The transaction manager
-----------------------
dm-transaction-manager.[hc]
This restricts access to blocks and enforces copy-on-write semantics.
The only way you can get hold of a writable block through the
transaction manager is by shadowing an existing block (ie. doing
copy-on-write) or allocating a fresh one. Shadowing is elided within
the same transaction so performance is reasonable. The commit method
ensures that all data is flushed before it writes the superblock.
On power failure your metadata will be as it was when last committed.
The Space Maps
--------------
dm-space-map.h
dm-space-map-metadata.[hc]
dm-space-map-disk.[hc]
On-disk data structures that keep track of reference counts of blocks.
Also acts as the allocator of new blocks. Currently two
implementations: a simpler one for managing blocks on a different
device (eg. thinly-provisioned data blocks); and one for managing
the metadata space. The latter is complicated by the need to store
its own data within the space it's managing.
The data structures
-------------------
dm-btree.[hc]
dm-btree-remove.c
dm-btree-spine.c
dm-btree-internal.h
Currently there is only one data structure, a hierarchical btree.
There are plans to add more. For example, something with an
array-like interface would see a lot of use.
The btree is 'hierarchical' in that you can define it to be composed
of nested btrees, and take multiple keys. For example, the
thin-provisioning target uses a btree with two levels of nesting.
The first maps a device id to a mapping tree, and that in turn maps a
virtual block to a physical block.
Values stored in the btrees can have arbitrary size. Keys are always
64bits, although nesting allows you to use multiple keys.

View File

@@ -0,0 +1,39 @@
dm-queue-length
===============
dm-queue-length is a path selector module for device-mapper targets,
which selects a path with the least number of in-flight I/Os.
The path selector name is 'queue-length'.
Table parameters for each path: [<repeat_count>]
<repeat_count>: The number of I/Os to dispatch using the selected
path before switching to the next path.
If not given, internal default is used. To check
the default value, see the activated table.
Status for each path: <status> <fail-count> <in-flight>
<status>: 'A' if the path is active, 'F' if the path is failed.
<fail-count>: The number of path failures.
<in-flight>: The number of in-flight I/Os on the path.
Algorithm
=========
dm-queue-length increments/decrements 'in-flight' when an I/O is
dispatched/completed respectively.
dm-queue-length selects a path with the minimum 'in-flight'.
Examples
========
In case that 2 paths (sda and sdb) are used with repeat_count == 128.
# echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \
dmsetup create test
#
# dmsetup table
test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128
#
# dmsetup status
test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0

108
doc/kernel/raid.txt Normal file
View File

@@ -0,0 +1,108 @@
dm-raid
-------
The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
It allows the MD RAID drivers to be accessed using a device-mapper
interface.
The target is named "raid" and it accepts the following parameters:
<raid_type> <#raid_params> <raid_params> \
<#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
<raid_type>:
raid1 RAID1 mirroring
raid4 RAID4 dedicated parity disk
raid5_la RAID5 left asymmetric
- rotating parity 0 with data continuation
raid5_ra RAID5 right asymmetric
- rotating parity N with data continuation
raid5_ls RAID5 left symmetric
- rotating parity 0 with data restart
raid5_rs RAID5 right symmetric
- rotating parity N with data restart
raid6_zr RAID6 zero restart
- rotating parity zero (left-to-right) with data restart
raid6_nr RAID6 N restart
- rotating parity N (right-to-left) with data restart
raid6_nc RAID6 N continue
- rotating parity N (right-to-left) with data continuation
Refererence: Chapter 4 of
http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
<#raid_params>: The number of parameters that follow.
<raid_params> consists of
Mandatory parameters:
<chunk_size>: Chunk size in sectors. This parameter is often known as
"stripe size". It is the only mandatory parameter and
is placed first.
followed by optional parameters (in any order):
[sync|nosync] Force or prevent RAID initialization.
[rebuild <idx>] Rebuild drive number idx (first drive is 0).
[daemon_sleep <ms>]
Interval between runs of the bitmap daemon that
clear bits. A longer interval means less bitmap I/O but
resyncing after a failure is likely to take longer.
[min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
[max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
[write_mostly <idx>] Drive index is write-mostly
[max_write_behind <sectors>] See '-write-behind=' (man mdadm)
[stripe_cache <sectors>] Stripe cache size (higher RAIDs only)
[region_size <sectors>]
The region_size multiplied by the number of regions is the
logical size of the array. The bitmap records the device
synchronisation state for each region.
<#raid_devs>: The number of devices composing the array.
Each device consists of two entries. The first is the device
containing the metadata (if any); the second is the one containing the
data.
If a drive has failed or is missing at creation time, a '-' can be
given for both the metadata and data drives for a given position.
Example tables
--------------
# RAID4 - 4 data drives, 1 parity (no metadata devices)
# No metadata devices specified to hold superblock/bitmap info
# Chunk size of 1MiB
# (Lines separated for easy reading)
0 1960893648 raid \
raid4 1 2048 \
5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
# RAID4 - 4 data drives, 1 parity (with metadata devices)
# Chunk size of 1MiB, force RAID initialization,
# min recovery rate at 20 kiB/sec/disk
0 1960893648 raid \
raid4 4 2048 sync min_recovery_rate 20 \
5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
'dmsetup table' displays the table used to construct the mapping.
The optional parameters are always printed in the order listed
above with "sync" or "nosync" always output ahead of the other
arguments, regardless of the order used when originally loading the table.
Arguments that can be repeated are ordered by value.
'dmsetup status' yields information on the state and health of the
array.
The output is as follows:
1: <s> <l> raid \
2: <raid_type> <#devices> <1 health char for each dev> <resync_ratio>
Line 1 is the standard output produced by device-mapper.
Line 2 is produced by the raid target, and best explained by example:
0 1960893648 raid raid4 5 AAAAA 2/490221568
Here we can see the RAID type is raid4, there are 5 devices - all of
which are 'A'live, and the array is 2/490221568 complete with recovery.
Faulty or missing devices are marked 'D'. Devices that are out-of-sync
are marked 'a'.

View File

@@ -0,0 +1,91 @@
dm-service-time
===============
dm-service-time is a path selector module for device-mapper targets,
which selects a path with the shortest estimated service time for
the incoming I/O.
The service time for each path is estimated by dividing the total size
of in-flight I/Os on a path with the performance value of the path.
The performance value is a relative throughput value among all paths
in a path-group, and it can be specified as a table argument.
The path selector name is 'service-time'.
Table parameters for each path: [<repeat_count> [<relative_throughput>]]
<repeat_count>: The number of I/Os to dispatch using the selected
path before switching to the next path.
If not given, internal default is used. To check
the default value, see the activated table.
<relative_throughput>: The relative throughput value of the path
among all paths in the path-group.
The valid range is 0-100.
If not given, minimum value '1' is used.
If '0' is given, the path isn't selected while
other paths having a positive value are available.
Status for each path: <status> <fail-count> <in-flight-size> \
<relative_throughput>
<status>: 'A' if the path is active, 'F' if the path is failed.
<fail-count>: The number of path failures.
<in-flight-size>: The size of in-flight I/Os on the path.
<relative_throughput>: The relative throughput value of the path
among all paths in the path-group.
Algorithm
=========
dm-service-time adds the I/O size to 'in-flight-size' when the I/O is
dispatched and subtracts when completed.
Basically, dm-service-time selects a path having minimum service time
which is calculated by:
('in-flight-size' + 'size-of-incoming-io') / 'relative_throughput'
However, some optimizations below are used to reduce the calculation
as much as possible.
1. If the paths have the same 'relative_throughput', skip
the division and just compare the 'in-flight-size'.
2. If the paths have the same 'in-flight-size', skip the division
and just compare the 'relative_throughput'.
3. If some paths have non-zero 'relative_throughput' and others
have zero 'relative_throughput', ignore those paths with zero
'relative_throughput'.
If such optimizations can't be applied, calculate service time, and
compare service time.
If calculated service time is equal, the path having maximum
'relative_throughput' may be better. So compare 'relative_throughput'
then.
Examples
========
In case that 2 paths (sda and sdb) are used with repeat_count == 128
and sda has an average throughput 1GB/s and sdb has 4GB/s,
'relative_throughput' value may be '1' for sda and '4' for sdb.
# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4" \
dmsetup create test
#
# dmsetup table
test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4
#
# dmsetup status
test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 1 8:16 A 0 0 4
Or '2' for sda and '8' for sdb would be also true.
# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8" \
dmsetup create test
#
# dmsetup table
test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8
#
# dmsetup status
test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 2 8:16 A 0 0 8

168
doc/kernel/snapshot.txt Normal file
View File

@@ -0,0 +1,168 @@
Device-mapper snapshot support
==============================
Device-mapper allows you, without massive data copying:
*) To create snapshots of any block device i.e. mountable, saved states of
the block device which are also writable without interfering with the
original content;
*) To create device "forks", i.e. multiple different versions of the
same data stream.
*) To merge a snapshot of a block device back into the snapshot's origin
device.
In the first two cases, dm copies only the chunks of data that get
changed and uses a separate copy-on-write (COW) block device for
storage.
For snapshot merge the contents of the COW storage are merged back into
the origin device.
There are three dm targets available:
snapshot, snapshot-origin, and snapshot-merge.
*) snapshot-origin <origin>
which will normally have one or more snapshots based on it.
Reads will be mapped directly to the backing device. For each write, the
original data will be saved in the <COW device> of each snapshot to keep
its visible content unchanged, at least until the <COW device> fills up.
*) snapshot <origin> <COW device> <persistent?> <chunksize>
A snapshot of the <origin> block device is created. Changed chunks of
<chunksize> sectors will be stored on the <COW device>. Writes will
only go to the <COW device>. Reads will come from the <COW device> or
from <origin> for unchanged data. <COW device> will often be
smaller than the origin and if it fills up the snapshot will become
useless and be disabled, returning errors. So it is important to monitor
the amount of free space and expand the <COW device> before it fills up.
<persistent?> is P (Persistent) or N (Not persistent - will not survive
after reboot).
The difference is that for transient snapshots less metadata must be
saved on disk - they can be kept in memory by the kernel.
* snapshot-merge <origin> <COW device> <persistent> <chunksize>
takes the same table arguments as the snapshot target except it only
works with persistent snapshots. This target assumes the role of the
"snapshot-origin" target and must not be loaded if the "snapshot-origin"
is still present for <origin>.
Creates a merging snapshot that takes control of the changed chunks
stored in the <COW device> of an existing snapshot, through a handover
procedure, and merges these chunks back into the <origin>. Once merging
has started (in the background) the <origin> may be opened and the merge
will continue while I/O is flowing to it. Changes to the <origin> are
deferred until the merging snapshot's corresponding chunk(s) have been
merged. Once merging has started the snapshot device, associated with
the "snapshot" target, will return -EIO when accessed.
How snapshot is used by LVM2
============================
When you create the first LVM2 snapshot of a volume, four dm devices are used:
1) a device containing the original mapping table of the source volume;
2) a device used as the <COW device>;
3) a "snapshot" device, combining #1 and #2, which is the visible snapshot
volume;
4) the "original" volume (which uses the device number used by the original
source volume), whose table is replaced by a "snapshot-origin" mapping
from device #1.
A fixed naming scheme is used, so with the following commands:
lvcreate -L 1G -n base volumeGroup
lvcreate -L 100M --snapshot -n snap volumeGroup/base
we'll have this situation (with volumes in above order):
# dmsetup table|grep volumeGroup
volumeGroup-base-real: 0 2097152 linear 8:19 384
volumeGroup-snap-cow: 0 204800 linear 8:19 2097536
volumeGroup-snap: 0 2097152 snapshot 254:11 254:12 P 16
volumeGroup-base: 0 2097152 snapshot-origin 254:11
# ls -lL /dev/mapper/volumeGroup-*
brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
brw------- 1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow
brw------- 1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap
brw------- 1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base
How snapshot-merge is used by LVM2
==================================
A merging snapshot assumes the role of the "snapshot-origin" while
merging. As such the "snapshot-origin" is replaced with
"snapshot-merge". The "-real" device is not changed and the "-cow"
device is renamed to <origin name>-cow to aid LVM2's cleanup of the
merging snapshot after it completes. The "snapshot" that hands over its
COW device to the "snapshot-merge" is deactivated (unless using lvchange
--refresh); but if it is left active it will simply return I/O errors.
A snapshot will merge into its origin with the following command:
lvconvert --merge volumeGroup/snap
we'll now have this situation:
# dmsetup table|grep volumeGroup
volumeGroup-base-real: 0 2097152 linear 8:19 384
volumeGroup-base-cow: 0 204800 linear 8:19 2097536
volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16
# ls -lL /dev/mapper/volumeGroup-*
brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
brw------- 1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
brw------- 1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
How to determine when a merging is complete
===========================================
The snapshot-merge and snapshot status lines end with:
<sectors_allocated>/<total_sectors> <metadata_sectors>
Both <sectors_allocated> and <total_sectors> include both data and metadata.
During merging, the number of sectors allocated gets smaller and
smaller. Merging has finished when the number of sectors holding data
is zero, in other words <sectors_allocated> == <metadata_sectors>.
Here is a practical example (using a hybrid of lvm and dmsetup commands):
# lvs
LV VG Attr LSize Origin Snap% Move Log Copy% Convert
base volumeGroup owi-a- 4.00g
snap volumeGroup swi-a- 1.00g base 18.97
# dmsetup status volumeGroup-snap
0 8388608 snapshot 397896/2097152 1560
^^^^ metadata sectors
# lvconvert --merge -b volumeGroup/snap
Merging of volume snap started.
# lvs volumeGroup/snap
LV VG Attr LSize Origin Snap% Move Log Copy% Convert
base volumeGroup Owi-a- 4.00g 17.23
# dmsetup status volumeGroup-base
0 8388608 snapshot-merge 281688/2097152 1104
# dmsetup status volumeGroup-base
0 8388608 snapshot-merge 180480/2097152 712
# dmsetup status volumeGroup-base
0 8388608 snapshot-merge 16/2097152 16
Merging has finished.
# lvs
LV VG Attr LSize Origin Snap% Move Log Copy% Convert
base volumeGroup owi-a- 4.00g

58
doc/kernel/striped.txt Normal file
View File

@@ -0,0 +1,58 @@
dm-stripe
=========
Device-Mapper's "striped" target is used to create a striped (i.e. RAID-0)
device across one or more underlying devices. Data is written in "chunks",
with consecutive chunks rotating among the underlying devices. This can
potentially provide improved I/O throughput by utilizing several physical
devices in parallel.
Parameters: <num devs> <chunk size> [<dev path> <offset>]+
<num devs>: Number of underlying devices.
<chunk size>: Size of each chunk of data. Must be a power-of-2 and at
least as large as the system's PAGE_SIZE.
<dev path>: Full pathname to the underlying block-device, or a
"major:minor" device-number.
<offset>: Starting sector within the device.
One or more underlying devices can be specified. The striped device size must
be a multiple of the chunk size and a multiple of the number of underlying
devices.
Example scripts
===============
[[
#!/usr/bin/perl -w
# Create a striped device across any number of underlying devices. The device
# will be called "stripe_dev" and have a chunk-size of 128k.
my $chunk_size = 128 * 2;
my $dev_name = "stripe_dev";
my $num_devs = @ARGV;
my @devs = @ARGV;
my ($min_dev_size, $stripe_dev_size, $i);
if (!$num_devs) {
die("Specify at least one device\n");
}
$min_dev_size = `blockdev --getsize $devs[0]`;
for ($i = 1; $i < $num_devs; $i++) {
my $this_size = `blockdev --getsize $devs[$i]`;
$min_dev_size = ($min_dev_size < $this_size) ?
$min_dev_size : $this_size;
}
$stripe_dev_size = $min_dev_size * $num_devs;
$stripe_dev_size -= $stripe_dev_size % ($chunk_size * $num_devs);
$table = "0 $stripe_dev_size striped $num_devs $chunk_size";
for ($i = 0; $i < $num_devs; $i++) {
$table .= " $devs[$i] 0";
}
`echo $table | dmsetup create $dev_name`;
]]

View File

@@ -0,0 +1,285 @@
Introduction
============
This document descibes a collection of device-mapper targets that
between them implement thin-provisioning and snapshots.
The main highlight of this implementation, compared to the previous
implementation of snapshots, is that it allows many virtual devices to
be stored on the same data volume. This simplifies administration and
allows the sharing of data between volumes, thus reducing disk usage.
Another significant feature is support for an arbitrary depth of
recursive snapshots (snapshots of snapshots of snapshots ...). The
previous implementation of snapshots did this by chaining together
lookup tables, and so performance was O(depth). This new
implementation uses a single data structure to avoid this degradation
with depth. Fragmentation may still be an issue, however, in some
scenarios.
Metadata is stored on a separate device from data, giving the
administrator some freedom, for example to:
- Improve metadata resilience by storing metadata on a mirrored volume
but data on a non-mirrored one.
- Improve performance by storing the metadata on SSD.
Status
======
These targets are very much still in the EXPERIMENTAL state. Please
do not yet rely on them in production. But do experiment and offer us
feedback. Different use cases will have different performance
characteristics, for example due to fragmentation of the data volume.
If you find this software is not performing as expected please mail
dm-devel@redhat.com with details and we'll try our best to improve
things for you.
Userspace tools for checking and repairing the metadata are under
development.
Cookbook
========
This section describes some quick recipes for using thin provisioning.
They use the dmsetup program to control the device-mapper driver
directly. End users will be advised to use a higher-level volume
manager such as LVM2 once support has been added.
Pool device
-----------
The pool device ties together the metadata volume and the data volume.
It maps I/O linearly to the data volume and updates the metadata via
two mechanisms:
- Function calls from the thin targets
- Device-mapper 'messages' from userspace which control the creation of new
virtual devices amongst other things.
Setting up a fresh pool device
------------------------------
Setting up a pool device requires a valid metadata device, and a
data device. If you do not have an existing metadata device you can
make one by zeroing the first 4k to indicate empty metadata.
dd if=/dev/zero of=$metadata_dev bs=4096 count=1
The amount of metadata you need will vary according to how many blocks
are shared between thin devices (i.e. through snapshots). If you have
less sharing than average you'll need a larger-than-average metadata device.
As a guide, we suggest you calculate the number of bytes to use in the
metadata device as 48 * $data_dev_size / $data_block_size but round it up
to 2MB if the answer is smaller. The largest size supported is 16GB.
If you're creating large numbers of snapshots which are recording large
amounts of change, you may need find you need to increase this.
Reloading a pool table
----------------------
You may reload a pool's table, indeed this is how the pool is resized
if it runs out of space. (N.B. While specifying a different metadata
device when reloading is not forbidden at the moment, things will go
wrong if it does not route I/O to exactly the same on-disk location as
previously.)
Using an existing pool device
-----------------------------
dmsetup create pool \
--table "0 20971520 thin-pool $metadata_dev $data_dev \
$data_block_size $low_water_mark"
$data_block_size gives the smallest unit of disk space that can be
allocated at a time expressed in units of 512-byte sectors. People
primarily interested in thin provisioning may want to use a value such
as 1024 (512KB). People doing lots of snapshotting may want a smaller value
such as 128 (64KB). If you are not zeroing newly-allocated data,
a larger $data_block_size in the region of 256000 (128MB) is suggested.
$data_block_size must be the same for the lifetime of the
metadata device.
$low_water_mark is expressed in blocks of size $data_block_size. If
free space on the data device drops below this level then a dm event
will be triggered which a userspace daemon should catch allowing it to
extend the pool device. Only one such event will be sent.
Resuming a device with a new table itself triggers an event so the
userspace daemon can use this to detect a situation where a new table
already exceeds the threshold.
Thin provisioning
-----------------
i) Creating a new thinly-provisioned volume.
To create a new thinly- provisioned volume you must send a message to an
active pool device, /dev/mapper/pool in this example.
dmsetup message /dev/mapper/pool 0 "create_thin 0"
Here '0' is an identifier for the volume, a 24-bit number. It's up
to the caller to allocate and manage these identifiers. If the
identifier is already in use, the message will fail with -EEXIST.
ii) Using a thinly-provisioned volume.
Thinly-provisioned volumes are activated using the 'thin' target:
dmsetup create thin --table "0 2097152 thin /dev/mapper/pool 0"
The last parameter is the identifier for the thinp device.
Internal snapshots
------------------
i) Creating an internal snapshot.
Snapshots are created with another message to the pool.
N.B. If the origin device that you wish to snapshot is active, you
must suspend it before creating the snapshot to avoid corruption.
This is NOT enforced at the moment, so please be careful!
dmsetup suspend /dev/mapper/thin
dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
dmsetup resume /dev/mapper/thin
Here '1' is the identifier for the volume, a 24-bit number. '0' is the
identifier for the origin device.
ii) Using an internal snapshot.
Once created, the user doesn't have to worry about any connection
between the origin and the snapshot. Indeed the snapshot is no
different from any other thinly-provisioned device and can be
snapshotted itself via the same method. It's perfectly legal to
have only one of them active, and there's no ordering requirement on
activating or removing them both. (This differs from conventional
device-mapper snapshots.)
Activate it exactly the same way as any other thinly-provisioned volume:
dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
Deactivation
------------
All devices using a pool must be deactivated before the pool itself
can be.
dmsetup remove thin
dmsetup remove snap
dmsetup remove pool
Reference
=========
'thin-pool' target
------------------
i) Constructor
thin-pool <metadata dev> <data dev> <data block size (sectors)> \
<low water mark (blocks)> [<number of feature args> [<arg>]*]
Optional feature arguments:
- 'skip_block_zeroing': skips the zeroing of newly-provisioned blocks.
Data block size must be between 64KB (128 sectors) and 1GB
(2097152 sectors) inclusive.
ii) Status
<transaction id> <used metadata blocks>/<total metadata blocks>
<used data blocks>/<total data blocks> <held metadata root>
transaction id:
A 64-bit number used by userspace to help synchronise with metadata
from volume managers.
used data blocks / total data blocks
If the number of free blocks drops below the pool's low water mark a
dm event will be sent to userspace. This event is edge-triggered and
it will occur only once after each resume so volume manager writers
should register for the event and then check the target's status.
held metadata root:
The location, in sectors, of the metadata root that has been
'held' for userspace read access. '-' indicates there is no
held root. This feature is not yet implemented so '-' is
always returned.
iii) Messages
create_thin <dev id>
Create a new thinly-provisioned device.
<dev id> is an arbitrary unique 24-bit identifier chosen by
the caller.
create_snap <dev id> <origin id>
Create a new snapshot of another thinly-provisioned device.
<dev id> is an arbitrary unique 24-bit identifier chosen by
the caller.
<origin id> is the identifier of the thinly-provisioned device
of which the new device will be a snapshot.
delete <dev id>
Deletes a thin device. Irreversible.
trim <dev id> <new size in sectors>
Delete mappings from the end of a thin device. Irreversible.
You might want to use this if you're reducing the size of
your thinly-provisioned device. In many cases, due to the
sharing of blocks between devices, it is not possible to
determine in advance how much space 'trim' will release. (In
future a userspace tool might be able to perform this
calculation.)
set_transaction_id <current id> <new id>
Userland volume managers, such as LVM, need a way to
synchronise their external metadata with the internal metadata of the
pool target. The thin-pool target offers to store an
arbitrary 64-bit transaction id and return it on the target's
status line. To avoid races you must provide what you think
the current transaction id is when you change it with this
compare-and-swap message.
'thin' target
-------------
i) Constructor
thin <pool dev> <dev id>
pool dev:
the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
dev id:
the internal device identifier of the device to be
activated.
The pool doesn't store any size against the thin devices. If you
load a thin target that is smaller than you've been using previously,
then you'll have no access to blocks mapped beyond the end. If you
load a target that is bigger than before, then extra blocks will be
provisioned as and when needed.
If you wish to reduce the size of your thin device and potentially
regain some space then send the 'trim' message to the pool.
ii) Status
<nr mapped sectors> <highest mapped sector>

97
doc/kernel/uevent.txt Normal file
View File

@@ -0,0 +1,97 @@
The device-mapper uevent code adds the capability to device-mapper to create
and send kobject uevents (uevents). Previously device-mapper events were only
available through the ioctl interface. The advantage of the uevents interface
is the event contains environment attributes providing increased context for
the event avoiding the need to query the state of the device-mapper device after
the event is received.
There are two functions currently for device-mapper events. The first function
listed creates the event and the second function sends the event(s).
void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
const char *path, unsigned nr_valid_paths)
void dm_send_uevents(struct list_head *events, struct kobject *kobj)
The variables added to the uevent environment are:
Variable Name: DM_TARGET
Uevent Action(s): KOBJ_CHANGE
Type: string
Description:
Value: Name of device-mapper target that generated the event.
Variable Name: DM_ACTION
Uevent Action(s): KOBJ_CHANGE
Type: string
Description:
Value: Device-mapper specific action that caused the uevent action.
PATH_FAILED - A path has failed.
PATH_REINSTATED - A path has been reinstated.
Variable Name: DM_SEQNUM
Uevent Action(s): KOBJ_CHANGE
Type: unsigned integer
Description: A sequence number for this specific device-mapper device.
Value: Valid unsigned integer range.
Variable Name: DM_PATH
Uevent Action(s): KOBJ_CHANGE
Type: string
Description: Major and minor number of the path device pertaining to this
event.
Value: Path name in the form of "Major:Minor"
Variable Name: DM_NR_VALID_PATHS
Uevent Action(s): KOBJ_CHANGE
Type: unsigned integer
Description:
Value: Valid unsigned integer range.
Variable Name: DM_NAME
Uevent Action(s): KOBJ_CHANGE
Type: string
Description: Name of the device-mapper device.
Value: Name
Variable Name: DM_UUID
Uevent Action(s): KOBJ_CHANGE
Type: string
Description: UUID of the device-mapper device.
Value: UUID. (Empty string if there isn't one.)
An example of the uevents generated as captured by udevmonitor is shown
below.
1.) Path failure.
UEVENT[1192521009.711215] change@/block/dm-3
ACTION=change
DEVPATH=/block/dm-3
SUBSYSTEM=block
DM_TARGET=multipath
DM_ACTION=PATH_FAILED
DM_SEQNUM=1
DM_PATH=8:32
DM_NR_VALID_PATHS=0
DM_NAME=mpath2
DM_UUID=mpath-35333333000002328
MINOR=3
MAJOR=253
SEQNUM=1130
2.) Path reinstate.
UEVENT[1192521132.989927] change@/block/dm-3
ACTION=change
DEVPATH=/block/dm-3
SUBSYSTEM=block
DM_TARGET=multipath
DM_ACTION=PATH_REINSTATED
DM_SEQNUM=2
DM_PATH=8:32
DM_NR_VALID_PATHS=1
DM_NAME=mpath2
DM_UUID=mpath-35333333000002328
MINOR=3
MAJOR=253
SEQNUM=1131

37
doc/kernel/zero.txt Normal file
View File

@@ -0,0 +1,37 @@
dm-zero
=======
Device-Mapper's "zero" target provides a block-device that always returns
zero'd data on reads and silently drops writes. This is similar behavior to
/dev/zero, but as a block-device instead of a character-device.
Dm-zero has no target-specific parameters.
One very interesting use of dm-zero is for creating "sparse" devices in
conjunction with dm-snapshot. A sparse device reports a device-size larger
than the amount of actual storage space available for that device. A user can
write data anywhere within the sparse device and read it back like a normal
device. Reads to previously unwritten areas will return a zero'd buffer. When
enough data has been written to fill up the actual storage space, the sparse
device is deactivated. This can be very useful for testing device and
filesystem limitations.
To create a sparse device, start by creating a dm-zero device that's the
desired size of the sparse device. For this example, we'll assume a 10TB
sparse device.
TEN_TERABYTES=`expr 10 \* 1024 \* 1024 \* 1024 \* 2` # 10 TB in sectors
echo "0 $TEN_TERABYTES zero" | dmsetup create zero1
Then create a snapshot of the zero device, using any available block-device as
the COW device. The size of the COW device will determine the amount of real
space available to the sparse device. For this example, we'll assume /dev/sdb1
is an available 10GB partition.
echo "0 $TEN_TERABYTES snapshot /dev/mapper/zero1 /dev/sdb1 p 128" | \
dmsetup create sparse1
This will create a 10TB sparse device called /dev/mapper/sparse1 that has
10GB of actual storage space available. If more than 10GB of data is written
to this device, it will start returning I/O errors.

475
doc/lvm2-raid.txt Normal file
View File

@@ -0,0 +1,475 @@
=======================
= LVM RAID Design Doc =
=======================
#############################
# Chapter 1: User-Interface #
#############################
***************** CREATING A RAID DEVICE ******************
01: lvcreate --type <RAID type> \
02: [--regionsize <size>] \
03: [-i/--stripes <#>] [-I,--stripesize <size>] \
04: [-m/--mirrors <#>] \
05: [--[min|max]recoveryrate <kB/sec/disk>] \
06: [--stripecache <size>] \
07: [--writemostly <devices>] \
08: [--maxwritebehind <size>] \
09: [[no]sync] \
10: <Other normal args, like: -L 5G -n lv vg> \
11: [devices]
Line 01:
I don't intend for there to be shorthand options for specifying the
segment type. The available RAID types are:
"raid0" - Stripe [NOT IMPLEMENTED]
"raid1" - should replace DM Mirroring
"raid10" - striped mirrors, [NOT IMPLEMENTED]
"raid4" - RAID4
"raid5" - Same as "raid5_ls" (Same default as MD)
"raid5_la" - RAID5 Rotating parity 0 with data continuation
"raid5_ra" - RAID5 Rotating parity N with data continuation
"raid5_ls" - RAID5 Rotating parity 0 with data restart
"raid5_rs" - RAID5 Rotating parity N with data restart
"raid6" - Same as "raid6_zr"
"raid6_zr" - RAID6 Rotating parity 0 with data restart
"raid6_nr" - RAID6 Rotating parity N with data restart
"raid6_nc" - RAID6 Rotating parity N with data continuation
The exception to 'no shorthand options' will be where the RAID implementations
can displace traditional tagets. This is the case with 'mirror' and 'raid1'.
In this case, "mirror_segtype_default" - found under the "global" section in
lvm.conf - can be set to "mirror" or "raid1". The segment type inferred when
the '-m' option is used will be taken from this setting. The default segment
types can be overridden on the command line by using the '--type' argument.
Line 02:
Region size is relevant for all RAID types. It defines the granularity for
which the bitmap will track the active areas of disk. The default is currently
4MiB. I see no reason to change this unless it is a problem for MD performance.
MD does impose a restriction of 2^21 regions for a given device, however. This
means two things: 1) we should never need a metadata area larger than
8kiB+sizeof(superblock)+bitmap_offset (IOW, pretty small) and 2) the region
size will have to be upwardly revised if the device is larger than 8TiB
(assuming defaults).
Line 03/04:
The '-m/--mirrors' option is only relevant to RAID1 and will be used just like
it is today for DM mirroring. For all other RAID types, -i/--stripes and
-I/--stripesize are relevant. The former will specify the number of data
devices that will be used for striping. For example, if the user specifies
'--type raid0 -i 3', then 3 devices are needed. If the user specifies
'--type raid6 -i 3', then 5 devices are needed. The -I/--stripesize may be
confusing to MD users, as they use the term "chunksize". I think they will
adapt without issue and I don't wish to create a conflict with the term
"chunksize" that we use for snapshots.
Line 05/06/07:
I'm still not clear on how to specify these options. Some are easier than
others. '--writemostly' is particularly hard because it involves specifying
which devices shall be 'write-mostly' and thus, also have 'max-write-behind'
applied to them. It has been suggested that a '--readmostly'/'--readfavored'
or similar option could be introduced as a way to specify a primary disk vs.
specifying all the non-primary disks via '--writemostly'. I like this idea,
but haven't come up with a good name yet. Thus, these will remain
unimplemented until future specification.
Line 09/10/11:
These are familiar.
Further creation related ideas:
Today, you can specify '--type mirror' without an '-m/--mirrors' argument
necessary. The number of devices defaults to two (and the log defaults to
'disk'). A similar thing should happen with the RAID types. All of them
should default to having two data devices unless otherwise specified. This
would mean a total number of 2 devices for RAID 0/1, 3 devices for RAID 4/5,
and 4 devices for RAID 6/10.
***************** CONVERTING A RAID DEVICE ******************
01: lvconvert [--type <RAID type>] \
02: [-R/--regionsize <size>] \
03: [-i/--stripes <#>] [-I,--stripesize <size>] \
04: [-m/--mirrors <#>] \
05: [--merge]
06: [--splitmirrors <#> [--trackchanges]] \
07: [--replace <sub_lv|device>] \
08: [--[min|max]recoveryrate <kB/sec/disk>] \
09: [--stripecache <size>] \
10: [--writemostly <devices>] \
11: [--maxwritebehind <size>] \
12: vg/lv
13: [devices]
lvconvert should work exactly as it does now when dealing with mirrors -
even if(when) we switch to MD RAID1. Of course, there are no plans to
allow the presense of the metadata area to be configurable (e.g. --corelog).
It will be simple enough to detect if the LV being up/down-converted is
new or old-style mirroring.
If we choose to use MD RAID0 as well, it will be possible to change the
number of stripes and the stripesize. It is therefore conceivable to see
something like, 'lvconvert -i +1 vg/lv'.
Line 01:
It is possible to change the RAID type of an LV - even if that LV is already
a RAID device of a different type. For example, you could change from
RAID4 to RAID5 or RAID5 to RAID6.
Line 02/03/04:
These are familiar options - all of which would now be available as options
for change. (However, it'd be nice if we didn't have regionsize in there.
It's simple on the kernel side, but is just an extra - often unecessary -
parameter to many functions in the LVM codebase.)
Line 05:
This option is used to merge an LV back into a RAID1 array - provided it was
split for temporary read-only use by '--splitmirrors 1 --trackchanges'.
Line 06:
The '--splitmirrors <#>' argument should be familiar from the "mirror" segment
type. It allows RAID1 images to be split from the array to form a new LV.
Either the original LV or the split LV - or both - could become a linear LV as
a result. If the '--trackchanges' argument is specified in addition to
'--splitmirrors', an LV will be split from the array. It will be read-only.
This operation does not change the original array - except that it uses an empty
slot to hold the position of the split LV which it expects to return in the
future (see the '--merge' argument). It tracks any changes that occur to the
array while the slot is kept in reserve. If the LV is merged back into the
array, only the changes are resync'ed to the returning image. Repeating the
'lvconvert' operation without the '--trackchanges' option will complete the
split of the LV permanently.
Line 07:
This option allows the user to specify a sub_lv (e.g. a mirror image) or
a particular device for replacement. The device (or all the devices in
the sub_lv) will be removed and replaced with different devices from the
VG.
Line 08/09/10/11:
It should be possible to alter these parameters of a RAID device. As with
lvcreate, however, I'm not entirely certain how to best define some of these.
We don't need all the capabilities at once though, so it isn't a pressing
issue.
Line 12:
The LV to operate on.
Line 13:
Devices that are to be used to satisfy the conversion request. If the
operation removes devices or splits a mirror, then the devices specified
form the list of candidates for removal. If the operation adds or replaces
devices, then the devices specified form the list of candidates for allocation.
###############################################
# Chapter 2: LVM RAID internal representation #
###############################################
The internal representation is somewhat like mirroring, but with alterations
for the different metadata components. LVM mirroring has a single log LV,
but RAID will have one for each data device. Because of this, I've added a
new 'areas' list to the 'struct lv_segment' - 'meta_areas'. There is exactly
a one-to-one relationship between 'areas' and 'meta_areas'. The 'areas' array
still holds the data sub-lv's (similar to mirroring), while the 'meta_areas'
array holds the metadata sub-lv's (akin to the mirroring log device).
The sub_lvs will be named '%s_rimage_%d' instead of '%s_mimage_%d' as it is
for mirroring, and '%s_rmeta_%d' instead of '%s_mlog'. Thus, you can imagine
an LV named 'foo' with the following layout:
foo
[foo's lv_segment]
|
|-> foo_rimage_0 (areas[0])
| [foo_rimage_0's lv_segment]
|-> foo_rimage_1 (areas[1])
| [foo_rimage_1's lv_segment]
|
|-> foo_rmeta_0 (meta_areas[0])
| [foo_rmeta_0's lv_segment]
|-> foo_rmeta_1 (meta_areas[1])
| [foo_rmeta_1's lv_segment]
LVM Meta-data format
====================
The RAID format will need to be able to store parameters that are unique to
RAID and unique to specific RAID sub-devices. It will be modeled after that
of mirroring.
Here is an example of the mirroring layout:
lv {
id = "agL1vP-1B8Z-5vnB-41cS-lhBJ-Gcvz-dh3L3H"
status = ["READ", "WRITE", "VISIBLE"]
flags = []
segment_count = 1
segment1 {
start_extent = 0
extent_count = 125 # 500 Megabytes
type = "mirror"
mirror_count = 2
mirror_log = "lv_mlog"
region_size = 1024
mirrors = [
"lv_mimage_0", 0,
"lv_mimage_1", 0
]
}
}
The real trick is dealing with the metadata devices. Mirroring has an entry,
'mirror_log', in the top-level segment. This won't work for RAID because there
is a one-to-one mapping between the data devices and the metadata devices. The
mirror devices are layed-out in sub-device/le pairs. The 'le' parameter is
redundant since it will always be zero. So for RAID, I have simple put the
metadata and data devices in pairs without the 'le' parameter.
RAID metadata:
lv {
id = "EnpqAM-5PEg-i9wB-5amn-P116-1T8k-nS3GfD"
status = ["READ", "WRITE", "VISIBLE"]
flags = []
segment_count = 1
segment1 {
start_extent = 0
extent_count = 125 # 500 Megabytes
type = "raid1"
device_count = 2
region_size = 1024
raids = [
"lv_rmeta_0", "lv_rimage_0",
"lv_rmeta_1", "lv_rimage_1",
]
}
}
The metadata also must be capable of representing the various tunables. We
already have a good example for one from mirroring, region_size.
'max_write_behind', 'stripe_cache', and '[min|max]_recovery_rate' could also
be handled in this way. However, 'write_mostly' cannot be handled in this
way, because it is a characteristic associated with the sub_lvs, not the
array as a whole. In these cases, the status field of the sub-lv's themselves
will hold these flags - the meaning being only useful in the larger context.
##############################################
# Chapter 3: LVM RAID implementation details #
##############################################
New Segment Type(s)
===================
I've created a new file 'lib/raid/raid.c' that will handle the various different
RAID types. While there will be a unique segment type for each RAID variant,
they will all share a common backend - segtype_handler functions and
segtype->flags = SEG_RAID.
I'm also adding a new field to 'struct segment_type', parity_devs. For every
segment_type except RAID4/5/6, this will be 0. This field facilitates in
allocation and size calculations. For example, the lvcreate for RAID5 would
look something like:
~> lvcreate --type raid5 -L 30G -i 3 -n my_raid5 my_vg
or
~> lvcreate --type raid5 -n my_raid5 my_vg /dev/sd[bcdef]1
In the former case, the stripe count (3) and device size are computed, and
then 'segtype->parity_devs' extra devices are allocated of the same size. In
the latter case, the number of PVs is determined and 'segtype->parity_devs' is
subtracted off to determine the number of stripes.
This should also work in the case of RAID10 and doing things in this manor
should not affect the way size is calculated via the area_multiple.
Allocation
==========
When a RAID device is created, metadata LVs must be created along with the
data LVs that will ultimately compose the top-level RAID array. For the
foreseeable future, the metadata LVs must reside on the same device as (or
at least one of the devices that compose) the data LV. We use this property
to simplify the allocation process. Rather than allocating for the data LVs
and then asking for a small chunk of space on the same device (or the other
way around), we simply ask for the aggregate size of the data LV plus the
metadata LV. Once we have the space allocated, we divide it between the
metadata and data LVs. This also greatly simplifies the process of finding
parallel space for all the data LVs that will compose the RAID array. When
a RAID device is resized, we will not need to take the metadata LV into
account, because it will already be present.
Apart from the metadata areas, the other unique characteristic of RAID
devices is the parity device count. The number of parity devices does nothing
to the calculation of size-per-device. The 'area_multiple' means nothing
here. The parity devices will simply be the same size as all the other devices
and will also require a metadata LV (i.e. it is treated no differently than
the other devices).
Therefore, to allocate space for RAID devices, we need to know two things:
1) how many parity devices are required and 2) does an allocated area need to
be split out for the metadata LVs after finding the space to fill the request.
We simply add these two fields to the 'alloc_handle' data structure as,
'parity_count' and 'alloc_and_split_meta'. These two fields get set in
'_alloc_init'. The 'segtype->parity_devs' holds the number of parity
drives and can be directly copied to 'ah->parity_count' and
'alloc_and_split_meta' is set when a RAID segtype is detected and
'metadata_area_count' has been specified. With these two variables set, we
can calculate how many allocated areas we need. Also, in the routines that
find the actual space, they stop not when they have found ah->area_count but
when they have found (ah->area_count + ah->parity_count).
Conversion
==========
RAID -> RAID, adding images
---------------------------
When adding images to a RAID array, metadata and data components must be added
as a pair. It is best to perform as many operations as possible before writing
new LVM metadata. This allows us to error-out without having to unwind any
changes. It also makes things easier if the machine should crash during a
conversion operation. Thus, the actions performed when adding a new image are:
1) Allocate the required number of metadata/data pairs using the method
describe above in 'Allocation' (i.e. find the metadata/data space
as one unit and split the space between them after found - this keeps
them together on the same device).
2) Form the metadata/data LVs from the allocated space (leave them
visible) - setting required RAID_[IMAGE | META] flags as appropriate.
3) Write the LVM metadata
4) Activate and clear the metadata LVs. The clearing of the metadata
requires the LVM metadata be written (step 3) and is a requirement
before adding the new metadata LVs to the array. If the metadata
is not cleared, it carry residual superblock state from a previous
array the device may have been part of.
5) Deactivate new sub-LVs and set them "hidden".
6) expand the 'first_seg(raid_lv)->areas' and '->meta_areas' array
for inclusion of the new sub-LVs
7) Add new sub-LVs and update 'first_seg(raid_lv)->area_count'
8) Commit new LVM metadata
Failure during any of these steps will not affect the original RAID array. In
the worst scenario, the user may have to remove the new sub-LVs that did not
yet make it into the array.
RAID -> RAID, removing images
-----------------------------
To remove images from a RAID, the metadata/data LV pairs must be removed
together. This is pretty straight-forward, but one place where RAID really
differs from the "mirror" segment type is how the resulting "holes" are filled.
When a device is removed from a "mirror" segment type, it is identified, moved
to the end of the 'mirrored_seg->areas' array, and then removed. This action
causes the other images to shift down and fill the position of the device which
was removed. While "raid1" could be handled in this way, the other RAID types
could not be - it would corrupt the ordering of the data on the array. Thus,
when a device is removed from a RAID array, the corresponding metadata/data
sub-LVs are removed from the 'raid_seg->meta_areas' and 'raid_seg->areas' arrays.
The slot in these 'lv_segment_area' arrays are set to 'AREA_UNASSIGNED'. RAID
is perfectly happy to construct a DM table mapping with '- -' if it comes across
area assigned in such a way. The pair of dashes is a valid way to tell the RAID
kernel target that the slot should be considered empty. So, we can remove
devices from a RAID array without affecting the correct operation of the RAID.
(It also becomes easy to replace the empty slots properly if a spare device is
available.) In the case of RAID1 device removal, the empty slot can be safely
eliminated. This is done by shifting the higher indexed devices down to fill
the slot. Even the names of the images will be renamed to properly reflect
their index in the array. Unlike the "mirror" segment type, you will never have
an image named "*_rimage_1" occupying the index position 0.
As with adding images, removing images holds off on commiting LVM metadata
until all possible changes have been made. This reduces the likelyhood of bad
intermediate stages being left due to a failure of operation or machine crash.
RAID1 '--splitmirrors', '--trackchanges', and '--merge' operations
------------------------------------------------------------------
This suite of operations is only available to the "raid1" segment type.
Splitting an image from a RAID1 array is almost identical to the removal of
an image described above. However, the metadata LV associated with the split
image is removed and the data LV is kept and promoted to a top-level device.
(i.e. It is made visible and stripped of its RAID_IMAGE status flags.)
When the '--trackchanges' option is given along with the '--splitmirrors'
argument, the metadata LV is left as part of the original array. The data LV
is set as 'VISIBLE' and read-only (~LVM_WRITE). When the array DM table is
being created, it notices the read-only, VISIBLE nature of the sub-LV and puts
in the '- -' sentinel. Only a single image can be split from the mirror and
the name of the sub-LV cannot be changed. Unlike '--splitmirrors' on its own,
the '--name' argument must not be specified. Therefore, the name of the newly
split LV will remain the same '<lv>_rimage_<N>', where 'N' is the index of the
slot in the array for which it is associated.
When an LV which was split from a RAID1 array with the '--trackchanges' option
is merged back into the array, its read/write status is restored and it is
set as "hidden" again. Recycling the array (suspend/resume) restores the sub-LV
to its position in the array and begins the process of sync'ing the changes that
were made since the time it was split from the array.
RAID device replacement with '--replace'
----------------------------------------
This option is available to all RAID segment types.
The '--replace' option can be used to remove a particular device from a RAID
logical volume and replace it with a different one in one action (CLI command).
The device device to be removed is specified as the argument to the '--replace'
option. This option can be specified more than once in a single command,
allowing multiple devices to be replaced at the same time - provided the RAID
logical volume has the necessary redundancy to allow the action. The devices
to be used as replacements can also be specified in the command; similar to the
way allocatable devices are specified during an up-convert.
Example> lvconvert --replace /dev/sdd1 --replace /dev/sde1 vg/lv /dev/sd[bc]1
RAID '--repair'
---------------
This 'lvconvert' option is available to all RAID segment types and is described
under "RAID Fault Handling".
RAID Fault Handling
===================
RAID is not like traditional LVM mirroring (i.e. the "mirror" segment type).
LVM mirroring required failed devices to be removed or the logical volume would
simply hang. RAID arrays can keep on running with failed devices. In fact, for
RAID types other than RAID1 removing a device would mean substituting an error
target or converting to a lower level RAID (e.g. RAID6 -> RAID5, or RAID4/5 to
RAID0). Therefore, rather than removing a failed device unconditionally, the
user has a couple of options to choose from.
The automated response to a device failure is handled according to the user's
preference defined in lvm.conf:activation.raid_fault_policy. The options are:
# "warn" - Use the system log to warn the user that a device in the RAID
# logical volume has failed. It is left to the user to run
# 'lvconvert --repair' manually to remove or replace the failed
# device. As long as the number of failed devices does not
# exceed the redundancy of the logical volume (1 device for
# raid4/5, 2 for raid6, etc) the logical volume will remain
# usable.
#
# "remove" - NOT CURRENTLY IMPLEMENTED OR DOCUMENTED IN example.conf.in.
# Remove the failed device and reduce the RAID logical volume
# accordingly. If a single device dies in a 3-way mirror,
# remove it and reduce the mirror to 2-way. If a single device
# dies in a RAID 4/5 logical volume, reshape it to a striped
# volume, etc - RAID 6 -> RAID 4/5 -> RAID 0. If devices
# cannot be removed for lack of redundancy, fail.
# THIS OPTION CANNOT YET BE IMPLEMENTED BECAUSE RESHAPE IS NOT
# YET SUPPORTED IN linux/drivers/md/dm-raid.c. The superblock
# does not yet hold enough information to support reshaping.
#
# "allocate" - Attempt to use any extra physical volumes in the volume
# group as spares and replace faulty devices.
If manual intervention is taken, either in response to the automated solution's
"warn" mode or simply because dmeventd hadn't run, then the user can call
'lvconvert --repair vg/lv' and follow the prompts. They will be prompted
whether or not to replace the device and cause a full recovery of the failed
device.
If replacement is chosen via the manual method or "allocate" is the policy taken
by the automated response, then 'lvconvert --replace' is the mechanism used to
attempt the replacement of the failed device.
'vgreduce --removemissing' is ineffectual at repairing RAID logical volumes. It
will remove the failed device, but the RAID logical volume will simply continue
to operate with an <unknown> sub-LV. The user should clear the failed device
with 'lvconvert --repair'.

202
doc/lvm_fault_handling.txt Normal file
View File

@@ -0,0 +1,202 @@
LVM device fault handling
=========================
Introduction
------------
This document is to serve as the definitive source for information
regarding the policies and procedures surrounding device failures
in LVM. It codifies LVM's responses to device failures as well as
the responsibilities of administrators.
Device failures can be permanent or transient. A permanent failure
is one where a device becomes inaccessible and will never be
revived. A transient failure is a failure that can be recovered
from (e.g. a power failure, intermittent network outage, block
relocation, etc). The policies for handling both types of failures
is described herein.
Users need to be aware that there are two implementations of RAID1 in LVM.
The first is defined by the "mirror" segment type. The second is defined by
the "raid1" segment type. The characteristics of each of these are defined
in lvm.conf under 'mirror_segtype_default' - the configuration setting used to
identify the default RAID1 implementation used for LVM operations.
Available Operations During a Device Failure
--------------------------------------------
When there is a device failure, LVM behaves somewhat differently because
only a subset of the available devices will be found for the particular
volume group. The number of operations available to the administrator
is diminished. It is not possible to create new logical volumes while
PVs cannot be accessed, for example. Operations that create, convert, or
resize logical volumes are disallowed, such as:
- lvcreate
- lvresize
- lvreduce
- lvextend
- lvconvert (unless '--repair' is used)
Operations that activate, deactivate, remove, report, or repair logical
volumes are allowed, such as:
- lvremove
- vgremove (will remove all LVs, but not the VG until consistent)
- pvs
- vgs
- lvs
- lvchange -a [yn]
- vgchange -a [yn]
Operations specific to the handling of failed devices are allowed and
are as follows:
- 'vgreduce --removemissing <VG>': This action is designed to remove
the reference of a failed device from the LVM metadata stored on the
remaining devices. If there are (portions of) logical volumes on the
failed devices, the ability of the operation to proceed will depend
on the type of logical volumes found. If an image (i.e leg or side)
of a mirror is located on the device, that image/leg of the mirror
is eliminated along with the failed device. The result of such a
mirror reduction could be a no-longer-redundant linear device. If
a linear, stripe, or snapshot device is located on the failed device
the command will not proceed without a '--force' option. The result
of using the '--force' option is the entire removal and complete
loss of the non-redundant logical volume. If an image or metadata area
of a RAID logical volume is on the failed device, the sub-LV affected is
replace with an error target device - appearing as <unknown> in 'lvs'
output. RAID logical volumes cannot be completely repaired by vgreduce -
'lvconvert --repair' (listed below) must be used. Once this operation is
complete on volume groups not containing RAID logical volumes, the volume
group will again have a complete and consistent view of the devices it
contains. Thus, all operations will be permitted - including creation,
conversion, and resizing operations. It is currently the preferred method
to call 'lvconvert --repair' on the individual logical volumes to repair
them followed by 'vgreduce --removemissing' to extract the physical volume's
representation in the volume group.
- 'lvconvert --repair <VG/LV>': This action is designed specifically
to operate on individual logical volumes. If, for example, a failed
device happened to contain the images of four distinct mirrors, it would
be necessary to run 'lvconvert --repair' on each of them. The ultimate
result is to leave the faulty device in the volume group, but have no logical
volumes referencing it. (This allows for 'vgreduce --removemissing' to
removed the physical volumes cleanly.) In addition to removing mirror or
RAID images that reside on failed devices, 'lvconvert --repair' can also
replace the failed device if there are spare devices available in the
volume group. The user is prompted whether to simply remove the failed
portions of the mirror or to also allocate a replacement, if run from the
command-line. Optionally, the '--use-policies' flag can be specified which
will cause the operation not to prompt the user, but instead respect
the policies outlined in the LVM configuration file - usually,
/etc/lvm/lvm.conf. Once this operation is complete, the logical volumes
will be consistent. However, the volume group will still be inconsistent -
due to the refernced-but-missing device/PV - and operations will still be
restricted to the aformentioned actions until either the device is
restored or 'vgreduce --removemissing' is run.
Device Revival (transient failures):
------------------------------------
During a device failure, the above section describes what limitations
a user can expect. However, if the device returns after a period of
time, what to expect will depend on what has happened during the time
period when the device was failed. If no automated actions (described
below) or user actions were necessary or performed, then no change in
operations or logical volume layout will occur. However, if an
automated action or one of the aforementioned repair commands was
manually run, the returning device will be perceived as having stale
LVM metadata. In this case, the user can expect to see a warning
concerning inconsistent metadata. The metadata on the returning
device will be automatically replaced with the latest copy of the
LVM metadata - restoring consistency. Note, while most LVM commands
will automatically update the metadata on a restored devices, the
following possible exceptions exist:
- pvs (when it does not read/update VG metadata)
Automated Target Response to Failures:
--------------------------------------
The only LVM target types (i.e. "personalities") that have an automated
response to failures are the mirror and RAID logical volumes. The other target
types (linear, stripe, snapshot, etc) will simply propagate the failure.
[A snapshot becomes invalid if its underlying device fails, but the
origin will remain valid - presuming the origin device has not failed.]
Starting with the "mirror" segment type, there are three types of errors that
a mirror can suffer - read, write, and resynchronization errors. Each is
described in depth below.
Mirror read failures:
If a mirror is 'in-sync' (i.e. all images have been initialized and
are identical), a read failure will only produce a warning. Data is
simply pulled from one of the other images and the fault is recorded.
Sometimes - like in the case of bad block relocation - read errors can
be recovered from by the storage hardware. Therefore, it is up to the
user to decide whether to reconfigure the mirror and remove the device
that caused the error. Managing the composition of a mirror is done with
'lvconvert' and removing a device from a volume group can be done with
'vgreduce'.
If a mirror is not 'in-sync', a read failure will produce an I/O error.
This error will propagate all the way up to the applications above the
logical volume (e.g. the file system). No automatic intervention will
take place in this case either. It is up to the user to decide what
can be done/salvaged in this senario. If the user is confident that the
images of the mirror are the same (or they are willing to simply attempt
to retreive whatever data they can), 'lvconvert' can be used to eliminate
the failed image and proceed.
Mirror resynchronization errors:
A resynchronization error is one that occurs when trying to initialize
all mirror images to be the same. It can happen due to a failure to
read the primary image (the image considered to have the 'good' data), or
due to a failure to write the secondary images. This type of failure
only produces a warning, and it is up to the user to take action in this
case. If the error is transient, the user can simply reactivate the
mirrored logical volume to make another attempt at resynchronization.
If attempts to finish resynchronization fail, 'lvconvert' can be used to
remove the faulty device from the mirror.
TODO...
Some sort of response to this type of error could be automated.
Since this document is the definitive source for how to handle device
failures, the process should be defined here. If the process is defined
but not implemented, it should be noted as such. One idea might be to
make a single attempt to suspend/resume the mirror in an attempt to
redo the sync operation that failed. On the other hand, if there is
a permanent failure, it may simply be best to wait for the user or the
automated response that is sure to follow from a write failure.
...TODO
Mirror write failures:
When a write error occurs on a mirror constituent device, an attempt
to handle the failure is automatically made. This is done by calling
'lvconvert --repair --use-policies'. The policies implied by this
command are set in the LVM configuration file. They are:
- mirror_log_fault_policy: This defines what action should be taken
if the device containing the log fails. The available options are
"remove" and "allocate". Either of these options will cause the
faulty log device to be removed from the mirror. The "allocate"
policy will attempt the further action of trying to replace the
failed disk log by using space that might be available in the
volume group. If the allocation fails (or the "remove" policy
is specified), the mirror log will be maintained in memory. Should
the machine be rebooted or the logical volume deactivated, a
complete resynchronization of the mirror will be necessary upon
the follow activation - such is the nature of a mirror with a 'core'
log. The default policy for handling log failures is "allocate".
The service disruption incurred by replacing the failed log is
negligible, while the benefits of having persistent log is
pronounced.
- mirror_image_fault_policy: This defines what action should be taken
if a device containing an image fails. Again, the available options
are "remove" and "allocate". Both of these options will cause the
faulty image device to be removed - adjusting the logical volume
accordingly. For example, if one image of a 2-way mirror fails, the
mirror will be converted to a linear device. If one image of a
3-way mirror fails, the mirror will be converted to a 2-way mirror.
The "allocate" policy takes the further action of trying to replace
the failed image using space that is available in the volume group.
Replacing a failed mirror image will incure the cost of
resynchronizing - degrading the performance of the mirror. The
default policy for handling an image failure is "remove". This
allows the mirror to still function, but gives the administrator the
choice of when to incure the extra performance costs of replacing
the failed image.
RAID logical volume device failures are handled differently from the "mirror"
segment type. Discussion of this can be found in lvm2-raid.txt.

197
doc/lvmetad_design.txt Normal file
View File

@@ -0,0 +1,197 @@
The design of LVMetaD
=====================
Invocation and setup
--------------------
The daemon should be started automatically by the first LVM command issued on
the system, when needed. The usage of the daemon should be configurable in
lvm.conf, probably with its own section. Say
lvmetad {
enabled = 1 # default
autostart = 1 # default
socket = "/path/to/socket" # defaults to /var/run/lvmetad or such
}
Library integration
-------------------
When a command needs to access metadata, it currently needs to perform a scan
of the physical devices available in the system. This is a possibly quite
expensive operation, especially if many devices are attached to the system. In
most cases, LVM needs a complete image of the system's PVs to operate
correctly, so all devices need to be read, to at least determine presence (and
content) of a PV label. Additional IO is done to obtain or write metadata
areas, but this is only marginally related and addressed by Dave's
metadata-balancing work.
In the existing scanning code, a cache layer exists, under
lib/cache/lvmcache.[hc]. This layer is keeping a textual copy of the metadata
for a given volume group, in a format_text form, as a character string. We can
plug the lvmetad interface at this level: in lvmcache_get_vg, which is
responsible for looking up metadata in a local cache, we can, if the metadata
is not available in the local cache, query lvmetad. Under normal circumstances,
when a VG is not cached yet, this operation fails and prompts the caller to
perform a scan. Under the lvmetad enabled scenario, this would never happen and
the fall-through would only be activated when lvmetad is disabled, which would
lead to local cache being populated as usual through a locally executed scan.
Therefore, existing stand-alone (i.e. no lvmetad) functionality of the tools
would be not compromised by adding lvmetad. With lvmetad enabled, however,
significant portions of the code would be short-circuited.
Scanning
--------
Initially (at least), the lvmetad will be not allowed to read disks: it will
rely on an external program to provide the metadata. In the ideal case, this
will be triggered by udev. The role of lvmetad is then to collect and maintain
an accurate (up to the data it has received) image of the VGs available in the
system. I imagine we could extend the pvscan command (or add a new one, say
lvmetad_client, if pvscan is found to be inappropriate):
$ pvscan --cache /dev/foo
$ pvscan --cache --remove /dev/foo
These commands would simply read the label and the MDA (if applicable) from the
given PV and feed that data to the running lvmetad, using
lvmetad_{add,remove}_pv (see lvmetad_client.h).
We however need to ensure a couple of things here:
1) only LVM commands ever touch PV labels and VG metadata
2) when a device is added or removed, udev fires a rule to notify lvmetad
While the latter is straightforward, there are issues with the first. We
*might* want to invoke the dreaded "watch" udev rule in this case, however it
ends up being implemented. Of course, we can also rely on the sysadmin to be
reasonable and not write over existing LVM metadata without first telling LVM
to let go of the respective device(s).
Even if we simply ignore the problem, metadata write should fail in these
cases, so the admin should be unable to do substantial damage to the system. If
there were active LVs on top of the vanished PV, they are in trouble no matter
what happens there.
Incremental scan
----------------
There are some new issues arising with the "udev" scan mode. Namely, the
devices of a volume group will be appearing one by one. The behaviour in this
case will be very similar to the current behaviour when devices are missing:
the volume group, until *all* its physical volumes have been discovered and
announced by udev, will be in a state with some of its devices flagged as
MISSING_PV. This means that the volume group will be, for most purposes,
read-only until it is complete and LVs residing on yet-unknown PVs won't
activate without --partial. Under usual circumstances, this is not a problem
and the current code for dealing with MISSING_PVs should be adequate.
However, the code for reading volume groups from disks will need to be adapted,
since it currently does not work incrementally. Such support will need to track
metadata-less PVs that have been encountered so far and to provide a way to
update an existing volume group. When the first PV with metadata of a given VG
is encountered, the VG is created in lvmetad (probably in the form of "struct
volume_group") and it is assigned any previously cached metadata-less PVs it is
referencing. Any PVs that were not yet encountered will be marked as MISSING_PV
in the "struct volume_group". Upon scanning a new PV, if it belongs to any
already-known volume group, this PV is checked for consistency with the already
cached metadata (in a case of mismatch, the VG needs to be recovered or
declared conflicted), and is subsequently unmarked MISSING_PV. Care need be
taken not to unmark MISSING_PV on PVs that have this flag in their persistent
metadata, though.
The most problematic aspect of the whole design may be orphan PVs. At any given
point, a metadata-less PV may appear orphaned, if a PV of its VG with metadata
has not been scanned yet. Eventually, we will have to decide that this PV is
really an orphan and enable its usage for creating or extending VGs. In
practice, the decision might be governed by a timeout or assumed immediately --
the former case is a little safer, the latter is probably more transparent. I
am not very keen on using timeouts and we can probably assume that the admin
won't blindly try to re-use devices in a way that would trip up LVM in this
respect. I would be in favour of just assuming that metadata-less VGs with no
known referencing VGs are orphans -- after all, this is the same approach as we
use today. The metadata balancing support may stress this a bit more than the
usual contemporary setups do, though.
Automatic activation
--------------------
It may also be prudent to provide a command that will block until a volume
group is complete, so that scripts can reliably activate/mount LVs and such. Of
course, some PVs may never appear, so a timeout is necessary. Again, this is
something not handled by current tools, but may become more important in
future. It probably does not need to be implemented right away though.
The other aspect of the progressive VG assembly is automatic activation. The
currently only problem with that is that we would like to avoid having
activation code in lvmetad, so we would prefer to fire up an event of some sort
and let someone else handle the activation and whatnot.
Cluster support
---------------
When working in a cluster, clvmd integration will be necessary: clvmd will need
to instruct lvmetad to re-read metadata as appropriate due to writes on remote
hosts. Overall, this is not hard, but the devil is in the details. I would
possibly disable lvmetad for clustered volume groups in the first phase and
only proceed when the local mode is robust and well tested.
Protocol & co.
--------------
I expect a simple text-based protocol executed on top of an Unix Domain Socket
to be the communication interface for lvmetad. Ideally, the requests and
replies will be well-formed "config file" style strings, so we can re-use
existing parsing infrastructure.
Since we already have two daemons, I would probably look into factoring some
common code for daemon-y things, like sockets, communication (including thread
management) and maybe logging and re-using it in all the daemons (clvmd,
dmeventd and lvmetad). This shared infrastructure should live under
daemons/common, and the existing daemons shall be gradually migrated to the
shared code.
Future extensions
-----------------
The above should basically cover the use of lvmetad as a cache-only
daemon. Writes could still be executed locally, and the new metadata version
can be provided to lvmetad through the socket the usual way. This is fairly
natural and in my opinion reasonable. The lvmetad acts like a cache that will
hold metadata, no more no less.
Above this, there is a couple of things that could be worked on later, when the
above basic design is finished and implemented.
_Metadata writing_: We may want to support writing new metadata through
lvmetad. This may or may not be a better design, but the write itself should be
more or less orthogonal to the rest of the story outlined above.
_Locking_: Other than directing metadata writes through lvmetad, one could
conceivably also track VG/LV locking through the same.
_Clustering_: A deeper integration of lvmetad with clvmd might be possible and
maybe desirable. Since clvmd communicates over the network with other clvmd
instances, this could be extended to metadata exchange between lvmetad's,
further cutting down scanning costs. This would combine well with the
write-through-lvmetad approach.
Testing
-------
Since (at least bare-bones) lvmetad has no disk interaction and is fed metadata
externally, it should be very amenable to automated testing. We need to provide
a client that can feed arbitrary, synthetic metadata to the daemon and request
the data back, providing reasonable (nearly unit-level) testing infrastructure.
Battle plan & code layout
=========================
- config_tree from lib/config needs to move to libdm/
- daemon/common *client* code can go to libdm/ as well (say
libdm/libdm-daemon.{h,c} or such)
- daemon/common *server* code stays, is built in daemon/ toplevel as a static
library, say libdaemon-common.a
- daemon/lvmetad *client* code goes to lib/lvmetad
- daemon/lvmetad *server* code stays (links in daemon/libdaemon_common.a)

View File

@@ -79,13 +79,13 @@ Usage Examples
lvm.conf: (Identical on every machine - global settings)
tags {
hostname_tags = 1
hosttags = 1
}
From any machine in the cluster, add db1 to the list of machines that
activate vg1/lvol2:
lvchange --tag @db1 vg1/lvol2
lvchange --addtag @db1 vg1/lvol2
(followed by lvchange -ay to actually activate it)
@@ -98,10 +98,10 @@ Usage Examples
Option (i) - centralised admin, static configuration replicated between hosts
# Add @database tag to vg1's metadata
vgchange --tag @database vg1
vgchange --addtag @database vg1
# Add @fileserver tag to vg2's metadata
vgchange --tag @fileserver vg2
vgchange --addtag @fileserver vg2
lvm.conf: (Identical on every machine)
tags {
@@ -122,17 +122,17 @@ Usage Examples
}
In the event of the fileserver host going down, vg2 can be brought up
on fsb1 by running *on any node* 'vgchange --tag @fileserverbackup vg2'
on fsb1 by running *on any node* 'vgchange --addtag @fileserverbackup vg2'
followed by 'vgchange -ay vg2'
Option (ii) - localised admin & configuation
(i.e. each host holds *locally* which classes of volumes to activate)
# Add @database tag to vg1's metadata
vgchange --tag @database vg1
vgchange --addtag @database vg1
# Add @fileserver tag to vg2's metadata
vgchange --tag @fileserver vg2
vgchange --addtag @fileserver vg2
lvm.conf: (Identical on every machine - global settings)
tags {

83
doc/udev_assembly.txt Normal file
View File

@@ -0,0 +1,83 @@
Automatic device assembly by udev
=================================
We want to asynchronously assemble and activate devices as their components
become available. Eventually, the complete storage stack should be covered,
including: multipath, cryptsetup, LVM, mdadm. Each of these can be addressed
more or less separately.
The general plan of action is to simply provide udev rules for each of the
device "type": for MD component devices, PVs, LUKS/crypto volumes and for
multipathed SCSI devices. There's no compelling reason to have a daemon do these
things: all systems that actually need to assemble multiple devices into a
single entity already either support incremental assembly or will do so shortly.
Whenever in this document we talk about udev rules, these may include helper
programs that implement a multi-step process. In many cases, it can be expected
that the functionality can be implemented in couple lines of shell (or couple
hundred of C).
Multipath
---------
For multipath, we will need to rely on SCSI IDs for now, until we have a better
scheme of things, since multipath devices can't be identified until the second
path appears, and unfortunately we need to decide whether a device is multipath
when the *first* path appears. Anyway, the multipath folks need to sort this
out, but it shouldn't bee too hard. Just bring up multipathing on anything that
appears and is set up for multipathing.
LVM
---
For LVM, the crucial piece of the puzzle is lvmetad, which allows us to build up
VGs from PVs as they appear, and at the same time collect information on what is
already available. A command, pvscan --cache is expected to be used to
implement udev rules. It is relatively easy to make this command print out a
list of VGs (and possibly LVs) that have been made available by adding any
particular device to the set of visible devices. In othe words, udev says "hey,
/dev/sdb just appeared", calls pvscan --cache, which talks to lvmetad, which
says "cool, that makes vg0 complete". Pvscan takes this info and prints it out,
and the udev rule can then somehow decide whether anything needs to be done
about this "vg0". Presumably a table of devices that need to be activated
automatically is made available somewhere in /etc (probably just a simple list
of volume groups or logical volumes, given by name or UUID, globbing
possible). The udev rule can then consult this file.
Cryptsetup
----------
This may be the trickiest of the lot: the obvious hurdle here is that crypto
volumes need to somehow obtain a key (passphrase, physical token or such),
meaning there is interactivity involved. On the upside, dm-crypt is a 1:1
system: one encrypted device results in one decrypted device, so no assembly or
notification needs to be done. While interactivity is a challenge, there are at
least partial solutions around. (TODO: Milan should probably elaborate here.)
(For LUKS devices, these can probably be detected automatically. I suppose that
non-LUKS devices can be looked up in crypttab by the rule, to decide what is the
appropriate action to take.)
MD
--
Fortunately, MD (namely mdadm) already comes with a mechanism for incremental
assembly (mdadm -I or such). We can assume that this fits with the rest of stack
nicely.
Filesystem &c. discovery
========================
Considering other requirements that exist for storage systems (namely
large-scale storage deployments), it is absolutely not feasible to have the
system hunt automatically for filesystems based on their UUIDs. In a number of
cases, this could mean activating tens of thousands of volumes. On small
systems, asking for all volumes to be brought up automatically is probably the
best route anyway, and once all storage devices are activated, scanning for
filesystems is no different from today.
In effect, no action is required on this count: only filesystems that are
available on already active devices can be mounted by their UUID. Activating
volumes by naming a filesystem UUID is useless, since to read the UUID the
volume needs to be active first.

View File

@@ -1,9 +1,11 @@
@top_srcdir@/daemons/clvmd/clvm.h
@top_srcdir@/daemons/dmeventd/libdevmapper-event.h
@top_srcdir@/daemons/lvmetad/lvmetad-client.h
@top_srcdir@/liblvm/lvm2app.h
@top_srcdir@/lib/activate/activate.h
@top_srcdir@/lib/activate/targets.h
@top_srcdir@/lib/cache/lvmcache.h
@top_srcdir@/lib/cache/lvmetad.h
@top_srcdir@/lib/commands/errors.h
@top_srcdir@/lib/commands/toolcontext.h
@top_srcdir@/lib/config/config.h
@@ -16,6 +18,7 @@
@top_srcdir@/lib/display/display.h
@top_srcdir@/lib/filters/filter-composite.h
@top_srcdir@/lib/filters/filter-md.h
@top_srcdir@/lib/filters/filter-mpath.h
@top_srcdir@/lib/filters/filter-persistent.h
@top_srcdir@/lib/filters/filter-regex.h
@top_srcdir@/lib/filters/filter-sysfs.h
@@ -30,11 +33,14 @@
@top_srcdir@/lib/locking/locking.h
@top_srcdir@/lib/log/log.h
@top_srcdir@/lib/log/lvm-logging.h
@top_srcdir@/lib/metadata/lv.h
@top_srcdir@/lib/metadata/lv_alloc.h
@top_srcdir@/lib/metadata/metadata.h
@top_srcdir@/lib/metadata/metadata-exported.h
@top_srcdir@/lib/metadata/pv.h
@top_srcdir@/lib/metadata/pv_alloc.h
@top_srcdir@/lib/metadata/segtype.h
@top_srcdir@/lib/metadata/vg.h
@top_srcdir@/lib/mm/memlock.h
@top_srcdir@/lib/mm/xlate.h
@top_builddir@/lib/misc/configure.h
@@ -49,9 +55,13 @@
@top_srcdir@/lib/misc/lvm-string.h
@top_builddir@/lib/misc/lvm-version.h
@top_srcdir@/lib/misc/lvm-wrappers.h
@top_srcdir@/lib/misc/lvm-percent.h
@top_srcdir@/lib/misc/sharedlib.h
@top_srcdir@/lib/report/properties.h
@top_srcdir@/lib/report/report.h
@top_srcdir@/lib/uuid/uuid.h
@top_srcdir@/libdaemon/client/daemon-client.h
@top_srcdir@/libdaemon/client/daemon-shared.h
@top_srcdir@/libdm/libdevmapper.h
@top_srcdir@/libdm/misc/dm-ioctl.h
@top_srcdir@/libdm/misc/dm-logging.h

View File

@@ -1,6 +1,6 @@
#
# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
# Copyright (C) 2004 Red Hat, Inc. All rights reserved.
# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -12,16 +12,11 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
SHELL = /bin/sh
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
LN_S = @LN_S@
.PHONY: clean distclean all install pofile install_cluster install_device-mapper
include $(top_builddir)/make.tmpl
all: .symlinks_created
@@ -30,23 +25,11 @@ all: .symlinks_created
for i in `cat $<`; do $(LN_S) $$i ; done
touch $@
distclean:
find . -maxdepth 1 -type l -exec $(RM) \{\} \;
$(RM) Makefile .include_symlinks .symlinks_created .symlinks
pofile: all
device-mapper: all
clean:
install:
install_cluster:
install_device-mapper:
install_lvm2:
cflow:
cflow: all
DISTCLEAN_TARGETS += $(shell find . -maxdepth 1 -type l)
DISTCLEAN_TARGETS += .include_symlinks .symlinks_created .symlinks

View File

@@ -1,6 +1,6 @@
#
# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
# Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -15,7 +15,6 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
VPATH = @srcdir@
ifeq ("@LVM1@", "shared")
SUBDIRS = format1
@@ -33,6 +32,18 @@ ifeq ("@MIRRORS@", "shared")
SUBDIRS += mirror
endif
ifeq ("@RAID@", "shared")
SUBDIRS += raid
endif
ifeq ("@REPLICATORS@", "shared")
SUBDIRS += replicator
endif
ifeq ("@THIN@", "shared")
SUBDIRS += thin
endif
SOURCES =\
activate/activate.c \
cache/lvmcache.c \
@@ -44,6 +55,7 @@ SOURCES =\
device/dev-io.c \
device/dev-md.c \
device/dev-swap.c \
device/dev-luks.c \
device/device.c \
display/display.c \
error/errseg.c \
@@ -53,6 +65,7 @@ SOURCES =\
filters/filter-regex.c \
filters/filter-sysfs.c \
filters/filter-md.c \
filters/filter-mpath.c \
filters/filter.c \
format_text/archive.c \
format_text/archiver.c \
@@ -69,22 +82,30 @@ SOURCES =\
locking/locking.c \
locking/no_locking.c \
log/log.c \
metadata/lv.c \
metadata/lv_manip.c \
metadata/merge.c \
metadata/metadata.c \
metadata/mirror.c \
metadata/pv.c \
metadata/pv_manip.c \
metadata/pv_map.c \
metadata/raid_manip.c \
metadata/replicator_manip.c \
metadata/segtype.c \
metadata/snapshot_manip.c \
metadata/thin_manip.c \
metadata/vg.c \
misc/crc.c \
misc/lvm-exec.c \
misc/lvm-file.c \
misc/lvm-globals.c \
misc/lvm-string.c \
misc/lvm-wrappers.c \
misc/lvm-percent.c \
misc/util.c \
mm/memlock.c \
report/properties.c \
report/report.c \
striped/striped.c \
uuid/uuid.c \
@@ -130,6 +151,18 @@ ifeq ("@MIRRORS@", "internal")
SOURCES += mirror/mirrored.c
endif
ifeq ("@RAID@", "internal")
SOURCES += raid/raid.c
endif
ifeq ("@REPLICATORS@", "internal")
SOURCES += replicator/replicator.c
endif
ifeq ("@THIN@", "internal")
SOURCES += thin/thin.c
endif
ifeq ("@DEVMAPPER@", "yes")
SOURCES +=\
activate/dev_manager.c \
@@ -142,24 +175,36 @@ ifeq ("@HAVE_LIBDL@", "yes")
misc/sharedlib.c
endif
ifeq ("@BUILD_LVMETAD@", "yes")
SOURCES +=\
cache/lvmetad.c
endif
ifeq ("@DMEVENTD@", "yes")
CLDFLAGS += -L../daemons/dmeventd
CLDFLAGS += -L$(top_builddir)/daemons/dmeventd
LIBS += -ldevmapper-event
endif
LIB_NAME = liblvm-internal
LIB_STATIC = $(LIB_NAME).a
CLEAN_TARGETS += $(LIB_NAME).cflow
ifeq ($(MAKECMDGOALS),distclean)
SUBDIRS =\
format1 \
format_pool \
snapshot \
mirror \
raid \
replicator \
thin \
locking
endif
include ../make.tmpl
CFLOW_LIST = $(SOURCES)
CFLOW_LIST_TARGET = $(LIB_NAME).cflow
include $(top_builddir)/make.tmpl
$(SUBDIRS): $(LIB_STATIC)
$(LIB_NAME).cflow: $(SOURCES)
set -e; (echo -n "SOURCES += "; \
echo $(SOURCES) | \
sed "s/^/ /;s/ / $(top_srcdir)\/lib\//g;s/$$//"; \
) > $@
cflow: $(LIB_NAME).cflow
DISTCLEAN_TARGETS += misc/configure.h misc/lvm-version.h

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -30,6 +30,17 @@ struct lvinfo {
uint32_t read_ahead;
};
struct lv_activate_opts {
int exclusive;
int origin_only;
int no_merging;
int real_pool;
int is_activate;
int skip_in_use;
unsigned revert;
unsigned read_only;
};
/* target attribute flags */
#define MIRROR_LOG_CLUSTERED 0x00000001U
@@ -44,7 +55,8 @@ int module_present(struct cmd_context *cmd, const char *target_name);
int target_present(struct cmd_context *cmd, const char *target_name,
int use_modprobe);
int target_version(const char *target_name, uint32_t *maj,
uint32_t *min, uint32_t *patchlevel);
uint32_t *min, uint32_t *patchlevel);
int lvm_dm_prefix_check(int major, int minor, const char *prefix);
int list_segment_modules(struct dm_pool *mem, const struct lv_segment *seg,
struct dm_list *modules);
int list_lv_modules(struct dm_pool *mem, const struct logical_volume *lv,
@@ -53,10 +65,11 @@ int list_lv_modules(struct dm_pool *mem, const struct logical_volume *lv,
void activation_release(void);
void activation_exit(void);
int lv_suspend(struct cmd_context *cmd, const char *lvid_s);
int lv_suspend_if_active(struct cmd_context *cmd, const char *lvid_s);
int lv_resume(struct cmd_context *cmd, const char *lvid_s);
int lv_resume_if_active(struct cmd_context *cmd, const char *lvid_s);
/* int lv_suspend(struct cmd_context *cmd, const char *lvid_s); */
int lv_suspend_if_active(struct cmd_context *cmd, const char *lvid_s, unsigned origin_only, unsigned exclusive);
int lv_resume(struct cmd_context *cmd, const char *lvid_s, unsigned origin_only);
int lv_resume_if_active(struct cmd_context *cmd, const char *lvid_s,
unsigned origin_only, unsigned exclusive, unsigned revert);
int lv_activate(struct cmd_context *cmd, const char *lvid_s, int exclusive);
int lv_activate_with_filter(struct cmd_context *cmd, const char *lvid_s,
int exclusive);
@@ -67,37 +80,65 @@ int lv_mknodes(struct cmd_context *cmd, const struct logical_volume *lv);
/*
* Returns 1 if info structure has been populated, else 0.
*/
int lv_info(struct cmd_context *cmd, const struct logical_volume *lv, struct lvinfo *info,
int with_open_count, int with_read_ahead);
int lv_info_by_lvid(struct cmd_context *cmd, const char *lvid_s,
int lv_info(struct cmd_context *cmd, const struct logical_volume *lv, int use_layer,
struct lvinfo *info, int with_open_count, int with_read_ahead);
int lv_info_by_lvid(struct cmd_context *cmd, const char *lvid_s, int use_layer,
struct lvinfo *info, int with_open_count, int with_read_ahead);
int lv_check_not_in_use(struct cmd_context *cmd, struct logical_volume *lv,
struct lvinfo *info);
/*
* Returns 1 if activate_lv has been set: 1 = activate; 0 = don't.
*/
int lv_activation_filter(struct cmd_context *cmd, const char *lvid_s,
int *activate_lv);
int lv_check_transient(struct logical_volume *lv);
/*
* Returns 1 if percent has been set, else 0.
*/
int lv_snapshot_percent(const struct logical_volume *lv, float *percent,
percent_range_t *percent_range);
int lv_mirror_percent(struct cmd_context *cmd, struct logical_volume *lv,
int wait, float *percent, percent_range_t *percent_range,
uint32_t *event_nr);
int lv_snapshot_percent(const struct logical_volume *lv, percent_t *percent);
int lv_mirror_percent(struct cmd_context *cmd, const struct logical_volume *lv,
int wait, percent_t *percent, uint32_t *event_nr);
int lv_raid_percent(const struct logical_volume *lv, percent_t *percent);
int lv_thin_pool_percent(const struct logical_volume *lv, int metadata,
percent_t *percent);
int lv_thin_percent(const struct logical_volume *lv, int mapped,
percent_t *percent);
int lv_thin_pool_transaction_id(const struct logical_volume *lv,
uint64_t *transaction_id);
/*
* Return number of LVs in the VG that are active.
*/
int lvs_in_vg_activated(struct volume_group *vg);
int lvs_in_vg_activated_by_uuid_only(struct volume_group *vg);
int lvs_in_vg_activated(const struct volume_group *vg);
int lvs_in_vg_opened(const struct volume_group *vg);
int lv_is_active(struct logical_volume *lv);
int lv_is_active(const struct logical_volume *lv);
int lv_is_active_but_not_locally(const struct logical_volume *lv);
int lv_is_active_exclusive(const struct logical_volume *lv);
int lv_is_active_exclusive_locally(const struct logical_volume *lv);
int lv_is_active_exclusive_remotely(const struct logical_volume *lv);
int monitor_dev_for_events(struct cmd_context *cmd,
struct logical_volume *lv, int do_reg);
int lv_has_target_type(struct dm_pool *mem, struct logical_volume *lv,
const char *layer, const char *target_type);
int monitor_dev_for_events(struct cmd_context *cmd, struct logical_volume *lv,
const struct lv_activate_opts *laopts, int do_reg);
#ifdef DMEVENTD
# include "libdevmapper-event.h"
char *get_monitor_dso_path(struct cmd_context *cmd, const char *libpath);
int target_registered_with_dmeventd(struct cmd_context *cmd, const char *libpath,
struct logical_volume *lv, int *pending);
int target_register_events(struct cmd_context *cmd, const char *dso, struct logical_volume *lv,
int evmask __attribute__((unused)), int set, int timeout);
#endif
int add_linear_area_to_dtree(struct dm_tree_node *node, uint64_t size,
uint32_t extent_size, int use_linear_target,
const char *vgname, const char *lvname);
/*
* Returns 1 if PV has a dependency tree that uses anything in VG.
@@ -108,6 +149,11 @@ int pv_uses_vg(struct physical_volume *pv,
/*
* Returns 1 if mapped device is not suspended.
*/
int device_is_usable(dev_t dev);
int device_is_usable(struct device *dev);
/*
* Declaration moved here from fs.h to keep header fs.h hidden
*/
void fs_unlock(void);
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -19,17 +19,21 @@
#include "metadata-exported.h"
struct logical_volume;
struct lv_activate_opts;
struct volume_group;
struct cmd_context;
struct dev_manager;
struct dm_info;
struct device;
int read_only_lv(struct logical_volume *lv, struct lv_activate_opts *laopts);
/*
* Constructor and destructor.
*/
struct dev_manager *dev_manager_create(struct cmd_context *cmd,
const char *vg_name);
const char *vg_name,
unsigned track_pvmove_deps);
void dev_manager_destroy(struct dev_manager *dm);
void dev_manager_release(void);
void dev_manager_exit(void);
@@ -40,27 +44,35 @@ void dev_manager_exit(void);
* (eg, an origin is created before its snapshot, but is not
* unsuspended until the snapshot is also created.)
*/
int dev_manager_info(struct dm_pool *mem, const char *name,
const struct logical_volume *lv,
int mknodes, int with_open_count, int with_read_ahead,
int dev_manager_info(struct dm_pool *mem, const struct logical_volume *lv,
const char *layer,
int with_open_count, int with_read_ahead,
struct dm_info *info, uint32_t *read_ahead);
int dev_manager_snapshot_percent(struct dev_manager *dm,
const struct logical_volume *lv,
float *percent,
percent_range_t *percent_range);
percent_t *percent);
int dev_manager_mirror_percent(struct dev_manager *dm,
const struct logical_volume *lv, int wait,
float *percent, percent_range_t *percent_range,
uint32_t *event_nr);
percent_t *percent, uint32_t *event_nr);
int dev_manager_thin_pool_status(struct dev_manager *dm,
const struct logical_volume *lv,
struct dm_status_thin_pool **status);
int dev_manager_thin_pool_percent(struct dev_manager *dm,
const struct logical_volume *lv,
int metadata, percent_t *percent);
int dev_manager_thin_percent(struct dev_manager *dm,
const struct logical_volume *lv,
int mapped, percent_t *percent);
int dev_manager_suspend(struct dev_manager *dm, struct logical_volume *lv,
int lockfs, int flush_required);
int dev_manager_activate(struct dev_manager *dm, struct logical_volume *lv);
struct lv_activate_opts *laopts, int lockfs, int flush_required);
int dev_manager_activate(struct dev_manager *dm, struct logical_volume *lv,
struct lv_activate_opts *laopts);
int dev_manager_preload(struct dev_manager *dm, struct logical_volume *lv,
int *flush_required);
struct lv_activate_opts *laopts, int *flush_required);
int dev_manager_deactivate(struct dev_manager *dm, struct logical_volume *lv);
int dev_manager_transient(struct dev_manager *dm, struct logical_volume *lv) __attribute__((nonnull(1, 2)));
int dev_manager_lv_mknodes(const struct logical_volume *lv);
int dev_manager_lv_rmnodes(const struct logical_volume *lv);
int dev_manager_mknodes(const struct logical_volume *lv);
/*
* Put the desired changes into effect.

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -15,6 +15,7 @@
#include "lib.h"
#include "fs.h"
#include "activate.h"
#include "toolcontext.h"
#include "lvm-string.h"
#include "lvm-file.h"
@@ -26,9 +27,16 @@
#include <limits.h>
#include <dirent.h>
/*
* Library cookie to combine multiple fs transactions.
* Supports to wait for udev device settle only when needed.
*/
static uint32_t _fs_cookie = DM_COOKIE_AUTO_CREATE;
static int _fs_create = 0;
static int _mk_dir(const char *dev_dir, const char *vg_name)
{
char vg_path[PATH_MAX];
static char vg_path[PATH_MAX];
mode_t old_umask;
if (dm_snprintf(vg_path, sizeof(vg_path), "%s%s",
@@ -43,20 +51,23 @@ static int _mk_dir(const char *dev_dir, const char *vg_name)
log_very_verbose("Creating directory %s", vg_path);
(void) dm_prepare_selinux_context(vg_path, S_IFDIR);
old_umask = umask(DM_DEV_DIR_UMASK);
if (mkdir(vg_path, 0777)) {
log_sys_error("mkdir", vg_path);
umask(old_umask);
(void) dm_prepare_selinux_context(NULL, 0);
return 0;
}
umask(old_umask);
(void) dm_prepare_selinux_context(NULL, 0);
return 1;
}
static int _rm_dir(const char *dev_dir, const char *vg_name)
{
char vg_path[PATH_MAX];
static char vg_path[PATH_MAX];
if (dm_snprintf(vg_path, sizeof(vg_path), "%s%s",
dev_dir, vg_name) == -1) {
@@ -76,7 +87,7 @@ static int _rm_dir(const char *dev_dir, const char *vg_name)
static void _rm_blks(const char *dir)
{
const char *name;
char path[PATH_MAX];
static char path[PATH_MAX];
struct dirent *dirent;
struct stat buf;
DIR *d;
@@ -105,13 +116,16 @@ static void _rm_blks(const char *dir)
log_sys_error("unlink", path);
}
}
if (closedir(d))
log_sys_error("closedir", dir);
}
static int _mk_link(const char *dev_dir, const char *vg_name,
const char *lv_name, const char *dev, int check_udev)
{
char lv_path[PATH_MAX], link_path[PATH_MAX], lvm1_group_path[PATH_MAX];
char vg_path[PATH_MAX];
static char lv_path[PATH_MAX], link_path[PATH_MAX], lvm1_group_path[PATH_MAX];
static char vg_path[PATH_MAX];
struct stat buf, buf_lp;
if (dm_snprintf(vg_path, sizeof(vg_path), "%s%s",
@@ -196,13 +210,14 @@ static int _mk_link(const char *dev_dir, const char *vg_name,
"direct link creation.", lv_path);
log_very_verbose("Linking %s -> %s", lv_path, link_path);
(void) dm_prepare_selinux_context(lv_path, S_IFLNK);
if (symlink(link_path, lv_path) < 0) {
log_sys_error("symlink", lv_path);
(void) dm_prepare_selinux_context(NULL, 0);
return 0;
}
if (!dm_set_selinux_context(lv_path, S_IFLNK))
return_0;
(void) dm_prepare_selinux_context(NULL, 0);
return 1;
}
@@ -211,7 +226,7 @@ static int _rm_link(const char *dev_dir, const char *vg_name,
const char *lv_name, int check_udev)
{
struct stat buf;
char lv_path[PATH_MAX];
static char lv_path[PATH_MAX];
if (dm_snprintf(lv_path, sizeof(lv_path), "%s%s/%s",
dev_dir, vg_name, lv_name) == -1) {
@@ -219,9 +234,12 @@ static int _rm_link(const char *dev_dir, const char *vg_name,
return 0;
}
if (lstat(lv_path, &buf) && errno == ENOENT)
return 1;
else if (dm_udev_get_sync_support() && udev_checking() && check_udev)
if (lstat(lv_path, &buf)) {
if (errno == ENOENT)
return 1;
log_sys_error("lstat", lv_path);
return 0;
} else if (dm_udev_get_sync_support() && udev_checking() && check_udev)
log_warn("The link %s should have been removed by udev "
"but it is still present. Falling back to "
"direct link removal.", lv_path);
@@ -243,7 +261,8 @@ static int _rm_link(const char *dev_dir, const char *vg_name,
typedef enum {
FS_ADD,
FS_DEL,
FS_RENAME
FS_RENAME,
NUM_FS_OPS
} fs_op_t;
static int _do_fs_op(fs_op_t type, const char *dev_dir, const char *vg_name,
@@ -269,12 +288,19 @@ static int _do_fs_op(fs_op_t type, const char *dev_dir, const char *vg_name,
if (!_mk_link(dev_dir, vg_name, lv_name, dev, check_udev))
stack;
default:
; /* NOTREACHED */
}
return 1;
}
static DM_LIST_INIT(_fs_ops);
/*
* Count number of stacked fs_op_t operations to allow to skip dm_list search.
* FIXME: handling of FS_RENAME
*/
static int _count_fs_ops[NUM_FS_OPS];
struct fs_op_parms {
struct dm_list list;
@@ -295,15 +321,84 @@ static void _store_str(char **pos, char **ptr, const char *str)
*pos += strlen(*ptr) + 1;
}
static void _del_fs_op(struct fs_op_parms *fsp)
{
_count_fs_ops[fsp->type]--;
dm_list_del(&fsp->list);
dm_free(fsp);
}
/* Check if there is other the type of fs operation stacked */
static int _other_fs_ops(fs_op_t type)
{
unsigned i;
for (i = 0; i < NUM_FS_OPS; i++)
if (type != i && _count_fs_ops[i])
return 1;
return 0;
}
/* Check if udev is supposed to create nodes */
static int _check_udev(int check_udev)
{
return check_udev && dm_udev_get_sync_support() && dm_udev_get_checking();
}
/* FIXME: duplication of the code from libdm-common.c */
static int _stack_fs_op(fs_op_t type, const char *dev_dir, const char *vg_name,
const char *lv_name, const char *dev,
const char *old_lv_name, int check_udev)
{
struct dm_list *fsph, *fspht;
struct fs_op_parms *fsp;
size_t len = strlen(dev_dir) + strlen(vg_name) + strlen(lv_name) +
strlen(dev) + strlen(old_lv_name) + 5;
char *pos;
if ((type == FS_DEL) && _other_fs_ops(type))
/*
* Ignore any outstanding operations on the fs_op if deleting it.
*/
dm_list_iterate_safe(fsph, fspht, &_fs_ops) {
fsp = dm_list_item(fsph, struct fs_op_parms);
if (!strcmp(lv_name, fsp->lv_name) &&
!strcmp(vg_name, fsp->vg_name)) {
_del_fs_op(fsp);
if (!_other_fs_ops(type))
break; /* no other non DEL ops */
}
}
else if ((type == FS_ADD) && _count_fs_ops[FS_DEL] && _check_udev(check_udev))
/*
* If udev is running ignore previous DEL operation on added fs_op.
* (No other operations for this device then DEL could be stacked here).
*/
dm_list_iterate_safe(fsph, fspht, &_fs_ops) {
fsp = dm_list_item(fsph, struct fs_op_parms);
if ((fsp->type == FS_DEL) &&
!strcmp(lv_name, fsp->lv_name) &&
!strcmp(vg_name, fsp->vg_name)) {
_del_fs_op(fsp);
break; /* no other DEL ops */
}
}
else if ((type == FS_RENAME) && _check_udev(check_udev))
/*
* If udev is running ignore any outstanding operations if renaming it.
*
* Currently RENAME operation happens through 'suspend -> resume'.
* On 'resume' device is added with read_ahead settings, so it
* safe to remove any stacked ADD, RENAME, READ_AHEAD operation
* There cannot be any DEL operation on the renamed device.
*/
dm_list_iterate_safe(fsph, fspht, &_fs_ops) {
fsp = dm_list_item(fsph, struct fs_op_parms);
if (!strcmp(old_lv_name, fsp->lv_name) &&
!strcmp(vg_name, fsp->vg_name))
_del_fs_op(fsp);
}
if (!(fsp = dm_malloc(sizeof(*fsp) + len))) {
log_error("No space to stack fs operation");
return 0;
@@ -319,6 +414,7 @@ static int _stack_fs_op(fs_op_t type, const char *dev_dir, const char *vg_name,
_store_str(&pos, &fsp->dev, dev);
_store_str(&pos, &fsp->old_lv_name, old_lv_name);
_count_fs_ops[type]++;
dm_list_add(&_fs_ops, &fsp->list);
return 1;
@@ -333,16 +429,17 @@ static void _pop_fs_ops(void)
fsp = dm_list_item(fsph, struct fs_op_parms);
_do_fs_op(fsp->type, fsp->dev_dir, fsp->vg_name, fsp->lv_name,
fsp->dev, fsp->old_lv_name, fsp->check_udev);
dm_list_del(&fsp->list);
dm_free(fsp);
_del_fs_op(fsp);
}
_fs_create = 0;
}
static int _fs_op(fs_op_t type, const char *dev_dir, const char *vg_name,
const char *lv_name, const char *dev, const char *old_lv_name,
int check_udev)
{
if (memlock()) {
if (critical_section()) {
if (!_stack_fs_op(type, dev_dir, vg_name, lv_name, dev,
old_lv_name, check_udev))
return_0;
@@ -388,8 +485,33 @@ int fs_rename_lv(struct logical_volume *lv, const char *dev,
void fs_unlock(void)
{
if (!memlock()) {
if (!critical_section()) {
log_debug("Syncing device names");
/* Wait for all processed udev devices */
if (!dm_udev_wait(_fs_cookie))
stack;
_fs_cookie = DM_COOKIE_AUTO_CREATE; /* Reset cookie */
dm_lib_release();
_pop_fs_ops();
}
}
uint32_t fs_get_cookie(void)
{
return _fs_cookie;
}
void fs_set_cookie(uint32_t cookie)
{
_fs_cookie = cookie;
}
void fs_set_create(void)
{
_fs_create = 1;
}
int fs_has_non_delete_ops(void)
{
return _fs_create || _other_fs_ops(FS_DEL);
}

View File

@@ -29,6 +29,10 @@ int fs_del_lv_byname(const char *dev_dir, const char *vg_name,
const char *lv_name, int check_udev);
int fs_rename_lv(struct logical_volume *lv, const char *dev,
const char *old_vgname, const char *old_lvname);
void fs_unlock(void);
/* void fs_unlock(void); moved to activate.h */
uint32_t fs_get_cookie(void);
void fs_set_cookie(uint32_t cookie);
void fs_set_create(void);
int fs_has_non_delete_ops(void);
#endif

View File

@@ -29,6 +29,4 @@ int add_areas_line(struct dev_manager *dm, struct lv_segment *seg,
int build_dev_string(struct dev_manager *dm, char *dlid, char *devbuf,
size_t bufsize, const char *desc);
char *build_dlid(struct dev_manager *dm, const char *lvid, const char *layer);
#endif

Some files were not shown because too many files have changed in this diff Show More