object-storeage: refactor to use one memcache key

Address BZ 868087: https://bugzilla.redhat.com/show_bug.cgi?id=868087

Store all of the data needed to generate the correct set of container and
account details in one object, respectively, rather than using three seperate
memcache keys.

Change-Id: I46bf60c405b37cdb22727965bfd67bc5c410e77c
BUG: 868087
Signed-off-by: Peter Portante <peter.portante@redhat.com>
Reviewed-on: http://review.gluster.org/4139
Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
This commit is contained in:
Peter Portante 2012-10-23 11:47:44 -04:00 committed by Vijay Bellur
parent abc8e5b01b
commit 6478b569c3
4 changed files with 394 additions and 65 deletions

View File

@ -49,6 +49,9 @@ DEFAULT_UID = -1
DEFAULT_GID = -1
PICKLE_PROTOCOL = 2
CHUNK_SIZE = 65536
MEMCACHE_KEY_PREFIX = 'gluster.swift.'
MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX = MEMCACHE_KEY_PREFIX + 'account.details.'
MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX = MEMCACHE_KEY_PREFIX + 'container.details.'
def mkdirs(path):
@ -421,108 +424,114 @@ def update_list(path, const_path, dirs=[], files=[], object_count=0,
obj_list)
return object_count, bytes_used
def get_container_details_from_fs(cont_path, const_path,
memcache=None):
class ContainerDetails(object):
def __init__(self, bytes_used, object_count, obj_list, dir_list):
self.bytes_used = bytes_used
self.object_count = object_count
self.obj_list = obj_list
self.dir_list = dir_list
def _get_container_details_from_fs(cont_path):
"""
get container details by traversing the filesystem
"""
bytes_used = 0
object_count = 0
obj_list=[]
obj_list = []
dir_list = []
if os.path.isdir(cont_path):
for (path, dirs, files) in os.walk(cont_path):
object_count, bytes_used = update_list(path, const_path, dirs, files,
object_count, bytes_used = update_list(path, cont_path, dirs, files,
object_count, bytes_used,
obj_list)
dir_list.append(path + ':' + str(do_stat(path).st_mtime))
dir_list.append((path, do_stat(path).st_mtime))
if memcache:
memcache.set(strip_obj_storage_path(cont_path), obj_list)
memcache.set(strip_obj_storage_path(cont_path) + '-dir_list',
','.join(dir_list))
memcache.set(strip_obj_storage_path(cont_path) + '-cont_meta',
[object_count, bytes_used])
return obj_list, object_count, bytes_used
def get_container_details_from_memcache(cont_path, const_path,
memcache):
"""
get container details stored in memcache
"""
bytes_used = 0
object_count = 0
obj_list=[]
dir_contents = memcache.get(strip_obj_storage_path(cont_path) + '-dir_list')
if not dir_contents:
return get_container_details_from_fs(cont_path, const_path,
memcache=memcache)
for i in dir_contents.split(','):
path, mtime = i.split(':')
if mtime != str(do_stat(path).st_mtime):
return get_container_details_from_fs(cont_path, const_path,
memcache=memcache)
obj_list = memcache.get(strip_obj_storage_path(cont_path))
object_count, bytes_used = memcache.get(strip_obj_storage_path(cont_path) + '-cont_meta')
return obj_list, object_count, bytes_used
return ContainerDetails(bytes_used, object_count, obj_list, dir_list)
def get_container_details(cont_path, memcache=None):
"""
Return object_list, object_count and bytes_used.
"""
mkey = ''
if memcache:
object_list, object_count, bytes_used = get_container_details_from_memcache(cont_path, cont_path,
memcache)
mkey = MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + strip_obj_storage_path(cont_path)
cd = memcache.get(mkey)
if cd:
if not cd.dir_list:
cd = None
else:
for (path, mtime) in cd.dir_list:
if mtime != do_stat(path).st_mtime:
cd = None
else:
object_list, object_count, bytes_used = get_container_details_from_fs(cont_path, cont_path)
cd = None
if not cd:
cd = _get_container_details_from_fs(cont_path)
if memcache:
memcache.set(mkey, cd)
return cd.obj_list, cd.object_count, cd.bytes_used
return object_list, object_count, bytes_used
class AccountDetails(object):
""" A simple class to store the three pieces of information associated
with an account:
def get_account_details_from_fs(acc_path, memcache=None):
1. The last known modification time
2. The count of containers in the following list
3. The list of containers
"""
def __init__(self, mtime, container_count, container_list):
self.mtime = mtime
self.container_count = container_count
self.container_list = container_list
def _get_account_details_from_fs(acc_path, acc_stats):
container_list = []
container_count = 0
if os.path.isdir(acc_path):
if not acc_stats:
acc_stats = do_stat(acc_path)
is_dir = (acc_stats.st_mode & 0040000) != 0
if is_dir:
for name in do_listdir(acc_path):
if not os.path.isdir(acc_path + '/' + name) or \
name.lower() == 'tmp' or name.lower() == 'async_pending':
if name.lower() == 'tmp' \
or name.lower() == 'async_pending' \
or not os.path.isdir(os.path.join(acc_path, name)):
continue
container_count += 1
container_list.append(name)
if memcache:
memcache.set(strip_obj_storage_path(acc_path) + '_container_list', container_list)
memcache.set(strip_obj_storage_path(acc_path)+'_mtime', str(do_stat(acc_path).st_mtime))
memcache.set(strip_obj_storage_path(acc_path)+'_container_count', container_count)
return container_list, container_count
def get_account_details_from_memcache(acc_path, memcache=None):
if memcache:
mtime = memcache.get(strip_obj_storage_path(acc_path)+'_mtime')
if not mtime or mtime != str(do_stat(acc_path).st_mtime):
return get_account_details_from_fs(acc_path, memcache)
container_list = memcache.get(strip_obj_storage_path(acc_path) + '_container_list')
container_count = memcache.get(strip_obj_storage_path(acc_path)+'_container_count')
return container_list, container_count
return AccountDetails(acc_stats.st_mtime, container_count, container_list)
def get_account_details(acc_path, memcache=None):
"""
Return container_list and container_count.
"""
acc_stats = None
mkey = ''
if memcache:
return get_account_details_from_memcache(acc_path, memcache)
mkey = MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + strip_obj_storage_path(acc_path)
ad = memcache.get(mkey)
if ad:
# FIXME: Do we really need to stat the file? If we are object
# only, then we can track the other Swift HTTP APIs that would
# modify the account and invalidate the cached entry there. If we
# are not object only, are we even called on this path?
acc_stats = do_stat(acc_path)
if ad.mtime != acc_stats.st_mtime:
ad = None
else:
return get_account_details_from_fs(acc_path, memcache)
ad = None
if not ad:
ad = _get_account_details_from_fs(acc_path, acc_stats)
if memcache:
memcache.set(mkey, ad)
return ad.container_list, ad.container_count
def _get_etag(path):
etag = md5()

View File

@ -22,6 +22,8 @@ import xattr
import cPickle as pickle
import tempfile
import hashlib
import tarfile
import shutil
from swift.common.utils import normalize_timestamp
from collections import defaultdict
from plugins import utils
@ -105,6 +107,17 @@ def _destroyxattr():
global _xattrs; _xattrs = None
class SimMemcache(object):
def __init__(self):
self._d = {}
def get(self, key):
return self._d.get(key, None)
def set(self, key, value):
self._d[key] = value
class TestUtils(unittest.TestCase):
""" Tests for plugins.utils """
@ -496,3 +509,310 @@ class TestUtils(unittest.TestCase):
assert md[utils.X_CONTAINER_COUNT] == (0, 0)
finally:
os.rmdir(td)
def test_container_details_uncached(self):
the_path = "/tmp/bar"
def mock_get_container_details_from_fs(cont_path):
bu = 5
oc = 1
ol = ['foo',]
dl = [('a',100),]
return utils.ContainerDetails(bu, oc, ol, dl)
orig_gcdff = utils._get_container_details_from_fs
utils._get_container_details_from_fs = mock_get_container_details_from_fs
try:
retval = utils.get_container_details(the_path)
cd = mock_get_container_details_from_fs(the_path)
assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
finally:
utils._get_container_details_from_fs = orig_gcdff
def test_container_details_cached_hit(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_container_details_from_fs(cont_path, bu_p=5):
bu = bu_p
oc = 1
ol = ['foo',]
dl = [('a',100),]
return utils.ContainerDetails(bu, oc, ol, dl)
def mock_do_stat(path):
class MockStat(object):
def __init__(self, mtime):
self.st_mtime = mtime
return MockStat(100)
cd = mock_get_container_details_from_fs(the_path, bu_p=6)
mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), cd)
orig_gcdff = utils._get_container_details_from_fs
utils._get_container_details_from_fs = mock_get_container_details_from_fs
orig_ds = utils.do_stat
utils.do_stat = mock_do_stat
try:
retval = utils.get_container_details(the_path, memcache=mc)
# If it did not properly use memcache, the default mocked version
# of get details from fs would return 5 bytes used instead of the
# 6 we specified above.
cd = mock_get_container_details_from_fs(the_path, bu_p=6)
assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
finally:
utils._get_container_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_container_details_cached_miss_key(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_container_details_from_fs(cont_path, bu_p=5):
bu = bu_p
oc = 1
ol = ['foo',]
dl = [('a',100),]
return utils.ContainerDetails(bu, oc, ol, dl)
def mock_do_stat(path):
# Be sure we don't miss due to mtimes not matching
self.fail("do_stat should not have been called")
cd = mock_get_container_details_from_fs(the_path + "u", bu_p=6)
mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path + "u"), cd)
orig_gcdff = utils._get_container_details_from_fs
utils._get_container_details_from_fs = mock_get_container_details_from_fs
orig_ds = utils.do_stat
utils.do_stat = mock_do_stat
try:
retval = utils.get_container_details(the_path, memcache=mc)
cd = mock_get_container_details_from_fs(the_path)
assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path)
assert mkey in mc._d
finally:
utils._get_container_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_container_details_cached_miss_dir_list(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_container_details_from_fs(cont_path, bu_p=5):
bu = bu_p
oc = 1
ol = ['foo',]
dl = []
return utils.ContainerDetails(bu, oc, ol, dl)
def mock_do_stat(path):
# Be sure we don't miss due to mtimes not matching
self.fail("do_stat should not have been called")
cd = mock_get_container_details_from_fs(the_path, bu_p=6)
mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), cd)
orig_gcdff = utils._get_container_details_from_fs
utils._get_container_details_from_fs = mock_get_container_details_from_fs
orig_ds = utils.do_stat
utils.do_stat = mock_do_stat
try:
retval = utils.get_container_details(the_path, memcache=mc)
cd = mock_get_container_details_from_fs(the_path)
assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path)
assert mkey in mc._d
assert 5 == mc._d[mkey].bytes_used
finally:
utils._get_container_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_container_details_cached_miss_mtime(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_container_details_from_fs(cont_path, bu_p=5):
bu = bu_p
oc = 1
ol = ['foo',]
dl = [('a',100),]
return utils.ContainerDetails(bu, oc, ol, dl)
def mock_do_stat(path):
# Be sure we miss due to mtimes not matching
class MockStat(object):
def __init__(self, mtime):
self.st_mtime = mtime
return MockStat(200)
cd = mock_get_container_details_from_fs(the_path, bu_p=6)
mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), cd)
orig_gcdff = utils._get_container_details_from_fs
utils._get_container_details_from_fs = mock_get_container_details_from_fs
orig_ds = utils.do_stat
utils.do_stat = mock_do_stat
try:
retval = utils.get_container_details(the_path, memcache=mc)
cd = mock_get_container_details_from_fs(the_path)
assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path)
assert mkey in mc._d
assert 5 == mc._d[mkey].bytes_used
finally:
utils._get_container_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_account_details_uncached(self):
the_path = "/tmp/bar"
def mock_get_account_details_from_fs(acc_path, acc_stats):
mt = 100
cc = 2
cl = ['a', 'b']
return utils.AccountDetails(mt, cc, cl)
orig_gcdff = utils._get_account_details_from_fs
utils._get_account_details_from_fs = mock_get_account_details_from_fs
try:
retval = utils.get_account_details(the_path)
ad = mock_get_account_details_from_fs(the_path, None)
assert retval == (ad.container_list, ad.container_count)
finally:
utils._get_account_details_from_fs = orig_gcdff
def test_account_details_cached_hit(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_account_details_from_fs(acc_path, acc_stats):
mt = 100
cc = 2
cl = ['a', 'b']
return utils.AccountDetails(mt, cc, cl)
def mock_do_stat(path):
class MockStat(object):
def __init__(self, mtime):
self.st_mtime = mtime
return MockStat(100)
ad = mock_get_account_details_from_fs(the_path, None)
ad.container_list = ['x', 'y']
mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), ad)
orig_gcdff = utils._get_account_details_from_fs
orig_ds = utils.do_stat
utils._get_account_details_from_fs = mock_get_account_details_from_fs
utils.do_stat = mock_do_stat
try:
retval = utils.get_account_details(the_path, memcache=mc)
assert retval == (ad.container_list, ad.container_count)
wrong_ad = mock_get_account_details_from_fs(the_path, None)
assert wrong_ad != ad
finally:
utils._get_account_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_account_details_cached_miss(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_account_details_from_fs(acc_path, acc_stats):
mt = 100
cc = 2
cl = ['a', 'b']
return utils.AccountDetails(mt, cc, cl)
def mock_do_stat(path):
class MockStat(object):
def __init__(self, mtime):
self.st_mtime = mtime
return MockStat(100)
ad = mock_get_account_details_from_fs(the_path, None)
ad.container_list = ['x', 'y']
mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path + 'u'), ad)
orig_gcdff = utils._get_account_details_from_fs
orig_ds = utils.do_stat
utils._get_account_details_from_fs = mock_get_account_details_from_fs
utils.do_stat = mock_do_stat
try:
retval = utils.get_account_details(the_path, memcache=mc)
correct_ad = mock_get_account_details_from_fs(the_path, None)
assert retval == (correct_ad.container_list, correct_ad.container_count)
assert correct_ad != ad
finally:
utils._get_account_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_account_details_cached_miss_mtime(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_account_details_from_fs(acc_path, acc_stats):
mt = 100
cc = 2
cl = ['a', 'b']
return utils.AccountDetails(mt, cc, cl)
def mock_do_stat(path):
class MockStat(object):
def __init__(self, mtime):
self.st_mtime = mtime
return MockStat(100)
ad = mock_get_account_details_from_fs(the_path, None)
ad.container_list = ['x', 'y']
ad.mtime = 200
mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), ad)
orig_gcdff = utils._get_account_details_from_fs
orig_ds = utils.do_stat
utils._get_account_details_from_fs = mock_get_account_details_from_fs
utils.do_stat = mock_do_stat
try:
retval = utils.get_account_details(the_path, memcache=mc)
correct_ad = mock_get_account_details_from_fs(the_path, None)
assert retval == (correct_ad.container_list, correct_ad.container_count)
assert correct_ad != ad
finally:
utils._get_account_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_get_container_details_from_fs(self):
td = tempfile.mkdtemp()
tf = tarfile.open("plugins/data/account_tree.tar.bz2", "r:bz2")
orig_cwd = os.getcwd()
os.chdir(td)
tf.extractall()
try:
ad = utils._get_account_details_from_fs(td, None)
assert ad.mtime == os.path.getmtime(td)
assert ad.container_count == 3
assert set(ad.container_list) == set(['c1', 'c2', 'c3'])
finally:
os.chdir(orig_cwd)
shutil.rmtree(td)
def test_get_container_details_from_fs_notadir(self):
tf = tempfile.NamedTemporaryFile()
cd = utils._get_container_details_from_fs(tf.name)
assert cd.bytes_used == 0
assert cd.object_count == 0
assert cd.obj_list == []
assert cd.dir_list == []
def test_get_account_details_from_fs(self):
td = tempfile.mkdtemp()
tf = tarfile.open("plugins/data/container_tree.tar.bz2", "r:bz2")
orig_cwd = os.getcwd()
os.chdir(td)
tf.extractall()
try:
cd = utils._get_container_details_from_fs(td)
assert cd.bytes_used == 30, repr(cd.bytes_used)
assert cd.object_count == 8, repr(cd.object_count)
assert cd.obj_list == ['file1', 'file3', 'file2',
'dir3', 'dir1', 'dir2',
'dir1/file1', 'dir1/file2'
], repr(cd.obj_list)
full_dir1 = os.path.join(td, 'dir1')
full_dir2 = os.path.join(td, 'dir2')
full_dir3 = os.path.join(td, 'dir3')
exp_dir_dict = { td: os.path.getmtime(td),
full_dir1: os.path.getmtime(full_dir1),
full_dir2: os.path.getmtime(full_dir2),
full_dir3: os.path.getmtime(full_dir3),
}
for d,m in cd.dir_list:
assert d in exp_dir_dict
assert exp_dir_dict[d] == m
finally:
os.chdir(orig_cwd)
shutil.rmtree(td)
def test_get_account_details_from_fs_notadir_w_stats(self):
tf = tempfile.NamedTemporaryFile()
ad = utils._get_account_details_from_fs(tf.name, os.stat(tf.name))
assert ad.mtime == os.path.getmtime(tf.name)
assert ad.container_count == 0
assert ad.container_list == []
def test_get_account_details_from_fs_notadir(self):
tf = tempfile.NamedTemporaryFile()
ad = utils._get_account_details_from_fs(tf.name, None)
assert ad.mtime == os.path.getmtime(tf.name)
assert ad.container_count == 0
assert ad.container_list == []