object-storage: turn off stat() for container list

Turn off stat() system calls used to fetch the file size during a
container listing operation since these system calls can swamp Gluster
and the result is most often not used.

When a GET or HEAD request is made on a container, stat() system calls
are made during the Python standard library method, os.walk, to
determine if a given directory entry is another directory to recurse
into, and then utils._update_list() will stat() each file to get its
size, and finally utils.get_container_details_from_fs() will stat()
each directory encountered.

For most installations we have seen so far, we don't need the
container listing to accurately return the size of all the objects in
the container, so we can reduce the number of stat() system calls by
not fetching the size of the object.

For now, turn it off by default, and provide an /etc/swift/fs.conf
configuration parameter to turn it back on:

  accurate_size_in_listing = yes

The default for the above is "no".

Change-Id: I7dde11e14bb32ecafa3eabb08852f1ffc4366b35
BUG: 903396
Signed-off-by: Mohammed Junaid <junaid@redhat.com>
Reviewed-on: http://review.gluster.org/4787
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
Reviewed-by: Anand Avati <avati@redhat.com>
This commit is contained in:
Mohammed Junaid 2013-04-07 06:05:56 +05:30 committed by Anand Avati
parent f34343d375
commit bbaa273468
4 changed files with 57 additions and 5 deletions

View File

@ -7,3 +7,11 @@ mount_ip = localhost
# methods besides UFO (not object only), which disables caching
# optimizations in order to keep in sync with file system changes.
object_only = no
# Performance optimization parameter. When turned off, the filesystem will
# see a reduced number of stat calls, resulting in substantially faster
# response time for GET and HEAD container requests on containers with large
# numbers of objects, at the expense of an accurate count of combined bytes
# used by all objects in the container. For most installations "off" works
# fine.
accurate_size_in_listing = off

View File

@ -27,6 +27,7 @@ MOUNT_IP = 'localhost'
OBJECT_ONLY = False
RUN_DIR='/var/run/swift'
SWIFT_DIR = '/etc/swift'
_do_getsize = False
if _fs_conf.read(os.path.join('/etc/swift', 'fs.conf')):
try:
MOUNT_IP = _fs_conf.get('DEFAULT', 'mount_ip', 'localhost')
@ -41,6 +42,12 @@ if _fs_conf.read(os.path.join('/etc/swift', 'fs.conf')):
except (NoSectionError, NoOptionError):
pass
try:
_do_getsize = _fs_conf.get('DEFAULT', 'accurate_size_in_listing', \
"no") in TRUE_VALUES
except (NoSectionError, NoOptionError):
pass
NAME = 'glusterfs'

View File

@ -241,7 +241,7 @@ def _update_list(path, cont_path, src_list, reg_file=True, object_count=0,
object_count += 1
if reg_file:
if Glusterfs._do_getsize and reg_file:
bytes_used += os_path.getsize(os.path.join(path, obj_name))
sleep()

View File

@ -26,7 +26,7 @@ import tarfile
import shutil
from collections import defaultdict
from swift.common.utils import normalize_timestamp
from gluster.swift.common import utils
from gluster.swift.common import utils, Glusterfs
#
# Somewhat hacky way of emulating the operation of xattr calls. They are made
@ -755,7 +755,7 @@ class TestUtils(unittest.TestCase):
utils._get_account_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_get_container_details_from_fs(self):
def test_get_account_details_from_fs(self):
orig_cwd = os.getcwd()
td = tempfile.mkdtemp()
try:
@ -779,7 +779,7 @@ class TestUtils(unittest.TestCase):
assert cd.obj_list == []
assert cd.dir_list == []
def test_get_account_details_from_fs(self):
def test_get_container_details_from_fs(self):
orig_cwd = os.getcwd()
td = tempfile.mkdtemp()
try:
@ -788,12 +788,13 @@ class TestUtils(unittest.TestCase):
tf.extractall()
cd = utils._get_container_details_from_fs(td)
assert cd.bytes_used == 30, repr(cd.bytes_used)
assert cd.bytes_used == 0, repr(cd.bytes_used)
assert cd.object_count == 8, repr(cd.object_count)
assert set(cd.obj_list) == set(['file1', 'file3', 'file2',
'dir3', 'dir1', 'dir2',
'dir1/file1', 'dir1/file2'
]), repr(cd.obj_list)
full_dir1 = os.path.join(td, 'dir1')
full_dir2 = os.path.join(td, 'dir2')
full_dir3 = os.path.join(td, 'dir3')
@ -809,6 +810,42 @@ class TestUtils(unittest.TestCase):
os.chdir(orig_cwd)
shutil.rmtree(td)
def test_get_container_details_from_fs_do_getsize_true(self):
    """Check container details when Glusterfs._do_getsize is enabled.

    Extracts the container_tree fixture into a temporary directory, forces
    Glusterfs._do_getsize to True, and asserts that
    utils._get_container_details_from_fs() reports the byte total (30),
    the object count (8), the expected object names, and directory
    entries whose mtimes match the extracted tree.
    """
    orig_cwd = os.getcwd()
    # Capture the flag before the try block so the finally clause can
    # always restore it, even when fixture setup below raises.
    orig_do_getsize = Glusterfs._do_getsize
    td = tempfile.mkdtemp()
    try:
        # The archive path is relative, so open it before changing directory.
        tf = tarfile.open("common/data/container_tree.tar.bz2", "r:bz2")
        try:
            os.chdir(td)
            tf.extractall()
        finally:
            # Release the archive handle regardless of extraction outcome.
            tf.close()

        Glusterfs._do_getsize = True

        cd = utils._get_container_details_from_fs(td)
        assert cd.bytes_used == 30, repr(cd.bytes_used)
        assert cd.object_count == 8, repr(cd.object_count)
        assert set(cd.obj_list) == set(['file1', 'file3', 'file2',
                                        'dir3', 'dir1', 'dir2',
                                        'dir1/file1', 'dir1/file2'
                                        ]), repr(cd.obj_list)

        full_dir1 = os.path.join(td, 'dir1')
        full_dir2 = os.path.join(td, 'dir2')
        full_dir3 = os.path.join(td, 'dir3')
        exp_dir_dict = {td: os.path.getmtime(td),
                        full_dir1: os.path.getmtime(full_dir1),
                        full_dir2: os.path.getmtime(full_dir2),
                        full_dir3: os.path.getmtime(full_dir3),
                        }
        for d, m in cd.dir_list:
            assert d in exp_dir_dict
            assert exp_dir_dict[d] == m
    finally:
        # Undo global state changes so later tests see the original
        # flag value and working directory.
        Glusterfs._do_getsize = orig_do_getsize
        os.chdir(orig_cwd)
        shutil.rmtree(td)
def test_get_account_details_from_fs_notadir_w_stats(self):
tf = tempfile.NamedTemporaryFile()
ad = utils._get_account_details_from_fs(tf.name, os.stat(tf.name))