ceph: make object hash a pg_pool property
The object will be hashed to a placement seed (ps) based on the pg_pool's hash function. This allows new hashes to be introduced into an existing object store, or selection of a hash appropriate to the objects that will be stored in a particular pool. Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
parent
cfbbcd24a6
commit
1654dd0cf5
@ -13,7 +13,7 @@ ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \
|
||||
mon_client.o \
|
||||
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
|
||||
debugfs.o \
|
||||
ceph_fs.o ceph_strings.o ceph_frag.o
|
||||
ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o
|
||||
|
||||
else
|
||||
#Otherwise we were called directly from the command
|
||||
|
@ -10,6 +10,8 @@ src/include/rados.h fs/ceph/rados.h
|
||||
src/include/ceph_strings.cc fs/ceph/ceph_strings.c
|
||||
src/include/ceph_frag.h fs/ceph/ceph_frag.h
|
||||
src/include/ceph_frag.cc fs/ceph/ceph_frag.c
|
||||
src/include/ceph_hash.h fs/ceph/ceph_hash.h
|
||||
src/include/ceph_hash.cc fs/ceph/ceph_hash.c
|
||||
src/crush/crush.c fs/ceph/crush/crush.c
|
||||
src/crush/crush.h fs/ceph/crush/crush.h
|
||||
src/crush/mapper.c fs/ceph/crush/mapper.c
|
||||
|
@ -72,80 +72,3 @@ int ceph_caps_for_mode(int mode)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Robert Jenkins' hash function.
 * http://burtleburtle.net/bob/hash/evahash.html
 * This is in the public domain.
 *
 * mix() stirs the three state words a, b and c together in place, using
 * nine subtract/subtract/xor-shift steps.  All three arguments must be
 * modifiable lvalues.  Each one is read and written several times, so
 * never pass an expression with side effects.
 */
#define mix(a, b, c)						\
	do {							\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);	\
	} while (0)
|
||||
|
||||
unsigned int ceph_full_name_hash(const char *str, unsigned int length)
|
||||
{
|
||||
const unsigned char *k = (const unsigned char *)str;
|
||||
__u32 a, b, c; /* the internal state */
|
||||
__u32 len; /* how many key bytes still need mixing */
|
||||
|
||||
/* Set up the internal state */
|
||||
len = length;
|
||||
a = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
||||
b = a;
|
||||
c = 0; /* variable initialization of internal state */
|
||||
|
||||
/* handle most of the key */
|
||||
while (len >= 12) {
|
||||
a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) +
|
||||
((__u32)k[3] << 24));
|
||||
b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) +
|
||||
((__u32)k[7] << 24));
|
||||
c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) +
|
||||
((__u32)k[11] << 24));
|
||||
mix(a, b, c);
|
||||
k = k + 12;
|
||||
len = len - 12;
|
||||
}
|
||||
|
||||
/* handle the last 11 bytes */
|
||||
c = c + length;
|
||||
switch (len) { /* all the case statements fall through */
|
||||
case 11:
|
||||
c = c + ((__u32)k[10] << 24);
|
||||
case 10:
|
||||
c = c + ((__u32)k[9] << 16);
|
||||
case 9:
|
||||
c = c + ((__u32)k[8] << 8);
|
||||
/* the first byte of c is reserved for the length */
|
||||
case 8:
|
||||
b = b + ((__u32)k[7] << 24);
|
||||
case 7:
|
||||
b = b + ((__u32)k[6] << 16);
|
||||
case 6:
|
||||
b = b + ((__u32)k[5] << 8);
|
||||
case 5:
|
||||
b = b + k[4];
|
||||
case 4:
|
||||
a = a + ((__u32)k[3] << 24);
|
||||
case 3:
|
||||
a = a + ((__u32)k[2] << 16);
|
||||
case 2:
|
||||
a = a + ((__u32)k[1] << 8);
|
||||
case 1:
|
||||
a = a + k[0];
|
||||
/* case 0: nothing left to add */
|
||||
}
|
||||
mix(a, b, c);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
@ -49,8 +49,6 @@
|
||||
#define CEPH_MAX_MON 31
|
||||
|
||||
|
||||
unsigned int ceph_full_name_hash(const char *name, unsigned int len);
|
||||
|
||||
|
||||
/*
|
||||
* ceph_file_layout - describe data layout for a file/inode
|
||||
|
118
fs/ceph/ceph_hash.c
Normal file
118
fs/ceph/ceph_hash.c
Normal file
@ -0,0 +1,118 @@
|
||||
|
||||
#include "types.h"
|
||||
|
||||
/*
 * Robert Jenkins' hash function.
 * http://burtleburtle.net/bob/hash/evahash.html
 * This is in the public domain.
 *
 * mix() stirs the three state words a, b and c together in place, using
 * nine subtract/subtract/xor-shift steps.  All three arguments must be
 * modifiable lvalues.  Each one is read and written several times, so
 * never pass an expression with side effects.
 */
#define mix(a, b, c)						\
	do {							\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);	\
	} while (0)
|
||||
|
||||
unsigned ceph_str_hash_rjenkins(const char *str, unsigned length)
|
||||
{
|
||||
const unsigned char *k = (const unsigned char *)str;
|
||||
__u32 a, b, c; /* the internal state */
|
||||
__u32 len; /* how many key bytes still need mixing */
|
||||
|
||||
/* Set up the internal state */
|
||||
len = length;
|
||||
a = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
||||
b = a;
|
||||
c = 0; /* variable initialization of internal state */
|
||||
|
||||
/* handle most of the key */
|
||||
while (len >= 12) {
|
||||
a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) +
|
||||
((__u32)k[3] << 24));
|
||||
b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) +
|
||||
((__u32)k[7] << 24));
|
||||
c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) +
|
||||
((__u32)k[11] << 24));
|
||||
mix(a, b, c);
|
||||
k = k + 12;
|
||||
len = len - 12;
|
||||
}
|
||||
|
||||
/* handle the last 11 bytes */
|
||||
c = c + length;
|
||||
switch (len) { /* all the case statements fall through */
|
||||
case 11:
|
||||
c = c + ((__u32)k[10] << 24);
|
||||
case 10:
|
||||
c = c + ((__u32)k[9] << 16);
|
||||
case 9:
|
||||
c = c + ((__u32)k[8] << 8);
|
||||
/* the first byte of c is reserved for the length */
|
||||
case 8:
|
||||
b = b + ((__u32)k[7] << 24);
|
||||
case 7:
|
||||
b = b + ((__u32)k[6] << 16);
|
||||
case 6:
|
||||
b = b + ((__u32)k[5] << 8);
|
||||
case 5:
|
||||
b = b + k[4];
|
||||
case 4:
|
||||
a = a + ((__u32)k[3] << 24);
|
||||
case 3:
|
||||
a = a + ((__u32)k[2] << 16);
|
||||
case 2:
|
||||
a = a + ((__u32)k[1] << 8);
|
||||
case 1:
|
||||
a = a + k[0];
|
||||
/* case 0: nothing left to add */
|
||||
}
|
||||
mix(a, b, c);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
 * linux dcache hash
 *
 * Walks the first @length bytes of @str.  For each byte (read as an
 * unsigned char) it adds the byte shifted left by four and the byte
 * shifted right by four to the running value, then multiplies the sum
 * by 11.  An empty input hashes to 0.  The accumulator is narrowed to
 * unsigned on return.
 */
unsigned ceph_str_hash_linux(const char *str, unsigned length)
{
	unsigned long acc = 0;
	unsigned char byte;
	unsigned i;

	for (i = 0; i < length; i++) {
		byte = str[i];
		acc += (byte << 4) + (byte >> 4);
		acc *= 11;
	}

	return acc;
}
|
||||
|
||||
|
||||
unsigned ceph_str_hash(int type, const char *s, unsigned len)
|
||||
{
|
||||
switch (type) {
|
||||
case CEPH_STR_HASH_LINUX:
|
||||
return ceph_str_hash_linux(s, len);
|
||||
case CEPH_STR_HASH_RJENKINS:
|
||||
return ceph_str_hash_rjenkins(s, len);
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
const char *ceph_str_hash_name(int type)
|
||||
{
|
||||
switch (type) {
|
||||
case CEPH_STR_HASH_LINUX:
|
||||
return "linux";
|
||||
case CEPH_STR_HASH_RJENKINS:
|
||||
return "rjenkins";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
13
fs/ceph/ceph_hash.h
Normal file
13
fs/ceph/ceph_hash.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef _FS_CEPH_HASH_H
#define _FS_CEPH_HASH_H

/* hash type identifiers, passed as the "type" argument below */
#define CEPH_STR_HASH_LINUX	0x1	/* linux dcache hash */
#define CEPH_STR_HASH_RJENKINS	0x2	/* robert jenkins' */

/* the individual hash functions: hash len bytes starting at s */
unsigned ceph_str_hash_linux(const char *s, unsigned len);
unsigned ceph_str_hash_rjenkins(const char *s, unsigned len);

/* pick a hash function by type, and report a type's name */
unsigned ceph_str_hash(int type, const char *s, unsigned len);
const char *ceph_str_hash_name(int type);

#endif /* _FS_CEPH_HASH_H */
|
@ -809,7 +809,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
|
||||
return -EIO;
|
||||
|
||||
pool = &osdmap->pg_pool[poolid];
|
||||
ps = ceph_full_name_hash(oid, strlen(oid));
|
||||
ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
|
||||
if (preferred >= 0) {
|
||||
ps += preferred;
|
||||
num = le32_to_cpu(pool->v.lpg_num);
|
||||
|
@ -84,6 +84,7 @@ struct ceph_pg_pool {
|
||||
__u8 type; /* CEPH_PG_TYPE_* */
|
||||
__u8 size; /* number of osds in each pg */
|
||||
__u8 crush_ruleset; /* crush placement rule */
|
||||
__u8 object_hash; /* hash mapping object name to ps */
|
||||
__le32 pg_num, pgp_num; /* number of pg's */
|
||||
__le32 lpg_num, lpgp_num; /* number of localized pg's */
|
||||
__le32 last_change; /* most recent epoch changed */
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "ceph_fs.h"
|
||||
#include "ceph_frag.h"
|
||||
#include "ceph_hash.h"
|
||||
|
||||
/*
|
||||
* Identify inodes by both their ino AND snapshot id (a u64).
|
||||
|
Loading…
Reference in New Issue
Block a user