set.c: use contiguous memory to facilitate linear search

Recently I tried to implement another data structure similar to the SVR2
buffer cache [Bach 1986], but the code got too complicated.  So I still
maintain that, for small cache sizes, linear search is okay.  Dennis
Ritchie famously argued that a linear search of a directory is efficient
because it is bounded by the size of the directory [Ibid., p. 76].
Great minds think alike (and share similar views on linear search).

What can make the search slow, however, is not the loop per se, but
rather the memory loads: on average, about 67% of the entries have to be
loaded (assuming a 67% hit ratio), each checked against entry->hash, and
most probably followed via entry->next.
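
To make the cost concrete, here is a minimal sketch (not the actual
set.c code) of the two dependent loads each probed entry costs in a
malloc'd linked list:

    /* Sketch only: every iteration chases a pointer into a heap
     * block that malloc may have placed anywhere. */
    struct entry {
        struct entry *next;
        unsigned hash;
        /* ... payload ... */
    };

    static struct entry *lookup(struct entry *head, unsigned hash)
    {
        for (struct entry *e = head; e; e = e->next)  /* load e->next */
            if (e->hash == hash)                      /* load e->hash */
                return e;
        return NULL;
    }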

With malloc'd cache entries, memory loads can be slow.  To facilitate
the search, this change introduces a new structure, "cache_hdr", which
has only the 3 members necessary for the search.  These structures are
pre-allocated in a contiguous memory block.  This should play nicely
with CPU caches, resulting in fewer memory loads and faster searches.
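
Schematically the split looks like this (a sketch with 64-bit sizes
assumed; the real layout is in the diff below):

    /* The full entry stays malloc'd; the search only touches it
     * once the hash matches. */
    struct cache_ent {
        char *str;
        int len;
        int c;
        unsigned v[];
    };

    /* Search-only header: 24 bytes on a 64-bit target, so all 160
     * headers occupy 3840 bytes, i.e. 60 cache lines, which fit
     * comfortably in L1d. */
    struct cache_hdr {
        struct cache_hdr *next;
        struct cache_ent *ent;
        unsigned hash;
    };
    static struct cache_hdr cache_buf[160];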

Indeed, based on some measurements of "apt-shell <<<unmet", this change
yields about a 2% overall improvement in user time.  Using a more
sophisticated SVR2-like data structure further improves the result by
only about 0.5%.
Alexey Tourbin 2012-02-11 04:13:58 +04:00
parent c3f705993b
commit 5d0932c8a0

--- a/set.c
+++ b/set.c
@@ -914,75 +914,81 @@ int decode_set(const char *str, int Mshift, unsigned *v)
 static
 int cache_decode_set(const char *str, int Mshift, const unsigned **pv)
 {
-	const int cache_size = 160;
-	const int pivot_size = 160 - 11;
 	struct cache_ent {
-		struct cache_ent *next;
 		char *str;
 		int len;
-		unsigned hash;
 		int c;
 		unsigned v[];
 	};
+	struct cache_hdr {
+		struct cache_hdr *next;
+		struct cache_ent *ent;
+		unsigned hash;
+	};
+#define CACHE_SIZE 160
+#define PIVOT_SIZE 149
 	static __thread
-	struct cache_ent *cache;
+	struct cache_hdr cache_buf[CACHE_SIZE], *cache;
 	// lookup in the cache
-	struct cache_ent *cur = cache, *prev = NULL;
-	struct cache_ent *pivot_cur = NULL, *pivot_prev = NULL;
+	struct cache_ent *ent;
+	struct cache_hdr *cur = cache, *prev = NULL;
+	struct cache_hdr *pivot_cur = NULL, *pivot_prev = NULL;
 	unsigned hash = str[0] | (str[2] << 8) | (str[3] << 16);
 	int count = 0;
 	while (cur) {
-		if (hash == cur->hash && memcmp(str, cur->str, cur->len + 1) == 0) {
-			// hit, move to front
-			if (cur != cache) {
-				prev->next = cur->next;
-				cur->next = cache;
-				cache = cur;
+		if (hash == cur->hash) {
+			ent = cur->ent;
+			if (memcmp(str, ent->str, ent->len + 1) == 0) {
+				// hit, move to front
+				if (cur != cache) {
+					prev->next = cur->next;
+					cur->next = cache;
+					cache = cur;
+				}
+				*pv = ent->v;
+				return ent->c;
 			}
-			*pv = cur->v;
-			return cur->c;
 		}
 		count++;
 		if (cur->next == NULL)
 			break;
 		prev = cur;
 		cur = cur->next;
-		if (count == pivot_size) {
+		if (count == PIVOT_SIZE) {
 			pivot_cur = cur;
 			pivot_prev = prev;
 		}
 	}
-	// truncate
-	if (count >= cache_size) {
-		free(cur);
-		prev->next = NULL;
-	}
 	// decode
 	int len = strlen(str);
 	int c = decode_set_size(len, Mshift);
-	cur = malloc(sizeof(*cur) + len + 1 + (c + 1) * sizeof(**pv));
-	assert(cur);
-	c = cur->c = decode_set(str, Mshift, cur->v);
+	ent = malloc(sizeof(*ent) + len + 1 + (c + 1) * sizeof(unsigned));
+	assert(ent);
+	c = ent->c = decode_set(str, Mshift, ent->v);
 	if (c <= 0) {
-		free(cur);
+		free(ent);
 		return c;
 	}
-	cur->v[c] = ~0u;
-	cur->str = (char *)(cur->v + c + 1);
-	memcpy(cur->str, str, len + 1);
-	cur->len = len;
-	cur->hash = hash;
+	ent->v[c] = ~0u;
+	ent->str = (char *)(ent->v + c + 1);
+	memcpy(ent->str, str, len + 1);
+	ent->len = len;
 	// pivotal insertion!
-	if (count >= cache_size) {
+	if (count >= CACHE_SIZE) {
+		free(cur->ent);
+		prev->next = NULL;
 		cur->next = pivot_cur;
 		pivot_prev->next = cur;
 	}
 	// early bird, push to front
 	else {
+		cur = &cache_buf[count];
 		cur->next = cache;
 		cache = cur;
 	}
-	*pv = cur->v;
+	cur->ent = ent;
+	cur->hash = hash;
+	*pv = ent->v;
 	return c;
 }
 