/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
 * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
 * and are divided into chunks. The chunk sizes start off at the size of the
 * "item" structure plus space for a small key and value. They increase by
 * a multiplier factor from there, up to half the maximum slab size. The last
 * slab size is always 1MB, since that's the maximum item size allowed by the
 * memcached protocol.
 */
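
/*
 * Illustrative sketch only (the numbers are assumptions, not taken from this
 * build): with a growth factor of 1.25 and a first chunk size of 96 bytes,
 * the class sizes would progress roughly as
 *
 *     96, 120, 152, 192, 240, 304, 384, ... up to POWER_BLOCK (1MB)
 *
 * The real sizes depend on sizeof(item), settings.chunk_size, the factor
 * passed to slabs_init(), and the CHUNK_ALIGN_BYTES rounding done below.
 */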
#include "memcached.h"
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <platform/cbassert.h>
#include "log.h"

/* powers-of-N allocation structures */

typedef struct {
    unsigned int size;      /* sizes of items */
    unsigned int perslab;   /* how many items per slab */

    void **slots;           /* list of item ptrs */
    unsigned int sl_total;  /* size of previous array */
    unsigned int sl_curr;   /* first free slot */

    void *end_page_ptr;         /* pointer to next free item at end of page, or 0 */
    unsigned int end_page_free; /* number of items remaining at end of last alloced page */

    unsigned int slabs;     /* how many slabs were allocated for this class */

    void **slab_list;       /* array of slab pointers */
    unsigned int list_size; /* size of prev array */

    unsigned int killing;  /* index+1 of dying slab, or zero if none */
    size_t requested; /* The number of requested bytes */
} slabclass_t;

static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];
static size_t mem_limit = 0;
static size_t mem_malloced = 0;
static size_t power_largest;

static void *mem_base = NULL;
static void *mem_current = NULL;
static size_t mem_avail = 0;

/**
 * Access to the slab allocator is protected by this lock
 */
static cb_mutex_t slabs_lock;

/*
 * Forward Declarations
 */
static int do_slabs_newslab(const unsigned int id);
static void *memory_allocate(size_t size);

#ifndef DONT_PREALLOC_SLABS
/* Preallocate as many slab pages as possible (called from slabs_init)
   on start-up, so users don't get confusing out-of-memory errors when
   they do have free (in-slab) space, but no space to make new slabs.
   If maxslabs is 18 (POWER_LARGEST - POWER_SMALLEST + 1), then all
   slab types can be made.  If max memory is less than 18 MB, only the
   smaller ones will be made.  */
static void slabs_preallocate (const unsigned int maxslabs);
#endif

/*
 * Figures out which slab class (chunk size) is required to store an item of
 * a given size.
 *
 * Given an object size, returns the id to use when allocating/freeing memory
 * for that object. 0 means error: can't store such a large object.
 */
unsigned int slabs_clsid(const size_t size) {
    size_t res = POWER_SMALLEST;

    if (size == 0)
        return 0;
    while (size > slabclass[res].size)
        if (res++ == power_largest)     /* won't fit in the biggest slab */
            return 0;
    return res;
}
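
/* Example (illustrative sizes, not taken from a real configuration): if
 * class 1 holds 96-byte chunks and class 2 holds 120-byte chunks, then
 * slabs_clsid(100) returns 2 -- the first class whose chunk size fits the
 * object -- while slabs_clsid(2 * 1024 * 1024) returns 0, since 2MB exceeds
 * the largest (1MB) chunk size. */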

/**
 * Determines the chunk sizes and initializes the slab class descriptors
 * accordingly.
 */
void slabs_init(const size_t limit, const double factor, const bool prealloc) {
    int i = POWER_SMALLEST - 1;
    unsigned int size = sizeof(item) + settings.chunk_size;

    cb_mutex_initialize(&slabs_lock);

    /* Factor of 2.0 means use the default memcached behavior */
    if (factor == 2.0 && size < 128)
        size = 128;

    mem_limit = limit;

    if (prealloc) {
        /* Allocate everything in a big chunk with malloc */
        mem_base = malloc(mem_limit);
        if (mem_base != NULL) {
            mem_current = mem_base;
            mem_avail = mem_limit;
        } else {
            moxi_log_write("Warning: Failed to allocate requested memory in"
                    " one large chunk.\nWill allocate in smaller chunks\n");
        }
    }

    memset(slabclass, 0, sizeof(slabclass));

    while (++i < POWER_LARGEST && size <= POWER_BLOCK / 2) {
        /* Make sure items are always n-byte aligned */
        if (size % CHUNK_ALIGN_BYTES)
            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);

        slabclass[i].size = size;
        slabclass[i].perslab = POWER_BLOCK / slabclass[i].size;
        size *= factor;
        if (settings.verbose > 1) {
            moxi_log_write("slab class %3d: chunk size %6u perslab %5u\n",
                    i, slabclass[i].size, slabclass[i].perslab);
        }
    }

    power_largest = i;
    slabclass[power_largest].size = POWER_BLOCK;
    slabclass[power_largest].perslab = 1;

    /* For the test suite: fake how much we've already malloc'd */
    {
        char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
        if (t_initial_malloc) {
            mem_malloced = (size_t)atol(t_initial_malloc);
        }
    }

#ifndef DONT_PREALLOC_SLABS
    {
        char *pre_alloc = getenv("T_MEMD_SLABS_ALLOC");

        if (pre_alloc == NULL || atoi(pre_alloc) != 0) {
            slabs_preallocate(power_largest);
        }
    }
#endif
}

#ifndef DONT_PREALLOC_SLABS
static void slabs_preallocate (const unsigned int maxslabs) {
    int i;
    unsigned int prealloc = 0;

    /* Pre-allocate a 1MB slab in every size class so people don't get
       confused by non-intuitive "SERVER_ERROR out of memory" messages.
       This is the most common question on the mailing list.  If you
       really don't want this, you can rebuild without these three
       lines.  */

    for (i = POWER_SMALLEST; i <= POWER_LARGEST; i++) {
        if (++prealloc > maxslabs)
            return;
        do_slabs_newslab(i);
    }
}
#endif

static int grow_slab_list (const unsigned int id) {
    slabclass_t *p = &slabclass[id];
    if (p->slabs == p->list_size) {
        size_t new_size = (p->list_size != 0) ? p->list_size * 2 : 16;
        void *new_list = realloc(p->slab_list, new_size * sizeof(void *));
        if (new_list == 0) return 0;
        p->list_size = new_size;
        p->slab_list = new_list;
    }
    return 1;
}
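
/* Note: the slab_list capacity doubles on demand (16, 32, 64, ...), so the
 * cost of growing it is amortized across do_slabs_newslab() calls. */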

static int do_slabs_newslab(const unsigned int id) {
    slabclass_t *p = &slabclass[id];
#ifdef ALLOW_SLABS_REASSIGN
    int len = POWER_BLOCK;
#else
    int len = p->size * p->perslab;
#endif
    char *ptr;

    if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0) ||
        (grow_slab_list(id) == 0) ||
        ((ptr = memory_allocate((size_t)len)) == 0)) {

        MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
        return 0;
    }

    memset(ptr, 0, (size_t)len);
    p->end_page_ptr = ptr;
    p->end_page_free = p->perslab;

    p->slab_list[p->slabs++] = ptr;
    mem_malloced += len;
    MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);

    return 1;
}

/*@null@*/
static void *do_slabs_alloc(const size_t size, unsigned int id) {
    slabclass_t *p;
    void *ret = NULL;

    if (id < POWER_SMALLEST || id > power_largest) {
        MEMCACHED_SLABS_ALLOCATE_FAILED(size, 0);
        return NULL;
    }

    p = &slabclass[id];
    cb_assert(p->sl_curr == 0 || ((item *)p->slots[p->sl_curr - 1])->slabs_clsid == 0);

#ifdef USE_SYSTEM_MALLOC
    if (mem_limit && mem_malloced + size > mem_limit) {
        MEMCACHED_SLABS_ALLOCATE_FAILED(size, id);
        return 0;
    }
    mem_malloced += size;
    ret = malloc(size);
    MEMCACHED_SLABS_ALLOCATE(size, id, 0, ret);
    return ret;
#endif

    /* fail unless we have space at the end of a recently allocated page,
       we have something on our freelist, or we could allocate a new page */
    if (! (p->end_page_ptr != 0 || p->sl_curr != 0 ||
           do_slabs_newslab(id) != 0)) {
        /* We don't have more memory available */
        ret = NULL;
    } else if (p->sl_curr != 0) {
        /* return off our freelist */
        ret = p->slots[--p->sl_curr];
    } else {
        /* if we recently allocated a whole page, return from that */
        cb_assert(p->end_page_ptr != NULL);
        ret = p->end_page_ptr;
        if (--p->end_page_free != 0) {
            p->end_page_ptr = ((caddr_t)p->end_page_ptr) + p->size;
        } else {
            p->end_page_ptr = 0;
        }
    }

    if (ret) {
        p->requested += size;
        MEMCACHED_SLABS_ALLOCATE(size, id, p->size, ret);
    } else {
        MEMCACHED_SLABS_ALLOCATE_FAILED(size, id);
    }

    return ret;
}
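
/* Note: do_slabs_alloc() prefers the per-class freelist, falls back to the
 * tail of the most recently allocated page, and only grows a new slab via
 * do_slabs_newslab() (which refills end_page_ptr) when both are empty. */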

static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
    slabclass_t *p;

    cb_assert(((item *)ptr)->slabs_clsid == 0);
    cb_assert(id >= POWER_SMALLEST && id <= power_largest);
    if (id < POWER_SMALLEST || id > power_largest)
        return;

    MEMCACHED_SLABS_FREE(size, id, ptr);
    p = &slabclass[id];

#ifdef USE_SYSTEM_MALLOC
    mem_malloced -= size;
    free(ptr);
    return;
#endif

    if (p->sl_curr == p->sl_total) { /* need more space on the free list */
        int new_size = (p->sl_total != 0) ? p->sl_total * 2 : 16;  /* 16 is arbitrary */
        void **new_slots = realloc(p->slots, new_size * sizeof(void *));
        if (new_slots == 0)
            return;
        p->slots = new_slots;
        p->sl_total = new_size;
    }
    p->slots[p->sl_curr++] = ptr;
    p->requested -= size;
    return;
}
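
/* Compare a length-delimited (not NUL-terminated) string against a
 * NUL-terminated one; returns 0 on an exact match and -1 otherwise, following
 * the strcmp-style 0-on-match convention used by get_stats() below. */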

static int nz_strcmp(int nzlength, const char *nz, const char *z) {
    int zlength = (int)strlen(z);
    return (zlength == nzlength) && (strncmp(nz, z, zlength) == 0) ? 0 : -1;
}

bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c) {
    bool ret = true;

    if (add_stats != NULL) {
        if (!stat_type) {
            /* prepare general statistics for the engine */
            APPEND_STAT("bytes", "%llu", (unsigned long long)stats.curr_bytes);
            APPEND_STAT("curr_items", "%u", stats.curr_items);
            APPEND_STAT("total_items", "%u", stats.total_items);
            APPEND_STAT("evictions", "%llu",
                        (unsigned long long)stats.evictions);
        } else if (nz_strcmp(nkey, stat_type, "items") == 0) {
            item_stats(add_stats, c);
        } else if (nz_strcmp(nkey, stat_type, "slabs") == 0) {
            slabs_stats(add_stats, c);
        } else if (nz_strcmp(nkey, stat_type, "sizes") == 0) {
            item_stats_sizes(add_stats, c);
        } else {
            ret = false;
        }
    } else {
        ret = false;
    }

    return ret;
}

/*@null@*/
static void do_slabs_stats(ADD_STAT add_stats, void *c) {
    int i;
    int total;
    /* Get the per-thread stats which contain some interesting aggregates */
    struct thread_stats thread_stats;
    threadlocal_stats_aggregate(&thread_stats);

    total = 0;
    for (i = POWER_SMALLEST; i <= (int) power_largest; i++) {
        slabclass_t *p = &slabclass[i];
        if (p->slabs != 0) {
            uint32_t perslab, slabs;
            slabs = p->slabs;
            perslab = p->perslab;

            char key_str[STAT_KEY_LEN];
            char val_str[STAT_VAL_LEN];
            int klen = 0, vlen = 0;

            APPEND_NUM_STAT(i, "chunk_size", "%u", p->size);
            APPEND_NUM_STAT(i, "chunks_per_page", "%u", perslab);
            APPEND_NUM_STAT(i, "total_pages", "%u", slabs);
            APPEND_NUM_STAT(i, "total_chunks", "%u", slabs * perslab);
            APPEND_NUM_STAT(i, "used_chunks", "%u",
                            slabs * perslab - p->sl_curr - p->end_page_free);
            APPEND_NUM_STAT(i, "free_chunks", "%u", p->sl_curr);
            APPEND_NUM_STAT(i, "free_chunks_end", "%u", p->end_page_free);
            APPEND_NUM_STAT(i, "mem_requested", "%llu",
                            (unsigned long long)p->requested);
            APPEND_NUM_STAT(i, "get_hits", "%llu",
                    (unsigned long long)thread_stats.slab_stats[i].get_hits);
            APPEND_NUM_STAT(i, "cmd_set", "%llu",
                    (unsigned long long)thread_stats.slab_stats[i].set_cmds);
            APPEND_NUM_STAT(i, "delete_hits", "%llu",
                    (unsigned long long)thread_stats.slab_stats[i].delete_hits);
            APPEND_NUM_STAT(i, "incr_hits", "%llu",
                    (unsigned long long)thread_stats.slab_stats[i].incr_hits);
            APPEND_NUM_STAT(i, "decr_hits", "%llu",
                    (unsigned long long)thread_stats.slab_stats[i].decr_hits);
            APPEND_NUM_STAT(i, "cas_hits", "%llu",
                    (unsigned long long)thread_stats.slab_stats[i].cas_hits);
            APPEND_NUM_STAT(i, "cas_badval", "%llu",
                    (unsigned long long)thread_stats.slab_stats[i].cas_badval);

            total++;
        }
    }

    /* add overall slab stats and append terminator */

    APPEND_STAT("active_slabs", "%d", total);
    APPEND_STAT("total_malloced", "%llu", (unsigned long long)mem_malloced);
    add_stats(NULL, 0, NULL, 0, c);
}

#ifdef ALLOW_SLABS_REASSIGN
/* Blows away all the items in a slab class and moves its slabs to another
   class. This is only used by the "slabs reassign" command, for manual tweaking
   of memory allocation. It's disabled by default since it requires that all
   slabs be the same size (which can waste space for chunk growth factors
   other than 2.0).
   1 = success
   0 = fail
   -1 = tried. busy. send again shortly. */
int do_slabs_reassign(unsigned char srcid, unsigned char dstid) {
    void *slab, *slab_end;
    slabclass_t *p, *dp;
    void *iter;
    bool was_busy = false;

    if (srcid < POWER_SMALLEST || srcid > power_largest ||
        dstid < POWER_SMALLEST || dstid > power_largest)
        return 0;

    p = &slabclass[srcid];
    dp = &slabclass[dstid];

    /* fail if src still populating, or no slab to give up in src */
    if (p->end_page_ptr || ! p->slabs)
        return 0;

    /* fail if dst is still growing or we can't make room to hold its new one */
    if (dp->end_page_ptr || ! grow_slab_list(dstid))
        return 0;

    if (p->killing == 0) p->killing = 1;

    slab = p->slab_list[p->killing - 1];
    slab_end = (char*)slab + POWER_BLOCK;

    for (iter = slab; iter < slab_end; iter = (char*)iter + p->size) {
        item *it = (item *)iter;
        if (it->slabs_clsid) {
            if (it->refcount) was_busy = true;
            item_unlink(it);
        }
    }

    /* go through free list and discard items that are no longer part of this slab */
    {
        int fi;
        for (fi = p->sl_curr - 1; fi >= 0; fi--) {
            if (p->slots[fi] >= slab && p->slots[fi] < slab_end) {
                p->sl_curr--;
                if (p->sl_curr > fi) p->slots[fi] = p->slots[p->sl_curr];
            }
        }
    }

    if (was_busy) return -1;

    /* if good, now move it to the dst slab class */
    p->slab_list[p->killing - 1] = p->slab_list[p->slabs - 1];
    p->slabs--;
    p->killing = 0;
    dp->slab_list[dp->slabs++] = slab;
    dp->end_page_ptr = slab;
    dp->end_page_free = dp->perslab;
    /* this isn't too critical, but other parts of the code do asserts to
       make sure this field is always 0.  */
    for (iter = slab; iter < slab_end; iter = (char*)iter + dp->size) {
        ((item *)iter)->slabs_clsid = 0;
    }
    return 1;
}

int slabs_reassign(unsigned char srcid, unsigned char dstid) {
    int ret;

    cb_mutex_enter(&slabs_lock);
    ret = do_slabs_reassign(srcid, dstid);
    cb_mutex_exit(&slabs_lock);
    return ret;
}
#endif

static void *memory_allocate(size_t size) {
    void *ret;

    if (mem_base == NULL) {
        /* We are not using a preallocated large memory chunk */
        ret = malloc(size);
    } else {
        ret = mem_current;

        if (size > mem_avail) {
            return NULL;
        }

        /* mem_current pointer _must_ be aligned!!! */
        if (size % CHUNK_ALIGN_BYTES) {
            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
        }

        mem_current = ((char*)mem_current) + size;
        if (size < mem_avail) {
            mem_avail -= size;
        } else {
            mem_avail = 0;
        }
    }

    return ret;
}
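
/* Example of the alignment rounding above (assuming CHUNK_ALIGN_BYTES is 8,
 * which this file does not guarantee): a 100-byte request advances
 * mem_current by 104 bytes, so the next carve from the preallocated chunk
 * stays 8-byte aligned. */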

void *slabs_alloc(size_t size, unsigned int id) {
    void *ret;

    cb_mutex_enter(&slabs_lock);
    ret = do_slabs_alloc(size, id);
    cb_mutex_exit(&slabs_lock);
    return ret;
}

void slabs_free(void *ptr, size_t size, unsigned int id) {
    cb_mutex_enter(&slabs_lock);
    do_slabs_free(ptr, size, id);
    cb_mutex_exit(&slabs_lock);
}

void slabs_stats(ADD_STAT add_stats, void *c) {
    cb_mutex_enter(&slabs_lock);
    do_slabs_stats(add_stats, c);
    cb_mutex_exit(&slabs_lock);
}
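
/* Minimal usage sketch of the public API above (hypothetical sizes and
 * limits, for illustration only; not part of the build):
 *
 *     slabs_init(64 * 1024 * 1024, 1.25, false);    // 64MB cap, 1.25 factor
 *
 *     size_t needed = sizeof(item) + nkey + nbytes; // item + key + value
 *     unsigned int id = slabs_clsid(needed);
 *     if (id != 0) {                                // 0 == too big to store
 *         void *chunk = slabs_alloc(needed, id);
 *         ...
 *         slabs_free(chunk, needed, id);
 *     }
 */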
518