/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
 *     Copyright 2010 Couchbase, Inc
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#if !defined(WIN32) && !defined(_WIN32)
#include <sys/time.h>
#endif

#include "hash_functions.h"
#include "common.h"
#include "libforestdb/fdb_errors.h"
#include "hash.h"
#include "list.h"
#include "blockcache.h"
#include "avltree.h"
#include "atomic.h"

#include "memleak.h"

#ifdef __DEBUG
#ifndef __DEBUG_BCACHE
    #undef DBG
    #undef DBGCMD
    #undef DBGSW
    #define DBG(...)
    #define DBGCMD(...)
    #define DBGSW(n, ...)
#endif
#endif

// global lock
static spin_t bcache_lock;

// hash table for filename
static struct hash fnamedic;

// free block list
static volatile size_t freelist_count;
static struct list freelist;
static spin_t freelist_lock;

static const size_t MAX_VICTIM_SELECTIONS = 5;

// file structure list
static size_t num_files;
static size_t file_array_capacity;
static fnamedic_item ** file_list;
static struct list file_zombies;
static rw_spin_t filelist_lock; // Reader-Writer spinlock for the file list.

//static struct list cleanlist, dirtylist;
//static uint64_t nfree, nclean, ndirty;
static uint64_t bcache_nblock;

static int bcache_blocksize;
static size_t bcache_flush_unit;

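// Each file's block cache is split into shards to reduce lock contention:
// a block with id 'bid' belongs to shard (bid % num_shards), and every shard
// keeps its own spinlock, clean-block list, dirty-block AVL trees (data
// blocks and index blocks are tracked in separate trees), and lookup hash
// table.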
struct bcache_shard {
    spin_t lock;
    // list for clean blocks
    struct list cleanlist;
    // tree for normal dirty blocks
    struct avl_tree tree;
    // tree for index nodes
    struct avl_tree tree_idx;
    // hash table for block lookup
    struct hash hashtable;
    // list elem for shard LRU
    struct list_elem le;
};

struct fnamedic_item {
    char *filename;
    uint16_t filename_len;
    uint32_t hash;

    // current opened filemgr instance
    // (can be changed on-the-fly when file is closed and re-opened)
    struct filemgr *curfile;

    // Shards of the block cache for a file.
    struct bcache_shard *shards;

    // list elem for FILE_ZOMBIE
    struct list_elem le;
    // hash elem for FNAMEDIC
    struct hash_elem hash_elem;

    atomic_uint32_t ref_count;
    atomic_uint64_t nvictim;
    atomic_uint64_t nitems;
    atomic_uint64_t access_timestamp;
    size_t num_shards;
};

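// Block state flags: BCACHE_DIRTY is set while the block sits in one of the
// shard dirty trees; BCACHE_FREE is set while it sits on the global free
// list. A block on a shard clean list has neither flag set.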
#define BCACHE_DIRTY (0x1)
#define BCACHE_FREE (0x4)

struct bcache_item {
    // BID
    bid_t bid;
    // contents address
    void *addr;
    // hash elem for lookup hash table
    struct hash_elem hash_elem;
    // list elem for {free, clean, dirty} lists
    struct list_elem list_elem;
    // flag
    uint8_t flag;
    // score
    uint8_t score;
};

struct dirty_item {
    struct bcache_item *item;
    struct avl_node avl;
};

INLINE int _dirty_cmp(struct avl_node *a, struct avl_node *b, void *aux)
{
    struct dirty_item *aa, *bb;
    aa = _get_entry(a, struct dirty_item, avl);
    bb = _get_entry(b, struct dirty_item, avl);

    #ifdef __BIT_CMP
        return _CMP_U64(aa->item->bid , bb->item->bid);

    #else
        if (aa->item->bid < bb->item->bid) return -1;
        else if (aa->item->bid > bb->item->bid) return 1;
        else return 0;

    #endif
}

INLINE uint32_t _fname_hash(struct hash *hash, struct hash_elem *e)
{
    struct fnamedic_item *item;
    item = _get_entry(e, struct fnamedic_item, hash_elem);
    return item->hash % ((unsigned)(BCACHE_NDICBUCKET));
}

INLINE int _fname_cmp(struct hash_elem *a, struct hash_elem *b)
{
    size_t len;
    struct fnamedic_item *aa, *bb;
    aa = _get_entry(a, struct fnamedic_item, hash_elem);
    bb = _get_entry(b, struct fnamedic_item, hash_elem);

    if (aa->filename_len == bb->filename_len) {
        return memcmp(aa->filename, bb->filename, aa->filename_len);
    } else {
        len = MIN(aa->filename_len , bb->filename_len);
        int cmp = memcmp(aa->filename, bb->filename, len);
        if (cmp != 0) return cmp;
        else {
            return (int)((int)aa->filename_len - (int)bb->filename_len);
        }
    }
}

INLINE uint32_t _bcache_hash(struct hash *hash, struct hash_elem *e)
{
    struct bcache_item *item = _get_entry(e, struct bcache_item, hash_elem);
    return (item->bid) % ((uint32_t)BCACHE_NBUCKET);
}

INLINE int _bcache_cmp(struct hash_elem *a, struct hash_elem *b)
{
    struct bcache_item *aa, *bb;
    aa = _get_entry(a, struct bcache_item, hash_elem);
    bb = _get_entry(b, struct bcache_item, hash_elem);

    #ifdef __BIT_CMP

        return _CMP_U64(aa->bid, bb->bid);

    #else

        if (aa->bid == bb->bid) return 0;
        else if (aa->bid < bb->bid) return -1;
        else return 1;

    #endif
}

#define _list_empty(list) (list.head == NULL)
#define _tree_empty(tree) (tree.root == NULL)

static void _acquire_all_shard_locks(struct fnamedic_item *fname) {
    size_t i = 0;
    for (; i < fname->num_shards; ++i) {
        spin_lock(&fname->shards[i].lock);
    }
}

static void _release_all_shard_locks(struct fnamedic_item *fname) {
    size_t i = 0;
    for (; i < fname->num_shards; ++i) {
        spin_unlock(&fname->shards[i].lock);
    }
}

static bool _file_empty(struct fnamedic_item *fname) {
    bool empty = true;
    size_t i = 0;
    _acquire_all_shard_locks(fname);
    for (; i < fname->num_shards; ++i) {
        if (!(_list_empty(fname->shards[i].cleanlist) &&
              _tree_empty(fname->shards[i].tree) &&
              _tree_empty(fname->shards[i].tree_idx))) {
            empty = false;
            break;
        }
    }
    _release_all_shard_locks(fname);
    return empty;
}

INLINE bool _shard_empty(struct bcache_shard *bshard) {
    // Caller should grab the shard lock before calling this function.
    return _list_empty(bshard->cleanlist) &&
           _tree_empty(bshard->tree) &&
           _tree_empty(bshard->tree_idx);
}

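// Victim file selection for eviction: sample a handful of files at random
// (roughly num_files/10, capped at MAX_VICTIM_SELECTIONS) and return the one
// with the oldest access timestamp that still has cached blocks. The returned
// file's ref_count is incremented so it cannot be freed while eviction is in
// progress; the caller must decrement it when done.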
struct fnamedic_item *_bcache_get_victim()
{
    struct fnamedic_item *ret = NULL;
    uint64_t min_timestamp = (uint64_t) -1;
    uint64_t victim_timestamp;
    int victim_idx;
    size_t num_attempts;

    rw_spin_read_lock(&filelist_lock);
    // Pick the victim that has the smallest access timestamp among files randomly selected.
    num_attempts = num_files / 10 + 1;
    if (num_attempts > MAX_VICTIM_SELECTIONS) {
        num_attempts = MAX_VICTIM_SELECTIONS;
    } else {
        if (num_attempts == 1 && num_files > 1) {
            ++num_attempts;
        }
    }
    for (size_t i = 0; i < num_attempts && num_files; ++i) {
        victim_idx = rand() % num_files;
        victim_timestamp = atomic_get_uint64_t(&file_list[victim_idx]->access_timestamp);
        if (victim_timestamp < min_timestamp &&
            atomic_get_uint64_t(&file_list[victim_idx]->nitems)) {
            min_timestamp = victim_timestamp;
            ret = file_list[victim_idx];
        }
    }

    if (ret) {
        atomic_incr_uint32_t(&ret->ref_count);
    }
    rw_spin_read_unlock(&filelist_lock);

    return ret;
}

static struct bcache_item *_bcache_alloc_freeblock()
{
    struct list_elem *e = NULL;
    struct bcache_item *item;

    spin_lock(&freelist_lock);
    e = list_pop_front(&freelist);
    if (e) {
        freelist_count--;
    }
    spin_unlock(&freelist_lock);

    if (e) {
        item = _get_entry(e, struct bcache_item, list_elem);
        return item;
    }
    return NULL;
}

static void _bcache_release_freeblock(struct bcache_item *item)
{
    spin_lock(&freelist_lock);
    item->flag = BCACHE_FREE;
    item->score = 0;
    list_push_front(&freelist, &item->list_elem);
    freelist_count++;
    spin_unlock(&freelist_lock);
}

static struct fnamedic_item *_next_dead_fname_zombie(void) {
    struct list_elem *e;
    struct fnamedic_item *fname_item;
    bool found = false;
    rw_spin_write_lock(&filelist_lock);
    e = list_begin(&file_zombies);
    while (e) {
        fname_item = _get_entry(e, struct fnamedic_item, le);
        if (atomic_get_uint32_t(&fname_item->ref_count) == 0) {
            list_remove(&file_zombies, e);
            found = true;
            break;
        } else {
            e = list_next(e);
        }
    }
    rw_spin_write_unlock(&filelist_lock);
    return found ? fname_item : NULL;
}

static void _fname_free(struct fnamedic_item *fname);

static void _garbage_collect_zombie_fnames(void) {
    struct fnamedic_item *fname_item = _next_dead_fname_zombie();
    while (fname_item) {
        _fname_free(fname_item);
        fname_item = _next_dead_fname_zombie();
    }
}

struct dirty_bid {
    bid_t bid;
    struct avl_node avl;
};

INLINE int _dirty_bid_cmp(struct avl_node *a, struct avl_node *b, void *aux)
{
    struct dirty_bid *aa, *bb;
    aa = _get_entry(a, struct dirty_bid, avl);
    bb = _get_entry(b, struct dirty_bid, avl);

    #ifdef __BIT_CMP
        return _CMP_U64(aa->bid , bb->bid);

    #else
        if (aa->bid < bb->bid) return -1;
        else if (aa->bid > bb->bid) return 1;
        else return 0;

    #endif
}

static void _free_dirty_blocks(struct dirty_bid **dirty_bids, size_t n) {
    size_t i = 0;
    for (; i < n; ++i) {
        if (dirty_bids[i]) {
            mempool_free(dirty_bids[i]);
        }
    }
}

// Flush some consecutive or all dirty blocks for a given file and
// move them to the clean list.
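// The flusher merges the per-shard dirty trees through a small cross-shard
// AVL tree ('dirty_blocks') that holds the minimum dirty BID of each shard,
// so blocks are written back in ascending BID order across all shards.
// Dirty data blocks are flushed before dirty index (b-tree node) blocks.
// When 'sync' is set, blocks are written to disk; with O_DIRECT they are
// first gathered into an aligned buffer and written in batches of up to
// bcache_flush_unit bytes of consecutive blocks.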
static fdb_status _flush_dirty_blocks(struct fnamedic_item *fname_item,
                                      bool sync, bool flush_all)
{
    void *buf = NULL;
    struct list_elem *prevhead = NULL;
    struct avl_tree *cur_tree = NULL;
    struct avl_node *node = NULL;
    struct dirty_bid *dbid = NULL;
    uint64_t count = 0;
    ssize_t ret = 0;
    bid_t start_bid = 0, prev_bid = 0;
    void *ptr = NULL;
    uint8_t marker = 0x0;
    fdb_status status = FDB_RESULT_SUCCESS;
    bool o_direct = false;
    bool data_block_completed = false;
    struct avl_tree dirty_blocks; // Cross-shard dirty block list for sequential writes.

    if (fname_item->curfile->config->flag & _ARCH_O_DIRECT) {
        o_direct = true;
    }

    // scan and write back dirty blocks sequentially for O_DIRECT option.
    if (sync && o_direct) {
        malloc_align(buf, FDB_SECTOR_SIZE, bcache_flush_unit);
        _acquire_all_shard_locks(fname_item);
    }

    prev_bid = start_bid = BLK_NOT_FOUND;
    count = 0;

    avl_init(&dirty_blocks, NULL);

    // Try to flush the dirty data blocks first and then index blocks.
    size_t i = 0;
    bool consecutive_blocks = true;
    struct dirty_bid **dirty_bids = alca(struct dirty_bid *, fname_item->num_shards);
    memset(dirty_bids, 0x0, sizeof(dirty_bid *) * fname_item->num_shards);
    while (1) {
        if (!(node = avl_first(&dirty_blocks))) {
            for (i = 0; i < fname_item->num_shards; ++i) {
                if (!(sync && o_direct)) {
                    spin_lock(&fname_item->shards[i].lock);
                }
                if (!data_block_completed) {
                    node = avl_first(&fname_item->shards[i].tree);
                } else {
                    node = avl_first(&fname_item->shards[i].tree_idx);
                }
                if (node) {
                    if (!dirty_bids[i]) {
                        dirty_bids[i] = (struct dirty_bid *)
                            mempool_alloc(sizeof(struct dirty_bid));
                    }
                    dirty_bids[i]->bid = _get_entry(node, struct dirty_item, avl)->item->bid;
                    avl_insert(&dirty_blocks, &dirty_bids[i]->avl, _dirty_bid_cmp);
                }
                if (!(sync && o_direct)) {
                    spin_unlock(&fname_item->shards[i].lock);
                }
            }
            if (!(node = avl_first(&dirty_blocks))) {
                if (!data_block_completed) {
                    data_block_completed = true;
                    if (count > 0 && !flush_all) {
                        // Finished flushing some dirty data blocks.
                        // Do not move over to the dirty index block list
                        // because flush_all is not requested.
                        break;
                    }
                    continue;
                } else {
                    break;
                }
            }
        }

        dbid = _get_entry(node, struct dirty_bid, avl);

        size_t shard_num = dbid->bid % fname_item->num_shards;
        if (!(sync && o_direct)) {
            spin_lock(&fname_item->shards[shard_num].lock);
        }
        if (!data_block_completed) {
            cur_tree = &fname_item->shards[shard_num].tree;
        } else {
            cur_tree = &fname_item->shards[shard_num].tree_idx;
        }

        struct dirty_item *dirty_block = NULL;
        bool item_exist = false;
        node = avl_first(cur_tree);
        if (node) {
            dirty_block = _get_entry(node, struct dirty_item, avl);
            if (dbid->bid == dirty_block->item->bid) {
                item_exist = true;
            }
        }
        // remove from the cross-shard dirty block list.
        avl_remove(&dirty_blocks, &dbid->avl);
        if (!item_exist) {
            // The original first item in the shard dirty block list was removed.
            // Grab the next one from the cross-shard dirty block list.
            if (!(sync && o_direct)) {
                spin_unlock(&fname_item->shards[shard_num].lock);
            }
            continue;
        }

        consecutive_blocks = true;
        // if BID of next dirty block is not consecutive .. stop
        if (dirty_block->item->bid != prev_bid + 1 && prev_bid != BLK_NOT_FOUND &&
            sync) {
            if (flush_all) {
                consecutive_blocks = false;
            } else {
                if (!(sync && o_direct)) {
                    spin_unlock(&fname_item->shards[shard_num].lock);
                }
                break;
            }
        }
        // set START_BID if this is the start block for a single batch write.
        if (start_bid == BLK_NOT_FOUND) {
            start_bid = dirty_block->item->bid;
        }
        // set PREV_BID and go to next block
        prev_bid = dirty_block->item->bid;

        // set PTR and get block MARKER
        ptr = dirty_block->item->addr;
        marker = *((uint8_t*)(ptr) + bcache_blocksize-1);
        dirty_block->item->flag &= ~(BCACHE_DIRTY);
        if (sync) {
            // copy to buffer
#ifdef __CRC32
            if (marker == BLK_MARKER_BNODE) {
                // b-tree node .. calculate crc32 and put it into the block
                memset((uint8_t *)(ptr) + BTREE_CRC_OFFSET,
                       0xff, BTREE_CRC_FIELD_LEN);
                uint32_t crc = chksum(ptr, bcache_blocksize);
                crc = _endian_encode(crc);
                memcpy((uint8_t *)(ptr) + BTREE_CRC_OFFSET, &crc, sizeof(crc));
            }
#endif
            if (o_direct) {
                if (count > 0 && !consecutive_blocks) {
                    size_t bytes_written;
                    // Note that this path can be only executed in flush_all case.
                    bytes_written = (size_t)fname_item->curfile->ops->pwrite(
                                              fname_item->curfile->fd,
                                              buf, count * bcache_blocksize,
                                              start_bid * bcache_blocksize);
                    if (bytes_written != count * bcache_blocksize) {
                        count = 0;
                        status = FDB_RESULT_WRITE_FAIL;
                        break;
                    }
                    // Start a new batch again.
                    count = 0;
                    start_bid = dirty_block->item->bid;
                }
                memcpy((uint8_t *)(buf) + count*bcache_blocksize,
                       dirty_block->item->addr, bcache_blocksize);
            } else {
                ret = fname_item->curfile->ops->pwrite(fname_item->curfile->fd,
                                                       dirty_block->item->addr,
                                                       bcache_blocksize,
                                                       dirty_block->item->bid * bcache_blocksize);
                if (ret != bcache_blocksize) {
                    if (!(sync && o_direct)) {
                        spin_unlock(&fname_item->shards[shard_num].lock);
                    }
                    status = FDB_RESULT_WRITE_FAIL;
                    break;
                }
            }
        }

        node = avl_next(node);
        // remove from the shard dirty block list.
        avl_remove(cur_tree, &dirty_block->avl);

        // move to the shard clean block list.
        prevhead = fname_item->shards[shard_num].cleanlist.head;
        (void)prevhead;
        list_push_front(&fname_item->shards[shard_num].cleanlist,
                        &dirty_block->item->list_elem);

        fdb_assert(!(dirty_block->item->flag & BCACHE_FREE),
                   dirty_block->item->flag, BCACHE_FREE);
        fdb_assert(dirty_block->item->list_elem.prev == NULL &&
                   prevhead == dirty_block->item->list_elem.next,
                   prevhead, dirty_block->item->list_elem.next);
        mempool_free(dirty_block);

        // Get the next dirty block from the victim shard and insert it into
        // the cross-shard dirty block list.
        if (node) {
            dbid->bid = _get_entry(node, struct dirty_item, avl)->item->bid;
            avl_insert(&dirty_blocks, &dbid->avl, _dirty_bid_cmp);
        }
        if (!(sync && o_direct)) {
            spin_unlock(&fname_item->shards[shard_num].lock);
        }

        count++;
        if (count*bcache_blocksize >= bcache_flush_unit && sync) {
            if (flush_all) {
                if (o_direct) {
                    ret = fname_item->curfile->ops->pwrite(fname_item->curfile->fd,
                                                           buf, count * bcache_blocksize,
                                                           start_bid * bcache_blocksize);
                    if ((size_t)ret != count * bcache_blocksize) {
                        count = 0;
                        status = FDB_RESULT_WRITE_FAIL;
                        break;
                    }
                    count = 0;
                    start_bid = BLK_NOT_FOUND;
                    prev_bid = BLK_NOT_FOUND;
                }
            } else {
                break;
            }
        }
    }

    // synchronize
    if (sync && o_direct) {
        if (count > 0) {
            ret = fname_item->curfile->ops->pwrite(fname_item->curfile->fd, buf,
                                                   count * bcache_blocksize,
                                                   start_bid * bcache_blocksize);
            if ((size_t)ret != count * bcache_blocksize) {
                status = FDB_RESULT_WRITE_FAIL;
            }
        }
        _release_all_shard_locks(fname_item);
        free_align(buf);
    }

    _free_dirty_blocks(dirty_bids, fname_item->num_shards);
    return status;
}

// perform eviction
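// A victim file is chosen with _bcache_get_victim(); starting from a random
// shard, shards are scanned round-robin for a clean block to reclaim from the
// tail of the shard's clean list (blocks with a non-zero score get a second
// chance when __BCACHE_SECOND_CHANCE is enabled). If a shard has no clean
// blocks, some dirty blocks are flushed first. Up to BCACHE_EVICT_UNIT blocks
// are reclaimed per call and pushed onto the global free list.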
static struct list_elem * _bcache_evict(struct fnamedic_item *curfile)
{
    size_t n_evict;
    struct list_elem *e = NULL;
    struct bcache_item *item;
    struct fnamedic_item *victim = NULL;

    // We don't need to grab the global buffer cache lock here because
    // the file's buffer cache instance (fnamedic_item) can be freed only if
    // there are no database handles opened for that file.

    while (victim == NULL) {
        // select a victim file
        victim = _bcache_get_victim();
        while(victim) {
            // check whether this file has at least one block to be evicted
            if (atomic_get_uint64_t(&victim->nitems)) {
                // select this file as victim
                break;
            } else {
                atomic_decr_uint32_t(&victim->ref_count);
                victim = NULL; // Try to select a victim again
            }
        }
    }
    fdb_assert(victim, victim, NULL);

    atomic_incr_uint64_t(&victim->nvictim);

    // select the clean blocks from the victim file
    n_evict = 0;
    while(n_evict < BCACHE_EVICT_UNIT) {
        size_t num_shards = victim->num_shards;
        size_t i = random(num_shards);
        bool found_victim_shard = false;
        bcache_shard *bshard = NULL;

        for (size_t to_visit = num_shards; to_visit; --to_visit) {
            i = (i + 1) % num_shards; // Round robin over empty shards..
            bshard = &victim->shards[i];
            spin_lock(&bshard->lock);
            if (_shard_empty(bshard)) {
                spin_unlock(&bshard->lock);
                continue;
            }
            e = list_pop_back(&bshard->cleanlist);
            if(!e) {
                spin_unlock(&bshard->lock);
                // When the victim shard has no clean block, flush some dirty
                // blocks so that they become clean blocks, then retry.
                if (_flush_dirty_blocks(victim, true, false) != FDB_RESULT_SUCCESS) {
                    atomic_decr_uint32_t(&victim->ref_count);
                    return NULL;
                }
                continue; // Select a victim shard again.
            }

            item = _get_entry(e, struct bcache_item, list_elem);
#ifdef __BCACHE_SECOND_CHANCE
            // repeat until zero-score item is found
            if (item->score == 0) {
                found_victim_shard = true;
                break;
            } else {
                // give second chance to the item
                item->score--;
                list_push_front(&bshard->cleanlist, &item->list_elem);
                spin_unlock(&bshard->lock);
            }
#else
            found_victim_shard = true;
            break;
#endif
        }
        if (!found_victim_shard) {
            // We couldn't find any non-empty shards even after 'num_shards'
            // attempts.
            // The file is *likely* empty. Note that it is OK to return NULL
            // even if the file is not empty because the caller will retry again.
            atomic_decr_uint32_t(&victim->ref_count);
            return NULL;
        }

        atomic_decr_uint64_t(&victim->nitems);
        // remove from hash and insert into freelist
        hash_remove(&bshard->hashtable, &item->hash_elem);
        // add to freelist
        _bcache_release_freeblock(item);
        n_evict++;

        spin_unlock(&bshard->lock);

        if (atomic_get_uint64_t(&victim->nitems) == 0) {
            break;
        }
    }

    atomic_decr_uint32_t(&victim->ref_count);
    return &item->list_elem;
}

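// Create and register the per-file cache metadata (fnamedic_item): allocate
// and initialize the shards, insert the entry into the global filename hash,
// and append it to the file list array used for victim selection.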
static struct fnamedic_item * _fname_create(struct filemgr *file) {
    // TODO: we MUST NOT directly read the file structure

    struct fnamedic_item *fname_new;
    // Before we create a new filename entry, garbage collect zombies
    _garbage_collect_zombie_fnames();
    fname_new = (struct fnamedic_item *)malloc(sizeof(struct fnamedic_item));

    fname_new->filename_len = strlen(file->filename);
    fname_new->filename = (char *)malloc(fname_new->filename_len + 1);
    memcpy(fname_new->filename, file->filename, fname_new->filename_len);
    fname_new->filename[fname_new->filename_len] = 0;

    // calculate hash value
    fname_new->hash = chksum((void *)fname_new->filename,
                             fname_new->filename_len);
    fname_new->curfile = file;
    atomic_init_uint64_t(&fname_new->nvictim, 0);
    atomic_init_uint64_t(&fname_new->nitems, 0);
    atomic_init_uint32_t(&fname_new->ref_count, 0);
    atomic_init_uint64_t(&fname_new->access_timestamp, 0);
    if (file->config->num_bcache_shards) {
        fname_new->num_shards = file->config->num_bcache_shards;
    } else {
        fname_new->num_shards = DEFAULT_NUM_BCACHE_PARTITIONS;
    }
    // For random eviction among shards
    randomize();

    fname_new->shards = (bcache_shard *)
        malloc(sizeof(struct bcache_shard) * fname_new->num_shards);
    size_t i = 0;
    for (; i < fname_new->num_shards; ++i) {
        // initialize tree
        avl_init(&fname_new->shards[i].tree, NULL);
        avl_init(&fname_new->shards[i].tree_idx, NULL);
        // initialize clean list
        list_init(&fname_new->shards[i].cleanlist);
        // initialize hash table
        hash_init(&fname_new->shards[i].hashtable, BCACHE_NBUCKET,
                  _bcache_hash, _bcache_cmp);
        spin_init(&fname_new->shards[i].lock);
    }

    // insert into fname dictionary
    hash_insert(&fnamedic, &fname_new->hash_elem);
    file->bcache = fname_new;

    rw_spin_write_lock(&filelist_lock);
    if (num_files == file_array_capacity) {
        file_array_capacity *= 2;
        file_list = (struct fnamedic_item **) realloc(
            file_list, file_array_capacity * sizeof(struct fnamedic_item *));
    }
    file_list[num_files++] = fname_new;
    rw_spin_write_unlock(&filelist_lock);

    return fname_new;
}

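// Unregister a file from the file list array. If some evictor still holds a
// reference (ref_count > 0), the entry is parked on the file_zombies list and
// actually freed later by _garbage_collect_zombie_fnames(); in that case this
// returns false and the caller must not free the entry yet.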
static bool _fname_try_free(struct fnamedic_item *fname)
{
    bool ret = true;

    rw_spin_write_lock(&filelist_lock);
    // Remove from the file list array
    bool found = false;
    for (size_t i = 0; i < num_files; ++i) {
        if (file_list[i] == fname) {
            found = true;
        }
        if (found && (i+1 < num_files)) {
            file_list[i] = file_list[i+1];
        }
    }
    fdb_assert(found, num_files, fname->ref_count.val);
    file_list[num_files - 1] = NULL;
    --num_files;
    if (atomic_get_uint32_t(&fname->ref_count) != 0) {
        // This item is a victim by another thread's _bcache_evict()
        list_push_front(&file_zombies, &fname->le);
        ret = false; // Delay deletion
    }

    rw_spin_write_unlock(&filelist_lock);
    return ret;
}

static void _fname_free(struct fnamedic_item *fname)
{
    // file must be empty
    fdb_assert(_file_empty(fname), false, true);
    uint32_t ref_count = atomic_get_uint32_t(&fname->ref_count);
    fdb_assert(ref_count == 0, 0, ref_count);

    // free hash
    size_t i = 0;
    for (; i < fname->num_shards; ++i) {
        hash_free(&fname->shards[i].hashtable);
        spin_destroy(&fname->shards[i].lock);
    }

    free(fname->shards);
    free(fname->filename);
    atomic_destroy_uint32_t(&fname->ref_count);
    atomic_destroy_uint64_t(&fname->nvictim);
    atomic_destroy_uint64_t(&fname->nitems);
    atomic_destroy_uint64_t(&fname->access_timestamp);
    free(fname);
}

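// When block markers are compiled in (__CRC32), blocks holding b-tree (index)
// nodes get a score of 1 so that, under the second-chance eviction policy,
// they survive one extra pass over the clean list before being reclaimed;
// other blocks get a score of 0.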
INLINE void _bcache_set_score(struct bcache_item *item)
{
#ifdef __CRC32
    uint8_t marker;

    // set PTR and get block MARKER
    marker = *((uint8_t*)(item->addr) + bcache_blocksize-1);
    if (marker == BLK_MARKER_BNODE ) {
        // b-tree node .. set item's score to 1
        item->score = 1;
    } else {
        item->score = 0;
    }
#endif
}

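// Read a block through the cache: look up 'bid' in the owning shard's hash
// table. On a hit, copy the cached contents into 'buf', move a clean block to
// the head of the shard's clean list (approximate LRU), and return the block
// size; on a miss (or when the file has no cache entry yet), return 0.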
int bcache_read(struct filemgr *file, bid_t bid, void *buf)
{
    struct hash_elem *h;
    struct bcache_item *item;
    struct bcache_item query;
    struct fnamedic_item *fname;

    // Note that we don't need to grab bcache_lock here as the block cache
    // is already created and bound when the file is created or opened for
    // the first time.
    fname = file->bcache;

    if (fname) {
        // file exists
        // set query
        query.bid = bid;
        // Update the access timestamp.
        struct timeval tp;
        gettimeofday(&tp, NULL); // TODO: Need to implement a better way of
                                 // getting the timestamp to avoid the overhead of
                                 // gettimeofday()
        atomic_store_uint64_t(&fname->access_timestamp,
                              (uint64_t) (tp.tv_sec * 1000000 + tp.tv_usec));

        size_t shard_num = bid % fname->num_shards;
        spin_lock(&fname->shards[shard_num].lock);

        // search shard hash table
        h = hash_find(&fname->shards[shard_num].hashtable, &query.hash_elem);
        if (h) {
            // cache hit
            item = _get_entry(h, struct bcache_item, hash_elem);
            fdb_assert(!(item->flag & BCACHE_FREE), item->flag, file);

            // move the item to the head of list if the block is clean
            // (don't care if the block is dirty)
            if (!(item->flag & BCACHE_DIRTY)) {
                // TODO: Scanning the list would cause some overhead. We need to devise
                // a better data structure to provide a fast lookup for the clean list.
                list_remove(&fname->shards[shard_num].cleanlist, &item->list_elem);
                list_push_front(&fname->shards[shard_num].cleanlist, &item->list_elem);
            }

            memcpy(buf, item->addr, bcache_blocksize);
            _bcache_set_score(item);

            spin_unlock(&fname->shards[shard_num].lock);

            return bcache_blocksize;
        } else {
            // cache miss
            spin_unlock(&fname->shards[shard_num].lock);
        }
    }

    // does not exist .. cache miss
    return 0;
}

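// Invalidate a cached block: if 'bid' is cached and clean, remove it from the
// shard's hash table and clean list and return it to the global free list.
// Dirty cached copies are left in place.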
void bcache_invalidate_block(struct filemgr *file, bid_t bid)
{
    struct hash_elem *h;
    struct bcache_item *item;
    struct bcache_item query;
    struct fnamedic_item *fname;

    // Note that we don't need to grab bcache_lock here as the block cache
    // is already created and bound when the file is created or opened for
    // the first time.
    fname = file->bcache;

    if (fname) {
        // file exists
        // set query
        query.bid = bid;
        // Update the access timestamp.
        struct timeval tp;
        gettimeofday(&tp, NULL);
        atomic_store_uint64_t(&fname->access_timestamp,
                              (uint64_t) (tp.tv_sec * 1000000 + tp.tv_usec));

        size_t shard_num = bid % fname->num_shards;
        spin_lock(&fname->shards[shard_num].lock);

        // search BHASH
        h = hash_find(&fname->shards[shard_num].hashtable, &query.hash_elem);
        if (h) {
            // cache hit
            item = _get_entry(h, struct bcache_item, hash_elem);
            fdb_assert(!(item->flag & BCACHE_FREE), item->flag, BCACHE_FREE);

            if (!(item->flag & BCACHE_DIRTY)) {
                atomic_decr_uint64_t(&fname->nitems);
                // only for clean blocks
                // remove from hash and insert into freelist
                hash_remove(&fname->shards[shard_num].hashtable, &item->hash_elem);
                // remove from clean list
                list_remove(&fname->shards[shard_num].cleanlist, &item->list_elem);
                spin_unlock(&fname->shards[shard_num].lock);

                // add to freelist
                _bcache_release_freeblock(item);
            } else {
                spin_unlock(&fname->shards[shard_num].lock);
            }
        } else {
            // cache miss
            spin_unlock(&fname->shards[shard_num].lock);
        }
    }
}

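// Insert (or overwrite) a block in the cache. If the file has no cache entry
// yet it is created lazily under bcache_lock (double-checked). A free block
// is taken from the global free list, evicting blocks from some victim file
// if the list is empty. Depending on 'dirty', the block is linked into the
// shard's clean list or into one of the dirty AVL trees (blocks whose last
// byte is BLK_MARKER_BNODE go to tree_idx, everything else to tree).
// Returns the block size.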
int bcache_write(struct filemgr *file,
                 bid_t bid,
                 void *buf,
                 bcache_dirty_t dirty)
{
    struct hash_elem *h = NULL;
    struct bcache_item *item;
    struct bcache_item query;
    struct fnamedic_item *fname_new;

    fname_new = file->bcache;
    if (fname_new == NULL) {
        spin_lock(&bcache_lock);
        fname_new = file->bcache;
        if (fname_new == NULL) {
            // filename doesn't exist in filename dictionary .. create
            fname_new = _fname_create(file);
        }
        spin_unlock(&bcache_lock);
    }

    // Update the access timestamp.
    struct timeval tp;
    gettimeofday(&tp, NULL);
    atomic_store_uint64_t(&fname_new->access_timestamp,
                          (uint64_t) (tp.tv_sec * 1000000 + tp.tv_usec));

    size_t shard_num = bid % fname_new->num_shards;
    // set query
    query.bid = bid;

    spin_lock(&fname_new->shards[shard_num].lock);

    // search hash table
    h = hash_find(&fname_new->shards[shard_num].hashtable, &query.hash_elem);
    if (h == NULL) {
        // cache miss
        // get a free block
        while ((item = _bcache_alloc_freeblock()) == NULL) {
            // no free block .. perform eviction
            spin_unlock(&fname_new->shards[shard_num].lock);

            _bcache_evict(fname_new);

            spin_lock(&fname_new->shards[shard_num].lock);
        }

        // re-search hash table
        h = hash_find(&fname_new->shards[shard_num].hashtable, &query.hash_elem);
        if (h == NULL) {
            // insert into hash table
            item->bid = bid;
            item->flag = BCACHE_FREE;
            hash_insert(&fname_new->shards[shard_num].hashtable, &item->hash_elem);
            h = &item->hash_elem;
        } else {
            // insert into freelist again
            _bcache_release_freeblock(item);
            item = _get_entry(h, struct bcache_item, hash_elem);
        }
    } else {
        item = _get_entry(h, struct bcache_item, hash_elem);
    }

    fdb_assert(h, h, NULL);

    if (item->flag & BCACHE_FREE) {
        atomic_incr_uint64_t(&fname_new->nitems);
    }

    // remove from the list if the block is in clean list
    if (!(item->flag & BCACHE_DIRTY) && !(item->flag & BCACHE_FREE)) {
        list_remove(&fname_new->shards[shard_num].cleanlist, &item->list_elem);
    }
    item->flag &= ~BCACHE_FREE;

    if (dirty == BCACHE_REQ_DIRTY) {
        // DIRTY request
        // avoid re-inserting an already existing item into the tree
        if (!(item->flag & BCACHE_DIRTY)) {
            // dirty block
            // insert into tree
            struct dirty_item *ditem;
            uint8_t marker;

            ditem = (struct dirty_item *)
                    mempool_alloc(sizeof(struct dirty_item));
            ditem->item = item;

            marker = *((uint8_t*)buf + bcache_blocksize-1);
            if (marker == BLK_MARKER_BNODE ) {
                // b-tree node
                avl_insert(&fname_new->shards[shard_num].tree_idx, &ditem->avl, _dirty_cmp);
            } else {
                avl_insert(&fname_new->shards[shard_num].tree, &ditem->avl, _dirty_cmp);
            }
        }
        item->flag |= BCACHE_DIRTY;
    } else {
        // CLEAN request
        // insert into clean list only when it was originally clean
        if (!(item->flag & BCACHE_DIRTY)) {
            list_push_front(&fname_new->shards[shard_num].cleanlist, &item->list_elem);
            item->flag &= ~(BCACHE_DIRTY);
        }
    }

    memcpy(item->addr, buf, bcache_blocksize);
    _bcache_set_score(item);

    spin_unlock(&fname_new->shards[shard_num].lock);

    return bcache_blocksize;
}

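// Partially update a block that is already cached: copy 'len' bytes at
// 'offset' into the cached copy and mark the block dirty (moving it from the
// clean list to the proper dirty tree if needed). Returns 'len' on success,
// or 0 if the block is not in the cache and the partial write fails.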
int bcache_write_partial(struct filemgr *file,
                         bid_t bid,
                         void *buf,
                         size_t offset,
                         size_t len)
{
    struct hash_elem *h;
    struct bcache_item *item;
    struct bcache_item query;
    struct fnamedic_item *fname_new;

    fname_new = file->bcache;
    if (fname_new == NULL) {
        spin_lock(&bcache_lock);
        fname_new = file->bcache;
        if (fname_new == NULL) {
            // filename doesn't exist in filename dictionary .. create
            fname_new = _fname_create(file);
        }
        spin_unlock(&bcache_lock);
    }

    // Update the access timestamp.
    struct timeval tp;
    gettimeofday(&tp, NULL);
    atomic_store_uint64_t(&fname_new->access_timestamp,
                          (uint64_t) (tp.tv_sec * 1000000 + tp.tv_usec));

    size_t shard_num = bid % fname_new->num_shards;
    // set query
    query.bid = bid;

    spin_lock(&fname_new->shards[shard_num].lock);

    // search hash table
    h = hash_find(&fname_new->shards[shard_num].hashtable, &query.hash_elem);
    if (h == NULL) {
        // cache miss .. partial write fail .. return 0
        spin_unlock(&fname_new->shards[shard_num].lock);
        return 0;

    } else {
        // cache hit .. get the block
        item = _get_entry(h, struct bcache_item, hash_elem);
    }

    fdb_assert(!(item->flag & BCACHE_FREE), item->flag, BCACHE_FREE);

    // check whether this is already a dirty block,
    // to avoid re-inserting an existing item into the tree
    if (!(item->flag & BCACHE_DIRTY)) {
        // this block was a clean block
        uint8_t marker;
        struct dirty_item *ditem;

        // remove from clean list
        list_remove(&fname_new->shards[shard_num].cleanlist, &item->list_elem);

        ditem = (struct dirty_item *)mempool_alloc(sizeof(struct dirty_item));
        ditem->item = item;

        // insert into tree
        marker = *((uint8_t*)item->addr + bcache_blocksize-1);
        if (marker == BLK_MARKER_BNODE ) {
            // b-tree node
            avl_insert(&fname_new->shards[shard_num].tree_idx, &ditem->avl, _dirty_cmp);
        } else {
            avl_insert(&fname_new->shards[shard_num].tree, &ditem->avl, _dirty_cmp);
        }
    }

    // always set this block as dirty
    item->flag |= BCACHE_DIRTY;

    memcpy((uint8_t *)(item->addr) + offset, buf, len);
    _bcache_set_score(item);

    spin_unlock(&fname_new->shards[shard_num].lock);

    return len;
}

// remove all dirty blocks of the FILE
// (they are only discarded and not written back)
void bcache_remove_dirty_blocks(struct filemgr *file)
{
    struct fnamedic_item *fname_item;

    fname_item = file->bcache;

    if (fname_item) {
        // Note that this function is only invoked as part of database file close or
        // removal when there are no database handles for a given file. Therefore,
        // we don't need to grab all the shard locks at once.

        // remove all dirty blocks
        _flush_dirty_blocks(fname_item, false, true);
    }
}

// remove all clean blocks of the FILE
void bcache_remove_clean_blocks(struct filemgr *file)
{
    struct list_elem *e;
    struct bcache_item *item;
    struct fnamedic_item *fname_item;

    fname_item = file->bcache;

    if (fname_item) {
        // Note that this function is only invoked as part of database file close or
        // removal when there are no database handles for a given file. Therefore,
        // we don't need to grab all the shard locks at once.

        // remove all clean blocks from each shard in a file.
        size_t i = 0;
        for (; i < fname_item->num_shards; ++i) {
            spin_lock(&fname_item->shards[i].lock);
            e = list_begin(&fname_item->shards[i].cleanlist);
            while(e){
                item = _get_entry(e, struct bcache_item, list_elem);
                // remove from clean list
                e = list_remove(&fname_item->shards[i].cleanlist, e);
                // remove from hash table
                hash_remove(&fname_item->shards[i].hashtable, &item->hash_elem);
                // insert into free list
                _bcache_release_freeblock(item);
            }
            spin_unlock(&fname_item->shards[i].lock);
        }
    }
}

// remove file from filename dictionary
// MUST make sure that there is no dirty block belonging to this FILE
// (or memory leak occurs)
bool bcache_remove_file(struct filemgr *file)
{
    bool rv = false;
    struct fnamedic_item *fname_item;

    // Before proceeding with deletion, garbage collect zombie files
    _garbage_collect_zombie_fnames();
    fname_item = file->bcache;

    if (fname_item) {
        // acquire lock
        spin_lock(&bcache_lock);
        // file must be empty
        fdb_assert(_file_empty(fname_item), fname_item, NULL);

        // remove from fname dictionary hash table
        hash_remove(&fnamedic, &fname_item->hash_elem);
        spin_unlock(&bcache_lock);

        // We don't need to grab the file buffer cache's partition locks
        // at once because this function is only invoked when there are
        // no database handles that access the file.
        if (_fname_try_free(fname_item)) {
            _fname_free(fname_item); // no other callers accessing this file
            rv = true;
        } // else fnamedic_item is in use by _bcache_evict. Deletion delayed
    }
    return rv;
}

// flush and synchronize all dirty blocks of the FILE
// dirty blocks will be changed to clean blocks (not discarded)
fdb_status bcache_flush(struct filemgr *file)
{
    struct fnamedic_item *fname_item;
    fdb_status status = FDB_RESULT_SUCCESS;

    fname_item = file->bcache;

    if (fname_item) {
        // Note that this function is invoked as part of a commit operation while
        // the filemgr's lock is already grabbed by a committer.
        // Therefore, we don't need to grab all the shard locks at once.
        status = _flush_dirty_blocks(fname_item, true, true);
    }
    return status;
}

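// Allocate the global block cache: 'nblock' buffers of 'blocksize' bytes are
// pre-allocated onto the free list, and the global locks, filename hash, and
// file list array are initialized. Typical lifecycle (sketch): bcache_init()
// once at startup, bcache_write()/bcache_read() per block during operation,
// bcache_flush() on commit, and bcache_shutdown() at exit.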
void bcache_init(int nblock, int blocksize)
{
    int i;
    struct bcache_item *item;
    struct list_elem *e;

    list_init(&freelist);
    list_init(&file_zombies);

    hash_init(&fnamedic, BCACHE_NDICBUCKET, _fname_hash, _fname_cmp);

    bcache_blocksize = blocksize;
    bcache_flush_unit = BCACHE_FLUSH_UNIT;
    bcache_nblock = nblock;
    spin_init(&bcache_lock);
    spin_init(&freelist_lock);
    rw_spin_init(&filelist_lock);
    freelist_count = 0;

    num_files = 0;
    file_array_capacity = 4096; // Initial capacity of file list array.
    file_list = (fnamedic_item **) calloc(file_array_capacity, sizeof(fnamedic_item *));

    for (i=0;i<nblock;++i){
        item = (struct bcache_item *)malloc(sizeof(struct bcache_item));

        item->bid = BLK_NOT_FOUND;
        item->flag = 0x0 | BCACHE_FREE;
        item->score = 0;

        list_push_front(&freelist, &item->list_elem);
        freelist_count++;
    }
    e = list_begin(&freelist);
    while(e){
        item = _get_entry(e, struct bcache_item, list_elem);
        item->addr = (void *)malloc(bcache_blocksize);
        e = list_next(e);
    }

}

uint64_t bcache_get_num_free_blocks()
{
    return freelist_count;
}

// LCOV_EXCL_START
void bcache_print_items()
{
    size_t n=1;
    size_t nfiles, nitems, nfileitems, nclean, ndirty;
    size_t scores[100], i, scores_local[100];
    size_t docs, bnodes;
    size_t docs_local, bnodes_local;
    uint8_t *ptr;

    nfiles = nitems = nfileitems = nclean = ndirty = 0;
    docs = bnodes = 0;
    memset(scores, 0, sizeof(size_t)*100);

    struct fnamedic_item *fname;
    struct bcache_item *item;
    struct dirty_item *dirty;
    struct list_elem *ee;
    struct avl_node *a;

    printf(" === Block cache statistics summary ===\n");
    printf("%3s %20s (%6s)(%6s)(c%6s d%6s)",
        "No", "Filename", "#Pages", "#Evict", "Clean", "Dirty");
#ifdef __CRC32
    printf("%6s%6s", "Doc", "Node");
#endif
    for (i=0;i<=n;++i) {
        printf("   [%d] ", (int)i);
    }
    printf("\n");

    for (size_t idx = 0; idx < num_files; ++idx) {
        fname = file_list[idx];
        memset(scores_local, 0, sizeof(size_t)*100);
        nfileitems = nclean = ndirty = 0;
        docs_local = bnodes_local = 0;

        size_t i = 0;
        for (; i < fname->num_shards; ++i) {
            ee = list_begin(&fname->shards[i].cleanlist);
            a = avl_first(&fname->shards[i].tree);

            while(ee){
                item = _get_entry(ee, struct bcache_item, list_elem);
                scores[item->score]++;
                scores_local[item->score]++;
                nitems++;
                nfileitems++;
                nclean++;
#ifdef __CRC32
                ptr = (uint8_t*)item->addr + bcache_blocksize - 1;
                switch (*ptr) {
                case BLK_MARKER_BNODE:
                    bnodes_local++;
                    break;
                case BLK_MARKER_DOC:
                    docs_local++;
                    break;
                }
#endif
                ee = list_next(ee);
            }
            while(a){
                dirty = _get_entry(a, struct dirty_item, avl);
                item = dirty->item;
                scores[item->score]++;
                scores_local[item->score]++;
                nitems++;
                nfileitems++;
                ndirty++;
#ifdef __CRC32
                ptr = (uint8_t*)item->addr + bcache_blocksize - 1;
                switch (*ptr) {
                case BLK_MARKER_BNODE:
                    bnodes_local++;
                    break;
                case BLK_MARKER_DOC:
                    docs_local++;
                    break;
                }
#endif
                a = avl_next(a);
            }
        }

        printf("%3d %20s (%6d)(%6d)(c%6d d%6d)",
               (int)nfiles+1, fname->filename,
               (int)atomic_get_uint64_t(&fname->nitems),
               (int)atomic_get_uint64_t(&fname->nvictim),
               (int)nclean, (int)ndirty);
        printf("%6d%6d", (int)docs_local, (int)bnodes_local);
        for (i=0;i<=n;++i){
            printf("%6d ", (int)scores_local[i]);
        }
        printf("\n");

        docs += docs_local;
        bnodes += bnodes_local;

        nfiles++;
    }
    printf(" ===\n");

    printf("%d files %d items\n", (int)nfiles, (int)nitems);
    for (i=0;i<=n;++i){
        printf("[%d]: %d\n", (int)i, (int)scores[i]);
    }
    printf("Documents: %d blocks\n", (int)docs);
    printf("Index nodes: %d blocks\n", (int)bnodes);
}
// LCOV_EXCL_STOP

// LCOV_EXCL_START
INLINE void _bcache_free_bcache_item(struct hash_elem *h)
{
    struct bcache_item *item = _get_entry(h, struct bcache_item, hash_elem);
    free(item->addr);
    free(item);
}
// LCOV_EXCL_STOP

// LCOV_EXCL_START
INLINE void _bcache_free_fnamedic(struct hash_elem *h)
{
    size_t i = 0;
    struct fnamedic_item *item;
    item = _get_entry(h, struct fnamedic_item, hash_elem);

    for (; i < item->num_shards; ++i) {
        hash_free_active(&item->shards[i].hashtable, _bcache_free_bcache_item);
        spin_destroy(&item->shards[i].lock);
    }

    free(item->shards);
    free(item->filename);

    atomic_destroy_uint32_t(&item->ref_count);
    atomic_destroy_uint64_t(&item->nvictim);
    atomic_destroy_uint64_t(&item->nitems);
    atomic_destroy_uint64_t(&item->access_timestamp);
    free(item);
}
// LCOV_EXCL_STOP

void bcache_shutdown()
{
    struct bcache_item *item;
    struct list_elem *e;

    e = list_begin(&freelist);
    while(e) {
        item = _get_entry(e, struct bcache_item, list_elem);
        e = list_remove(&freelist, e);
        freelist_count--;
        free(item->addr);
        free(item);
    }

    // Force clean zombies if any
    e = list_begin(&file_zombies);
    while (e) {
        struct fnamedic_item *fname = _get_entry(e, struct fnamedic_item, le);
        e = list_remove(&file_zombies, e);
        _fname_free(fname);
    }
    // Free the file list array
    free(file_list);

    spin_lock(&bcache_lock);
    hash_free_active(&fnamedic, _bcache_free_fnamedic);
    spin_unlock(&bcache_lock);

    spin_destroy(&bcache_lock);
    spin_destroy(&freelist_lock);
    rw_spin_destroy(&filelist_lock);
}