/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
 *     Copyright 2010 Couchbase, Inc
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#if !defined(WIN32) && !defined(_WIN32)
#include <sys/time.h>
#endif

#include "hash_functions.h"
#include "common.h"
#include "libforestdb/fdb_errors.h"
#include "hash.h"
#include "list.h"
#include "blockcache.h"
#include "avltree.h"
#include "atomic.h"

#include "memleak.h"

#ifdef __DEBUG
#ifndef __DEBUG_BCACHE
    #undef DBG
    #undef DBGCMD
    #undef DBGSW
    #define DBG(...)
    #define DBGCMD(...)
    #define DBGSW(n, ...)
#endif
#endif

// global lock
static spin_t bcache_lock;

// hash table for filename
static struct hash fnamedic;

// free block list
static volatile size_t freelist_count;
static struct list freelist;
static spin_t freelist_lock;

static const size_t MAX_VICTIM_SELECTIONS = 5;

// file structure list
static size_t num_files;
static size_t file_array_capacity;
static fnamedic_item **file_list;
static struct list file_zombies;
static rw_spin_t filelist_lock; // Reader-Writer spinlock for the file list.

//static struct list cleanlist, dirtylist;
//static uint64_t nfree, nclean, ndirty;
static uint64_t bcache_nblock;

static int bcache_blocksize;
static size_t bcache_flush_unit;

struct bcache_shard {
    spin_t lock;
    // list for clean blocks
    struct list cleanlist;
    // tree for normal dirty blocks
    struct avl_tree tree;
    // tree for index nodes
    struct avl_tree tree_idx;
    // hash table for block lookup
    struct hash hashtable;
    // list elem for shard LRU
    struct list_elem le;
};

struct fnamedic_item {
    char *filename;
    uint16_t filename_len;
    uint32_t hash;

    // current opened filemgr instance
    // (can be changed on-the-fly when file is closed and re-opened)
    struct filemgr *curfile;

    // Shards of the block cache for a file.
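    // Each shard has its own lock, clean list, dirty trees, and hash table so
    // that accesses to different blocks of the same file contend less often;
    // a block with id 'bid' always maps to shard 'bid % num_shards'
    // (see bcache_read() / bcache_write() below).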
    struct bcache_shard *shards;

    // list elem for FILE_ZOMBIE
    struct list_elem le;
    // hash elem for FNAMEDIC
    struct hash_elem hash_elem;

    atomic_uint32_t ref_count;
    atomic_uint64_t nvictim;
    atomic_uint64_t nitems;
    atomic_uint64_t access_timestamp;
    size_t num_shards;
};

#define BCACHE_DIRTY (0x1)
#define BCACHE_FREE (0x4)

struct bcache_item {
    // BID
    bid_t bid;
    // contents address
    void *addr;
    // hash elem for lookup hash table
    struct hash_elem hash_elem;
    // list elem for {free, clean, dirty} lists
    struct list_elem list_elem;
    // flag
    uint8_t flag;
    // score
    uint8_t score;
};

struct dirty_item {
    struct bcache_item *item;
    struct avl_node avl;
};

INLINE int _dirty_cmp(struct avl_node *a, struct avl_node *b, void *aux)
{
    struct dirty_item *aa, *bb;
    aa = _get_entry(a, struct dirty_item, avl);
    bb = _get_entry(b, struct dirty_item, avl);

#ifdef __BIT_CMP
    return _CMP_U64(aa->item->bid, bb->item->bid);
#else
    if (aa->item->bid < bb->item->bid) return -1;
    else if (aa->item->bid > bb->item->bid) return 1;
    else return 0;
#endif
}

INLINE uint32_t _fname_hash(struct hash *hash, struct hash_elem *e)
{
    struct fnamedic_item *item;
    item = _get_entry(e, struct fnamedic_item, hash_elem);
    return item->hash % ((unsigned)(BCACHE_NDICBUCKET));
}

INLINE int _fname_cmp(struct hash_elem *a, struct hash_elem *b)
{
    size_t len;
    struct fnamedic_item *aa, *bb;
    aa = _get_entry(a, struct fnamedic_item, hash_elem);
    bb = _get_entry(b, struct fnamedic_item, hash_elem);

    if (aa->filename_len == bb->filename_len) {
        return memcmp(aa->filename, bb->filename, aa->filename_len);
    } else {
        len = MIN(aa->filename_len, bb->filename_len);
        int cmp = memcmp(aa->filename, bb->filename, len);
        if (cmp != 0) return cmp;
        else {
            return (int)((int)aa->filename_len - (int)bb->filename_len);
        }
    }
}

INLINE uint32_t _bcache_hash(struct hash *hash, struct hash_elem *e)
{
    struct bcache_item *item = _get_entry(e, struct bcache_item, hash_elem);
    return (item->bid) % ((uint32_t)BCACHE_NBUCKET);
}

INLINE int _bcache_cmp(struct hash_elem *a, struct hash_elem *b)
{
    struct bcache_item *aa, *bb;
    aa = _get_entry(a, struct bcache_item, hash_elem);
    bb = _get_entry(b, struct bcache_item, hash_elem);

#ifdef __BIT_CMP
    return _CMP_U64(aa->bid, bb->bid);
#else
    if (aa->bid == bb->bid) return 0;
    else if (aa->bid < bb->bid) return -1;
    else return 1;
#endif
}

#define _list_empty(list) (list.head == NULL)
#define _tree_empty(tree) (tree.root == NULL)

static void _acquire_all_shard_locks(struct fnamedic_item *fname) {
    size_t i = 0;
    for (; i < fname->num_shards; ++i) {
        spin_lock(&fname->shards[i].lock);
    }
}

static void _release_all_shard_locks(struct fnamedic_item *fname) {
    size_t i = 0;
    for (; i < fname->num_shards; ++i) {
        spin_unlock(&fname->shards[i].lock);
    }
}

static bool _file_empty(struct fnamedic_item *fname) {
    bool empty = true;
    size_t i = 0;
    _acquire_all_shard_locks(fname);
    for (; i < fname->num_shards; ++i) {
        if (!(_list_empty(fname->shards[i].cleanlist) &&
              _tree_empty(fname->shards[i].tree) &&
              _tree_empty(fname->shards[i].tree_idx))) {
            empty = false;
            break;
        }
    }
    _release_all_shard_locks(fname);
    return empty;
}

INLINE bool _shard_empty(struct bcache_shard *bshard) {
    // Caller should grab the shard lock before calling this function.
    return _list_empty(bshard->cleanlist) &&
           _tree_empty(bshard->tree) &&
           _tree_empty(bshard->tree_idx);
}

struct fnamedic_item *_bcache_get_victim()
{
    struct fnamedic_item *ret = NULL;
    uint64_t min_timestamp = (uint64_t) -1;
    uint64_t victim_timestamp;
    int victim_idx;
    size_t num_attempts;

    rw_spin_read_lock(&filelist_lock);
    // Pick the victim with the smallest access timestamp
    // among a few randomly selected files.
    num_attempts = num_files / 10 + 1;
    if (num_attempts > MAX_VICTIM_SELECTIONS) {
        num_attempts = MAX_VICTIM_SELECTIONS;
    } else {
        if (num_attempts == 1 && num_files > 1) {
            ++num_attempts;
        }
    }
    for (size_t i = 0; i < num_attempts && num_files; ++i) {
        victim_idx = rand() % num_files;
        victim_timestamp =
            atomic_get_uint64_t(&file_list[victim_idx]->access_timestamp);
        if (victim_timestamp < min_timestamp &&
            atomic_get_uint64_t(&file_list[victim_idx]->nitems)) {
            min_timestamp = victim_timestamp;
            ret = file_list[victim_idx];
        }
    }

    if (ret) {
        atomic_incr_uint32_t(&ret->ref_count);
    }
    rw_spin_read_unlock(&filelist_lock);

    return ret;
}

static struct bcache_item *_bcache_alloc_freeblock()
{
    struct list_elem *e = NULL;
    struct bcache_item *item;

    spin_lock(&freelist_lock);
    e = list_pop_front(&freelist);
    if (e) {
        freelist_count--;
    }
    spin_unlock(&freelist_lock);

    if (e) {
        item = _get_entry(e, struct bcache_item, list_elem);
        return item;
    }
    return NULL;
}

static void _bcache_release_freeblock(struct bcache_item *item)
{
    spin_lock(&freelist_lock);
    item->flag = BCACHE_FREE;
    item->score = 0;
    list_push_front(&freelist, &item->list_elem);
    freelist_count++;
    spin_unlock(&freelist_lock);
}

static struct fnamedic_item *_next_dead_fname_zombie(void) {
    struct list_elem *e;
    struct fnamedic_item *fname_item;
    bool found = false;
    rw_spin_write_lock(&filelist_lock);
    e = list_begin(&file_zombies);
    while (e) {
        fname_item = _get_entry(e, struct fnamedic_item, le);
        if (atomic_get_uint32_t(&fname_item->ref_count) == 0) {
            list_remove(&file_zombies, e);
            found = true;
            break;
        } else {
            e = list_next(e);
        }
    }
    rw_spin_write_unlock(&filelist_lock);
    return found ? fname_item : NULL;
}

static void _fname_free(struct fnamedic_item *fname);

static void _garbage_collect_zombie_fnames(void) {
    struct fnamedic_item *fname_item = _next_dead_fname_zombie();
    while (fname_item) {
        _fname_free(fname_item);
        fname_item = _next_dead_fname_zombie();
    }
}

struct dirty_bid {
    bid_t bid;
    struct avl_node avl;
};

INLINE int _dirty_bid_cmp(struct avl_node *a, struct avl_node *b, void *aux)
{
    struct dirty_bid *aa, *bb;
    aa = _get_entry(a, struct dirty_bid, avl);
    bb = _get_entry(b, struct dirty_bid, avl);

#ifdef __BIT_CMP
    return _CMP_U64(aa->bid, bb->bid);
#else
    if (aa->bid < bb->bid) return -1;
    else if (aa->bid > bb->bid) return 1;
    else return 0;
#endif
}

static void _free_dirty_blocks(struct dirty_bid **dirty_bids, size_t n) {
    size_t i = 0;
    for (; i < n; ++i) {
        if (dirty_bids[i]) {
            mempool_free(dirty_bids[i]);
        }
    }
}

// Flush some consecutive or all dirty blocks for a given file and
// move them to the clean list.
static fdb_status _flush_dirty_blocks(struct fnamedic_item *fname_item,
                                      bool sync, bool flush_all)
{
    void *buf = NULL;
    struct list_elem *prevhead = NULL;
    struct avl_tree *cur_tree = NULL;
    struct avl_node *node = NULL;
    struct dirty_bid *dbid = NULL;
    uint64_t count = 0;
    ssize_t ret = 0;
    bid_t start_bid = 0, prev_bid = 0;
    void *ptr = NULL;
    uint8_t marker = 0x0;
    fdb_status status = FDB_RESULT_SUCCESS;
    bool o_direct = false;
    bool data_block_completed = false;
    struct avl_tree dirty_blocks; // Cross-shard dirty block list for sequential writes.

    if (fname_item->curfile->config->flag & _ARCH_O_DIRECT) {
        o_direct = true;
    }

    // scan and write back dirty blocks sequentially for the O_DIRECT option.
    if (sync && o_direct) {
        malloc_align(buf, FDB_SECTOR_SIZE, bcache_flush_unit);
        _acquire_all_shard_locks(fname_item);
    }

    prev_bid = start_bid = BLK_NOT_FOUND;
    count = 0;

    avl_init(&dirty_blocks, NULL);

    // Try to flush the dirty data blocks first and then the index blocks.
    size_t i = 0;
    bool consecutive_blocks = true;
    struct dirty_bid **dirty_bids = alca(struct dirty_bid *, fname_item->num_shards);
    memset(dirty_bids, 0x0, sizeof(dirty_bid *) * fname_item->num_shards);
    while (1) {
        if (!(node = avl_first(&dirty_blocks))) {
            for (i = 0; i < fname_item->num_shards; ++i) {
                if (!(sync && o_direct)) {
                    spin_lock(&fname_item->shards[i].lock);
                }
                if (!data_block_completed) {
                    node = avl_first(&fname_item->shards[i].tree);
                } else {
                    node = avl_first(&fname_item->shards[i].tree_idx);
                }
                if (node) {
                    if (!dirty_bids[i]) {
                        dirty_bids[i] = (struct dirty_bid *)
                            mempool_alloc(sizeof(struct dirty_bid));
                    }
                    dirty_bids[i]->bid =
                        _get_entry(node, struct dirty_item, avl)->item->bid;
                    avl_insert(&dirty_blocks, &dirty_bids[i]->avl, _dirty_bid_cmp);
                }
                if (!(sync && o_direct)) {
                    spin_unlock(&fname_item->shards[i].lock);
                }
            }
            if (!(node = avl_first(&dirty_blocks))) {
                if (!data_block_completed) {
                    data_block_completed = true;
                    if (count > 0 && !flush_all) {
                        // Finished flushing some dirty data blocks.
                        // Do not move over to the dirty index block list because
                        // flush_all is not requested.
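                        // (This is the eviction path: flushing one batch of
                        // consecutive dirty data blocks is enough to produce
                        // clean blocks that the caller can evict.)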
                        break;
                    }
                    continue;
                } else {
                    break;
                }
            }
        }

        dbid = _get_entry(node, struct dirty_bid, avl);

        size_t shard_num = dbid->bid % fname_item->num_shards;
        if (!(sync && o_direct)) {
            spin_lock(&fname_item->shards[shard_num].lock);
        }
        if (!data_block_completed) {
            cur_tree = &fname_item->shards[shard_num].tree;
        } else {
            cur_tree = &fname_item->shards[shard_num].tree_idx;
        }

        struct dirty_item *dirty_block = NULL;
        bool item_exist = false;
        node = avl_first(cur_tree);
        if (node) {
            dirty_block = _get_entry(node, struct dirty_item, avl);
            if (dbid->bid == dirty_block->item->bid) {
                item_exist = true;
            }
        }
        // remove from the cross-shard dirty block list.
        avl_remove(&dirty_blocks, &dbid->avl);
        if (!item_exist) {
            // The original first item in the shard dirty block list was removed.
            // Grab the next one from the cross-shard dirty block list.
            if (!(sync && o_direct)) {
                spin_unlock(&fname_item->shards[shard_num].lock);
            }
            continue;
        }

        consecutive_blocks = true;
        // if the BID of the next dirty block is not consecutive .. stop
        if (dirty_block->item->bid != prev_bid + 1 && prev_bid != BLK_NOT_FOUND &&
            sync) {
            if (flush_all) {
                consecutive_blocks = false;
            } else {
                if (!(sync && o_direct)) {
                    spin_unlock(&fname_item->shards[shard_num].lock);
                }
                break;
            }
        }
        // set START_BID if this is the start block for a single batch write.
        if (start_bid == BLK_NOT_FOUND) {
            start_bid = dirty_block->item->bid;
        }
        // set PREV_BID and go to the next block
        prev_bid = dirty_block->item->bid;

        // set PTR and get the block MARKER
        ptr = dirty_block->item->addr;
        marker = *((uint8_t*)(ptr) + bcache_blocksize-1);
        dirty_block->item->flag &= ~(BCACHE_DIRTY);
        if (sync) {
            // copy to buffer
#ifdef __CRC32
            if (marker == BLK_MARKER_BNODE) {
                // b-tree node .. calculate crc32 and put it into the block
                memset((uint8_t *)(ptr) + BTREE_CRC_OFFSET,
                       0xff, BTREE_CRC_FIELD_LEN);
                uint32_t crc = chksum(ptr, bcache_blocksize);
                crc = _endian_encode(crc);
                memcpy((uint8_t *)(ptr) + BTREE_CRC_OFFSET, &crc, sizeof(crc));
            }
#endif
            if (o_direct) {
                if (count > 0 && !consecutive_blocks) {
                    size_t bytes_written;
                    // Note that this path can only be executed in the flush_all case.
                    bytes_written = (size_t)fname_item->curfile->ops->pwrite(
                                                 fname_item->curfile->fd,
                                                 buf, count * bcache_blocksize,
                                                 start_bid * bcache_blocksize);
                    if (bytes_written != count * bcache_blocksize) {
                        count = 0;
                        status = FDB_RESULT_WRITE_FAIL;
                        break;
                    }
                    // Start a new batch again.
                    count = 0;
                    start_bid = dirty_block->item->bid;
                }
                memcpy((uint8_t *)(buf) + count*bcache_blocksize,
                       dirty_block->item->addr, bcache_blocksize);
            } else {
                ret = fname_item->curfile->ops->pwrite(fname_item->curfile->fd,
                                                       dirty_block->item->addr,
                                                       bcache_blocksize,
                                                       dirty_block->item->bid * bcache_blocksize);
                if (ret != bcache_blocksize) {
                    if (!(sync && o_direct)) {
                        spin_unlock(&fname_item->shards[shard_num].lock);
                    }
                    status = FDB_RESULT_WRITE_FAIL;
                    break;
                }
            }
        }

        node = avl_next(node);
        // remove from the shard dirty block list.
        avl_remove(cur_tree, &dirty_block->avl);

        // move to the shard clean block list.
        prevhead = fname_item->shards[shard_num].cleanlist.head;
        (void)prevhead;
        list_push_front(&fname_item->shards[shard_num].cleanlist,
                        &dirty_block->item->list_elem);

        fdb_assert(!(dirty_block->item->flag & BCACHE_FREE),
                   dirty_block->item->flag, BCACHE_FREE);
        fdb_assert(dirty_block->item->list_elem.prev == NULL &&
                   prevhead == dirty_block->item->list_elem.next,
                   prevhead, dirty_block->item->list_elem.next);
        mempool_free(dirty_block);

        // Get the next dirty block from the victim shard and insert it into
        // the cross-shard dirty block list.
        if (node) {
            dbid->bid = _get_entry(node, struct dirty_item, avl)->item->bid;
            avl_insert(&dirty_blocks, &dbid->avl, _dirty_bid_cmp);
        }
        if (!(sync && o_direct)) {
            spin_unlock(&fname_item->shards[shard_num].lock);
        }

        count++;
        if (count*bcache_blocksize >= bcache_flush_unit && sync) {
            if (flush_all) {
                if (o_direct) {
                    ret = fname_item->curfile->ops->pwrite(fname_item->curfile->fd,
                                                           buf, count * bcache_blocksize,
                                                           start_bid * bcache_blocksize);
                    if ((size_t)ret != count * bcache_blocksize) {
                        count = 0;
                        status = FDB_RESULT_WRITE_FAIL;
                        break;
                    }
                    count = 0;
                    start_bid = BLK_NOT_FOUND;
                    prev_bid = BLK_NOT_FOUND;
                }
            } else {
                break;
            }
        }
    }

    // synchronize
    if (sync && o_direct) {
        if (count > 0) {
            ret = fname_item->curfile->ops->pwrite(fname_item->curfile->fd, buf,
                                                   count * bcache_blocksize,
                                                   start_bid * bcache_blocksize);
            if ((size_t)ret != count * bcache_blocksize) {
                status = FDB_RESULT_WRITE_FAIL;
            }
        }
        _release_all_shard_locks(fname_item);
        free_align(buf);
    }

    _free_dirty_blocks(dirty_bids, fname_item->num_shards);
    return status;
}

// perform eviction
static struct list_elem * _bcache_evict(struct fnamedic_item *curfile)
{
    size_t n_evict;
    struct list_elem *e = NULL;
    struct bcache_item *item;
    struct fnamedic_item *victim = NULL;

    // We don't need to grab the global buffer cache lock here because
    // the file's buffer cache instance (fnamedic_item) can be freed only if
    // there are no database handles opened for that file.

    while (victim == NULL) {
        // select a victim file
        victim = _bcache_get_victim();
        while (victim) {
            // check whether this file has at least one block to be evicted
            if (atomic_get_uint64_t(&victim->nitems)) {
                // select this file as the victim
                break;
            } else {
                atomic_decr_uint32_t(&victim->ref_count);
                victim = NULL; // Try to select a victim again
            }
        }
    }
    fdb_assert(victim, victim, NULL);

    atomic_incr_uint64_t(&victim->nvictim);

    // select the clean blocks from the victim file
    n_evict = 0;
    while (n_evict < BCACHE_EVICT_UNIT) {
        size_t num_shards = victim->num_shards;
        size_t i = random(num_shards);
        bool found_victim_shard = false;
        bcache_shard *bshard = NULL;

        for (size_t to_visit = num_shards; to_visit; --to_visit) {
            i = (i + 1) % num_shards; // Round-robin over the shards, skipping empty ones.
            bshard = &victim->shards[i];
            spin_lock(&bshard->lock);
            if (_shard_empty(bshard)) {
                spin_unlock(&bshard->lock);
                continue;
            }
            e = list_pop_back(&bshard->cleanlist);
            if (!e) {
                spin_unlock(&bshard->lock);
                // When the victim shard has no clean block, evict some dirty blocks
                // from the shards.
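                // This writes dirty blocks back to the file and moves them to
                // the shard clean lists, so that the retry below can find
                // clean blocks to pop for eviction.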
                if (_flush_dirty_blocks(victim, true, false) != FDB_RESULT_SUCCESS) {
                    atomic_decr_uint32_t(&victim->ref_count);
                    return NULL;
                }
                continue; // Select a victim shard again.
            }

            item = _get_entry(e, struct bcache_item, list_elem);
#ifdef __BCACHE_SECOND_CHANCE
            // repeat until a zero-score item is found
            if (item->score == 0) {
                found_victim_shard = true;
                break;
            } else {
                // give a second chance to the item
                item->score--;
                list_push_front(&bshard->cleanlist, &item->list_elem);
                spin_unlock(&bshard->lock);
            }
#else
            found_victim_shard = true;
            break;
#endif
        }
        if (!found_victim_shard) {
            // We couldn't find any non-empty shards even after 'num_shards'
            // attempts.
            // The file is *likely* empty. Note that it is OK to return NULL
            // even if the file is not empty because the caller will retry.
            atomic_decr_uint32_t(&victim->ref_count);
            return NULL;
        }

        atomic_decr_uint64_t(&victim->nitems);
        // remove from the hash table and insert into the freelist
        hash_remove(&bshard->hashtable, &item->hash_elem);
        // add to the freelist
        _bcache_release_freeblock(item);
        n_evict++;

        spin_unlock(&bshard->lock);

        if (atomic_get_uint64_t(&victim->nitems) == 0) {
            break;
        }
    }

    atomic_decr_uint32_t(&victim->ref_count);
    return &item->list_elem;
}

static struct fnamedic_item * _fname_create(struct filemgr *file) {
    // TODO: we MUST NOT directly read the file structure

    struct fnamedic_item *fname_new;
    // Before we create a new filename entry, garbage collect zombies
    _garbage_collect_zombie_fnames();
    fname_new = (struct fnamedic_item *)malloc(sizeof(struct fnamedic_item));

    fname_new->filename_len = strlen(file->filename);
    fname_new->filename = (char *)malloc(fname_new->filename_len + 1);
    memcpy(fname_new->filename, file->filename, fname_new->filename_len);
    fname_new->filename[fname_new->filename_len] = 0;

    // calculate the hash value
    fname_new->hash = chksum((void *)fname_new->filename,
                             fname_new->filename_len);
    fname_new->curfile = file;
    atomic_init_uint64_t(&fname_new->nvictim, 0);
    atomic_init_uint64_t(&fname_new->nitems, 0);
    atomic_init_uint32_t(&fname_new->ref_count, 0);
    atomic_init_uint64_t(&fname_new->access_timestamp, 0);
    if (file->config->num_bcache_shards) {
        fname_new->num_shards = file->config->num_bcache_shards;
    } else {
        fname_new->num_shards = DEFAULT_NUM_BCACHE_PARTITIONS;
    }
    // For random eviction among shards
    randomize();

    fname_new->shards = (bcache_shard *)
        malloc(sizeof(struct bcache_shard) * fname_new->num_shards);
    size_t i = 0;
    for (; i < fname_new->num_shards; ++i) {
        // initialize trees
        avl_init(&fname_new->shards[i].tree, NULL);
        avl_init(&fname_new->shards[i].tree_idx, NULL);
        // initialize the clean list
        list_init(&fname_new->shards[i].cleanlist);
        // initialize the hash table
        hash_init(&fname_new->shards[i].hashtable, BCACHE_NBUCKET,
                  _bcache_hash, _bcache_cmp);
        spin_init(&fname_new->shards[i].lock);
    }

    // insert into the fname dictionary
    hash_insert(&fnamedic, &fname_new->hash_elem);
    file->bcache = fname_new;

    rw_spin_write_lock(&filelist_lock);
    if (num_files == file_array_capacity) {
        file_array_capacity *= 2;
        file_list = (struct fnamedic_item **)
            realloc(file_list, file_array_capacity * sizeof(struct fnamedic_item *));
    }
    file_list[num_files++] = fname_new;
    rw_spin_write_unlock(&filelist_lock);

    return fname_new;
}

static bool _fname_try_free(struct fnamedic_item *fname)
{
    bool ret = true;

    rw_spin_write_lock(&filelist_lock);
    // Remove from the file list array
    bool found = false;
    for (size_t i = 0; i < num_files; ++i) {
        if (file_list[i] == fname) {
            found = true;
        }
        if (found && (i+1 < num_files)) {
            file_list[i] = file_list[i+1];
        }
    }
    fdb_assert(found, num_files, fname->ref_count.val);
    file_list[num_files - 1] = NULL;
    --num_files;
    if (atomic_get_uint32_t(&fname->ref_count) != 0) {
        // This item was selected as a victim by another thread's _bcache_evict().
        list_push_front(&file_zombies, &fname->le);
        ret = false; // Delay deletion
    }

    rw_spin_write_unlock(&filelist_lock);
    return ret;
}

static void _fname_free(struct fnamedic_item *fname)
{
    // the file must be empty
    fdb_assert(_file_empty(fname), false, true);
    uint32_t ref_count = atomic_get_uint32_t(&fname->ref_count);
    fdb_assert(ref_count == 0, 0, ref_count);

    // free the shard hash tables
    size_t i = 0;
    for (; i < fname->num_shards; ++i) {
        hash_free(&fname->shards[i].hashtable);
        spin_destroy(&fname->shards[i].lock);
    }

    free(fname->shards);
    free(fname->filename);
    atomic_destroy_uint32_t(&fname->ref_count);
    atomic_destroy_uint64_t(&fname->nvictim);
    atomic_destroy_uint64_t(&fname->nitems);
    atomic_destroy_uint64_t(&fname->access_timestamp);
    free(fname);
}

INLINE void _bcache_set_score(struct bcache_item *item)
{
#ifdef __CRC32
    uint8_t marker;

    // get the block MARKER from the last byte of the block
    marker = *((uint8_t*)(item->addr) + bcache_blocksize-1);
    if (marker == BLK_MARKER_BNODE) {
        // b-tree node .. set the item's score to 1
        item->score = 1;
    } else {
        item->score = 0;
    }
#endif
}

int bcache_read(struct filemgr *file, bid_t bid, void *buf)
{
    struct hash_elem *h;
    struct bcache_item *item;
    struct bcache_item query;
    struct fnamedic_item *fname;

    // Note that we don't need to grab bcache_lock here as the block cache
    // is already created and bound when the file is created or opened for
    // the first time.
    fname = file->bcache;

    if (fname) {
        // the file exists
        // set the query
        query.bid = bid;
        // Update the access timestamp.
        struct timeval tp;
        gettimeofday(&tp, NULL); // TODO: Need to implement a better way of
                                 // getting the timestamp to avoid the overhead of
                                 // gettimeofday()
        atomic_store_uint64_t(&fname->access_timestamp,
                              (uint64_t) (tp.tv_sec * 1000000 + tp.tv_usec));

        size_t shard_num = bid % fname->num_shards;
        spin_lock(&fname->shards[shard_num].lock);

        // search the shard hash table
        h = hash_find(&fname->shards[shard_num].hashtable, &query.hash_elem);
        if (h) {
            // cache hit
            item = _get_entry(h, struct bcache_item, hash_elem);
            fdb_assert(!(item->flag & BCACHE_FREE), item->flag, file);

            // move the item to the head of the list if the block is clean
            // (don't care if the block is dirty)
            if (!(item->flag & BCACHE_DIRTY)) {
                // TODO: Scanning the list would cause some overhead. We need to devise
                // a better data structure to provide a fast lookup for the clean list.
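                // Re-inserting at the head keeps the clean list in rough LRU
                // order: _bcache_evict() pops victims from the back of this
                // list via list_pop_back().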
                list_remove(&fname->shards[shard_num].cleanlist, &item->list_elem);
                list_push_front(&fname->shards[shard_num].cleanlist, &item->list_elem);
            }

            memcpy(buf, item->addr, bcache_blocksize);
            _bcache_set_score(item);

            spin_unlock(&fname->shards[shard_num].lock);

            return bcache_blocksize;
        } else {
            // cache miss
            spin_unlock(&fname->shards[shard_num].lock);
        }
    }

    // does not exist .. cache miss
    return 0;
}

void bcache_invalidate_block(struct filemgr *file, bid_t bid)
{
    struct hash_elem *h;
    struct bcache_item *item;
    struct bcache_item query;
    struct fnamedic_item *fname;

    // Note that we don't need to grab bcache_lock here as the block cache
    // is already created and bound when the file is created or opened for
    // the first time.
    fname = file->bcache;

    if (fname) {
        // the file exists
        // set the query
        query.bid = bid;
        // Update the access timestamp.
        struct timeval tp;
        gettimeofday(&tp, NULL);
        atomic_store_uint64_t(&fname->access_timestamp,
                              (uint64_t) (tp.tv_sec * 1000000 + tp.tv_usec));

        size_t shard_num = bid % fname->num_shards;
        spin_lock(&fname->shards[shard_num].lock);

        // search BHASH
        h = hash_find(&fname->shards[shard_num].hashtable, &query.hash_elem);
        if (h) {
            // cache hit
            item = _get_entry(h, struct bcache_item, hash_elem);
            fdb_assert(!(item->flag & BCACHE_FREE), item->flag, BCACHE_FREE);

            if (!(item->flag & BCACHE_DIRTY)) {
                atomic_decr_uint64_t(&fname->nitems);
                // only for clean blocks
                // remove from the hash table and insert into the freelist
                hash_remove(&fname->shards[shard_num].hashtable, &item->hash_elem);
                // remove from the clean list
                list_remove(&fname->shards[shard_num].cleanlist, &item->list_elem);
                spin_unlock(&fname->shards[shard_num].lock);

                // add to the freelist
                _bcache_release_freeblock(item);
            } else {
                spin_unlock(&fname->shards[shard_num].lock);
            }
        } else {
            // cache miss
            spin_unlock(&fname->shards[shard_num].lock);
        }
    }
}

int bcache_write(struct filemgr *file,
                 bid_t bid,
                 void *buf,
                 bcache_dirty_t dirty)
{
    struct hash_elem *h = NULL;
    struct bcache_item *item;
    struct bcache_item query;
    struct fnamedic_item *fname_new;

    fname_new = file->bcache;
    if (fname_new == NULL) {
        spin_lock(&bcache_lock);
        fname_new = file->bcache;
        if (fname_new == NULL) {
            // the filename doesn't exist in the filename dictionary .. create it
            fname_new = _fname_create(file);
        }
        spin_unlock(&bcache_lock);
    }

    // Update the access timestamp.
    struct timeval tp;
    gettimeofday(&tp, NULL);
    atomic_store_uint64_t(&fname_new->access_timestamp,
                          (uint64_t) (tp.tv_sec * 1000000 + tp.tv_usec));

    size_t shard_num = bid % fname_new->num_shards;
    // set the query
    query.bid = bid;

    spin_lock(&fname_new->shards[shard_num].lock);

    // search the hash table
    h = hash_find(&fname_new->shards[shard_num].hashtable, &query.hash_elem);
    if (h == NULL) {
        // cache miss
        // get a free block
        while ((item = _bcache_alloc_freeblock()) == NULL) {
            // no free block .. perform eviction
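            // The shard lock is released while evicting because _bcache_evict()
            // may flush dirty blocks and grab shard locks (possibly of this same
            // file); the hash table is re-searched below since another thread
            // may have cached this block in the meantime.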
            spin_unlock(&fname_new->shards[shard_num].lock);

            _bcache_evict(fname_new);

            spin_lock(&fname_new->shards[shard_num].lock);
        }

        // re-search the hash table
        h = hash_find(&fname_new->shards[shard_num].hashtable, &query.hash_elem);
        if (h == NULL) {
            // insert into the hash table
            item->bid = bid;
            item->flag = BCACHE_FREE;
            hash_insert(&fname_new->shards[shard_num].hashtable, &item->hash_elem);
            h = &item->hash_elem;
        } else {
            // insert into the freelist again
            _bcache_release_freeblock(item);
            item = _get_entry(h, struct bcache_item, hash_elem);
        }
    } else {
        item = _get_entry(h, struct bcache_item, hash_elem);
    }

    fdb_assert(h, h, NULL);

    if (item->flag & BCACHE_FREE) {
        atomic_incr_uint64_t(&fname_new->nitems);
    }

    // remove from the clean list if the block is in it
    if (!(item->flag & BCACHE_DIRTY) && !(item->flag & BCACHE_FREE)) {
        list_remove(&fname_new->shards[shard_num].cleanlist, &item->list_elem);
    }
    item->flag &= ~BCACHE_FREE;

    if (dirty == BCACHE_REQ_DIRTY) {
        // DIRTY request
        // to avoid re-inserting an already existing item into the tree
        if (!(item->flag & BCACHE_DIRTY)) {
            // dirty block
            // insert into the tree
            struct dirty_item *ditem;
            uint8_t marker;

            ditem = (struct dirty_item *)
                    mempool_alloc(sizeof(struct dirty_item));
            ditem->item = item;

            marker = *((uint8_t*)buf + bcache_blocksize-1);
            if (marker == BLK_MARKER_BNODE) {
                // b-tree node
                avl_insert(&fname_new->shards[shard_num].tree_idx, &ditem->avl, _dirty_cmp);
            } else {
                avl_insert(&fname_new->shards[shard_num].tree, &ditem->avl, _dirty_cmp);
            }
        }
        item->flag |= BCACHE_DIRTY;
    } else {
        // CLEAN request
        // insert into the clean list only when it was originally clean
        if (!(item->flag & BCACHE_DIRTY)) {
            list_push_front(&fname_new->shards[shard_num].cleanlist, &item->list_elem);
            item->flag &= ~(BCACHE_DIRTY);
        }
    }

    memcpy(item->addr, buf, bcache_blocksize);
    _bcache_set_score(item);

    spin_unlock(&fname_new->shards[shard_num].lock);

    return bcache_blocksize;
}

int bcache_write_partial(struct filemgr *file,
                         bid_t bid,
                         void *buf,
                         size_t offset,
                         size_t len)
{
    struct hash_elem *h;
    struct bcache_item *item;
    struct bcache_item query;
    struct fnamedic_item *fname_new;

    fname_new = file->bcache;
    if (fname_new == NULL) {
        spin_lock(&bcache_lock);
        fname_new = file->bcache;
        if (fname_new == NULL) {
            // the filename doesn't exist in the filename dictionary .. create it
            fname_new = _fname_create(file);
        }
        spin_unlock(&bcache_lock);
    }

    // Update the access timestamp.
    struct timeval tp;
    gettimeofday(&tp, NULL);
    atomic_store_uint64_t(&fname_new->access_timestamp,
                          (uint64_t) (tp.tv_sec * 1000000 + tp.tv_usec));

    size_t shard_num = bid % fname_new->num_shards;
    // set the query
    query.bid = bid;

    spin_lock(&fname_new->shards[shard_num].lock);

    // search the hash table
    h = hash_find(&fname_new->shards[shard_num].hashtable, &query.hash_elem);
    if (h == NULL) {
        // cache miss .. partial write fails .. return 0
        spin_unlock(&fname_new->shards[shard_num].lock);
        return 0;

    } else {
        // cache hit .. get the block
        item = _get_entry(h, struct bcache_item, hash_elem);
    }

    fdb_assert(!(item->flag & BCACHE_FREE), item->flag, BCACHE_FREE);

    // check whether this is a dirty block
    // to avoid re-inserting an already existing item into the tree
    if (!(item->flag & BCACHE_DIRTY)) {
        // this block was a clean block
        uint8_t marker;
        struct dirty_item *ditem;

        // remove from the clean list
        list_remove(&fname_new->shards[shard_num].cleanlist, &item->list_elem);

        ditem = (struct dirty_item *)mempool_alloc(sizeof(struct dirty_item));
        ditem->item = item;

        // insert into the tree
        marker = *((uint8_t*)item->addr + bcache_blocksize-1);
        if (marker == BLK_MARKER_BNODE) {
            // b-tree node
            avl_insert(&fname_new->shards[shard_num].tree_idx, &ditem->avl, _dirty_cmp);
        } else {
            avl_insert(&fname_new->shards[shard_num].tree, &ditem->avl, _dirty_cmp);
        }
    }

    // always set this block as dirty
    item->flag |= BCACHE_DIRTY;

    memcpy((uint8_t *)(item->addr) + offset, buf, len);
    _bcache_set_score(item);

    spin_unlock(&fname_new->shards[shard_num].lock);

    return len;
}

// remove all dirty blocks of the FILE
// (they are only discarded and not written back)
void bcache_remove_dirty_blocks(struct filemgr *file)
{
    struct fnamedic_item *fname_item;

    fname_item = file->bcache;

    if (fname_item) {
        // Note that this function is only invoked as part of database file close or
        // removal when there are no database handles for a given file. Therefore,
        // we don't need to grab all the shard locks at once.

        // remove all dirty blocks
        _flush_dirty_blocks(fname_item, false, true);
    }
}

// remove all clean blocks of the FILE
void bcache_remove_clean_blocks(struct filemgr *file)
{
    struct list_elem *e;
    struct bcache_item *item;
    struct fnamedic_item *fname_item;

    fname_item = file->bcache;

    if (fname_item) {
        // Note that this function is only invoked as part of database file close or
        // removal when there are no database handles for a given file. Therefore,
        // we don't need to grab all the shard locks at once.

        // remove all clean blocks from each shard in the file.
        size_t i = 0;
        for (; i < fname_item->num_shards; ++i) {
            spin_lock(&fname_item->shards[i].lock);
            e = list_begin(&fname_item->shards[i].cleanlist);
            while (e) {
                item = _get_entry(e, struct bcache_item, list_elem);
                // remove from the clean list
                e = list_remove(&fname_item->shards[i].cleanlist, e);
                // remove from the hash table
                hash_remove(&fname_item->shards[i].hashtable, &item->hash_elem);
                // insert into the free list
                _bcache_release_freeblock(item);
            }
            spin_unlock(&fname_item->shards[i].lock);
        }
    }
}

// remove a file from the filename dictionary
// MUST make sure that no dirty blocks belong to this FILE
// (or a memory leak occurs)
bool bcache_remove_file(struct filemgr *file)
{
    bool rv = false;
    struct fnamedic_item *fname_item;

    // Before proceeding with deletion, garbage collect zombie files
    _garbage_collect_zombie_fnames();
    fname_item = file->bcache;

    if (fname_item) {
        // acquire the lock
        spin_lock(&bcache_lock);
        // the file must be empty
        fdb_assert(_file_empty(fname_item), fname_item, NULL);

        // remove from the fname dictionary hash table
        hash_remove(&fnamedic, &fname_item->hash_elem);
        spin_unlock(&bcache_lock);

        // We don't need to grab the file buffer cache's partition locks
        // at once because this function is only invoked when there are
        // no database handles that access the file.
        if (_fname_try_free(fname_item)) {
            _fname_free(fname_item); // no other callers accessing this file
            rv = true;
        } // else the fnamedic_item is in use by _bcache_evict(); deletion is delayed
    }
    return rv;
}

// flush and synchronize all dirty blocks of the FILE
// dirty blocks will be changed to clean blocks (not discarded)
fdb_status bcache_flush(struct filemgr *file)
{
    struct fnamedic_item *fname_item;
    fdb_status status = FDB_RESULT_SUCCESS;

    fname_item = file->bcache;

    if (fname_item) {
        // Note that this function is invoked as part of a commit operation while
        // the filemgr's lock is already grabbed by a committer.
        // Therefore, we don't need to grab all the shard locks at once.
        status = _flush_dirty_blocks(fname_item, true, true);
    }
    return status;
}

void bcache_init(int nblock, int blocksize)
{
    int i;
    struct bcache_item *item;
    struct list_elem *e;

    list_init(&freelist);
    list_init(&file_zombies);

    hash_init(&fnamedic, BCACHE_NDICBUCKET, _fname_hash, _fname_cmp);

    bcache_blocksize = blocksize;
    bcache_flush_unit = BCACHE_FLUSH_UNIT;
    bcache_nblock = nblock;
    spin_init(&bcache_lock);
    spin_init(&freelist_lock);
    rw_spin_init(&filelist_lock);
    freelist_count = 0;

    num_files = 0;
    file_array_capacity = 4096; // Initial capacity of the file list array.
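    // Pre-allocate all cache blocks up front: 'nblock' buffers of 'blocksize'
    // bytes each are pushed onto the global free list; bcache_write() later
    // pulls from this list and eviction returns blocks to it.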
    file_list = (fnamedic_item **) calloc(file_array_capacity, sizeof(fnamedic_item *));

    for (i = 0; i < nblock; ++i) {
        item = (struct bcache_item *)malloc(sizeof(struct bcache_item));

        item->bid = BLK_NOT_FOUND;
        item->flag = 0x0 | BCACHE_FREE;
        item->score = 0;

        list_push_front(&freelist, &item->list_elem);
        freelist_count++;
    }
    e = list_begin(&freelist);
    while (e) {
        item = _get_entry(e, struct bcache_item, list_elem);
        item->addr = (void *)malloc(bcache_blocksize);
        e = list_next(e);
    }

}

uint64_t bcache_get_num_free_blocks()
{
    return freelist_count;
}

// LCOV_EXCL_START
void bcache_print_items()
{
    size_t n = 1;
    size_t nfiles, nitems, nfileitems, nclean, ndirty;
    size_t scores[100], i, scores_local[100];
    size_t docs, bnodes;
    size_t docs_local, bnodes_local;
    uint8_t *ptr;

    nfiles = nitems = nfileitems = nclean = ndirty = 0;
    docs = bnodes = 0;
    memset(scores, 0, sizeof(size_t)*100);

    struct fnamedic_item *fname;
    struct bcache_item *item;
    struct dirty_item *dirty;
    struct list_elem *ee;
    struct avl_node *a;

    printf(" === Block cache statistics summary ===\n");
    printf("%3s %20s (%6s)(%6s)(c%6s d%6s)",
           "No", "Filename", "#Pages", "#Evict", "Clean", "Dirty");
#ifdef __CRC32
    printf("%6s%6s", "Doc", "Node");
#endif
    for (i = 0; i <= n; ++i) {
        printf(" [%d] ", (int)i);
    }
    printf("\n");

    for (size_t idx = 0; idx < num_files; ++idx) {
        fname = file_list[idx];
        memset(scores_local, 0, sizeof(size_t)*100);
        nfileitems = nclean = ndirty = 0;
        docs_local = bnodes_local = 0;

        size_t i = 0;
        for (; i < fname->num_shards; ++i) {
            ee = list_begin(&fname->shards[i].cleanlist);
            a = avl_first(&fname->shards[i].tree);

            while (ee) {
                item = _get_entry(ee, struct bcache_item, list_elem);
                scores[item->score]++;
                scores_local[item->score]++;
                nitems++;
                nfileitems++;
                nclean++;
#ifdef __CRC32
                ptr = (uint8_t*)item->addr + bcache_blocksize - 1;
                switch (*ptr) {
                case BLK_MARKER_BNODE:
                    bnodes_local++;
                    break;
                case BLK_MARKER_DOC:
                    docs_local++;
                    break;
                }
#endif
                ee = list_next(ee);
            }
            while (a) {
                dirty = _get_entry(a, struct dirty_item, avl);
                item = dirty->item;
                scores[item->score]++;
                scores_local[item->score]++;
                nitems++;
                nfileitems++;
                ndirty++;
#ifdef __CRC32
                ptr = (uint8_t*)item->addr + bcache_blocksize - 1;
                switch (*ptr) {
                case BLK_MARKER_BNODE:
                    bnodes_local++;
                    break;
                case BLK_MARKER_DOC:
                    docs_local++;
                    break;
                }
#endif
                a = avl_next(a);
            }
        }

        printf("%3d %20s (%6d)(%6d)(c%6d d%6d)",
               (int)nfiles+1, fname->filename,
               (int)atomic_get_uint64_t(&fname->nitems),
               (int)atomic_get_uint64_t(&fname->nvictim),
               (int)nclean, (int)ndirty);
        printf("%6d%6d", (int)docs_local, (int)bnodes_local);
        for (i = 0; i <= n; ++i) {
            printf("%6d ", (int)scores_local[i]);
        }
        printf("\n");

        docs += docs_local;
        bnodes += bnodes_local;

        nfiles++;
    }
    printf(" ===\n");

    printf("%d files %d items\n", (int)nfiles, (int)nitems);
    for (i = 0; i <= n; ++i) {
        printf("[%d]: %d\n", (int)i, (int)scores[i]);
    }
    printf("Documents: %d blocks\n", (int)docs);
    printf("Index nodes: %d blocks\n", (int)bnodes);
}
// LCOV_EXCL_STOP

// LCOV_EXCL_START
INLINE void _bcache_free_bcache_item(struct hash_elem *h)
{
    struct bcache_item *item = _get_entry(h, struct bcache_item, hash_elem);
    free(item->addr);
    free(item);
}
// LCOV_EXCL_STOP

// LCOV_EXCL_START
INLINE void _bcache_free_fnamedic(struct hash_elem *h)
{
    size_t i = 0;
    struct fnamedic_item *item;
    item = _get_entry(h, struct fnamedic_item, hash_elem);

    for (; i < item->num_shards; ++i) {
        hash_free_active(&item->shards[i].hashtable, _bcache_free_bcache_item);
        spin_destroy(&item->shards[i].lock);
    }

    free(item->shards);
    free(item->filename);

    atomic_destroy_uint32_t(&item->ref_count);
    atomic_destroy_uint64_t(&item->nvictim);
    atomic_destroy_uint64_t(&item->nitems);
    atomic_destroy_uint64_t(&item->access_timestamp);
    free(item);
}
// LCOV_EXCL_STOP

void bcache_shutdown()
{
    struct bcache_item *item;
    struct list_elem *e;

    e = list_begin(&freelist);
    while (e) {
        item = _get_entry(e, struct bcache_item, list_elem);
        e = list_remove(&freelist, e);
        freelist_count--;
        free(item->addr);
        free(item);
    }

    // Force clean zombies if any
    e = list_begin(&file_zombies);
    while (e) {
        struct fnamedic_item *fname = _get_entry(e, struct fnamedic_item, le);
        e = list_remove(&file_zombies, e);
        _fname_free(fname);
    }
    // Free the file list array
    free(file_list);

    spin_lock(&bcache_lock);
    hash_free_active(&fnamedic, _bcache_free_fnamedic);
    spin_unlock(&bcache_lock);

    spin_destroy(&bcache_lock);
    spin_destroy(&freelist_lock);
    rw_spin_destroy(&filelist_lock);
}