xref: /6.6.0/kv_engine/engines/ep/src/stored-value.h (revision 3d701d2d)
1/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/*
3 *     Copyright 2016 Couchbase, Inc
4 *
5 *   Licensed under the Apache License, Version 2.0 (the "License");
6 *   you may not use this file except in compliance with the License.
7 *   You may obtain a copy of the License at
8 *
9 *       http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *   Unless required by applicable law or agreed to in writing, software
12 *   distributed under the License is distributed on an "AS IS" BASIS,
13 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *   See the License for the specific language governing permissions and
15 *   limitations under the License.
16 */
17
18#pragma once
19
20#include "blob.h"
21#include "item_pager.h"
22#include "storeddockey.h"
23#include "tagged_ptr.h"
24#include "utility.h"
25
26#include <mcbp/protocol/datatype.h>
27#include <memcached/3rd_party/folly/AtomicBitSet.h>
28#include <memcached/types.h>
29#include <platform/n_byte_integer.h>
30
31#include <boost/intrusive/list.hpp>
32#include <memcached/durability_spec.h>
33#include <relaxed_atomic.h>
34
35class Item;
36class OrderedStoredValue;
37
38/**
39 * In-memory storage for an item.
40 *
41 * This class represents a single document which is present in the HashTable -
42 * essentially this is value_type used by HashTable.
43 *
44 * Overview
45 * ========
46 *
47 * It contains the documents' key, related metadata (CAS, rev, seqno, ...).
48 * It also has a pointer to the documents' value - which may be null if the
49 * value of the item is not currently resident (for example it's been evicted to
50 * save memory) or if the item has no value (deleted item has value as null)
51 * Additionally it contains flags to help HashTable manage the state of the
52 * item - such as dirty flag, NRU bits, and a `next` pointer to support
53 * chaining of StoredValues which hash to the same hash bucket.
54 *
55 * The key of the item is of variable length (from 1 to ~256 bytes). As an
56 * optimization, we allocate the key directly after the fixed size of
57 * StoredValue, so StoredValue and its key are contiguous in memory. This saves
58 * us the cost of an indirection compared to storing the key out-of-line, and
59 * the space of a pointer in StoredValue to point to the out-of-line
60 * allocation. It does, however complicate the management of StoredValue
61 * objects as they are now variable-sized - they must be created using a
62 * factory method (StoredValueFactory) and must be heap-allocated, managed
63 * using a unique_ptr (StoredValue::UniquePtr).
64 *
65 * Graphically the looks like:
66 *
67 *              StoredValue::UniquePtr
68 *                          |
69 *                          V
70 *               .-------------------.
71 *               | StoredValue       |
72 *               +-------------------+
73 *           {   | value [ptr]       | ======> Blob (nullptr if evicted)
74 *           {   | next  [ptr]       | ======> StoredValue (next in hash chain).
75 *     fixed {   | CAS               |
76 *    length {   | revSeqno          |
77 *           {   | ...               |
78 *           {   | datatype          |
79 *           {   | internal flags: isDirty, deleted, isOrderedStoredValue ...
80 *               + - - - - - - - - - +
81 *  variable {   | key[]             |
82 *   length  {   | ...               |
83 *               +-------------------+
84 *
85 * OrderedStoredValue
86 * ==================
87 *
88 * OrderedStoredValue is a "subclass" of StoredValue, which is used by
89 * Ephemeral buckets as it supports maintaining a seqno ordering of items in
90 * memory (for Persistent buckets this ordering is maintained on-disk).
91 *
92 * The implementation of OrderedStoredValue is tightly coupled to StoredValue
93 * so it will be described here:
94 *
95 * OrderedStoredValue has the fixed length members of StoredValue, then it's
96 * own fixed length fields (seqno list), followed finally by the variable
97 * length key (again, allocated contiguously):
98 *
99 *              StoredValue::UniquePtr
100 *                          |
101 *                          V
102 *               .--------------------.
103 *               | OrderedStoredValue |
104 *               +--------------------+
105 *           {   | value [ptr]        | ======> Blob (nullptr if evicted)
106 *           {   | next  [ptr]        | ======> StoredValue (next in hash chain).
107 *     fixed {   | StoredValue fixed ...
108 *    length {   + - - - - - - - - - -+
109 *           {   | seqno next [ptr]   |
110 *           {   | seqno prev [ptr]   |
111 *               + - - - - - - - - - -+
112 *  variable {   | key[]              |
113 *   length  {   | ...                |
114 *               +--------------------+
115 *
116 * To support dynamic dispatch (for example to lookup the key, whose location
117 * varies depending if it's StoredValue or OrderedStoredValue), we choose to
118 * use a manual flag-based dispatching (as opposed to a normal vTable based
119 * approach) as the per-object costs are much cheaper - 1 bit for the flag vs.
120 * 8 bytes for a vTable ptr.
121 * StoredValue::isOrderedStoredValue is set to false for StoredValue objects,
122 * and true for OrderedStoredValue objects, and then any methods
123 * needing dynamic dispatch read the value of the flag. Note this means that
124 * the 'base' class (StoredValue) needs to know about all possible subclasses
125 * (only one currently) and what class-specific code to call.
126 * Similary, deletion of OrderedStoredValue objects is delicate - we cannot
127 * safely delete via the base-class pointer directly, as that would only run
128 * ~StoredValue and not the members of the derived class. Instead a custom
129 * deleter is associated with StoredValue::UniquePtr, which checks the flag
130 * and dispatches to the correct destructor.
131 */
132class StoredValue {
133public:
134    /*
135     * Used at StoredValue->Item conversion, indicates whether the generated
136     * item exposes the CAS or not (i.e., the CAS is locked and the new item
137     * exposes a related special value).
138     */
139    enum class HideLockedCas : uint8_t { Yes, No };
140
141    /*
142     * Used at StoredValue->Item conversion, indicates whether the generated
143     * item includes the value or not (i.e., the new item carries only key and
144     * metadata).
145     */
146    enum class IncludeValue : uint8_t { Yes, No };
147
148    /**
149     * C++14 will call the sized delete version, but we
150     * allocate the object by using the new operator with a custom
151     * size (the key is packed after the object). We need to use
152     * the non-sized delete variant as the runtime don't know
153     * the size of the allocated object.
154     */
155    static void operator delete(void* ptr) {
156        ::operator delete(ptr);
157    }
158
159    /**
160     * Compress the value part of stored value. If the compressed document
161     * ends up being bigger than the original, then the method leaves the
162     * document inflated
163     *
164     * return true, if the compression was successful or if the compressed
165     *        document ends up being bigger than the original
166     *        false, otherwise
167     */
168    bool compressValue();
169
170    /**
171     * Replace the existing value with the given compressed buffer.
172     *
173     * @param deflated the input buffer holding compressed data
174     */
175    void storeCompressedBuffer(cb::const_char_buffer deflated);
176
177    // Custom deleter for StoredValue objects.
178    struct Deleter {
179        void operator()(StoredValue* val);
180    };
181
182    // Owning pointer type for StoredValue objects.
183    using UniquePtr = std::unique_ptr<StoredValue,
184            TaggedPtrDeleter<StoredValue, Deleter>>;
185
186    uint8_t getNRUValue() const;
187
188    void setNRUValue(uint8_t nru_val);
189
190    uint8_t incrNRUValue();
191
192    // Set the frequency counter value to the input value
193    void setFreqCounterValue(uint8_t newValue) {
194        auto tag = getValueTag();
195        tag.fields.frequencyCounter = newValue;
196        setValueTag(tag);
197    }
198
199    // Gets the frequency counter value
200    uint8_t getFreqCounterValue() const {
201        return getValueTag().fields.frequencyCounter;
202    }
203
204    void referenced();
205
206    /**
207     * Mark this item as needing to be persisted.
208     */
209    void markDirty() {
210        bits.set(dirtyIndex, true);
211    }
212
213    /**
214     * Mark this item as clean.
215     */
216    void markClean() {
217        bits.set(dirtyIndex, false);
218    }
219
220    /**
221     * True if this object is dirty.
222     */
223    bool isDirty() const {
224        return bits.test(dirtyIndex);
225    }
226
227    /**
228     * Check if the value is compressible
229     *
230     * @return true if the data is compressible
231     *         false if data is already compressed
232     *                  value doesn't exist
233     *                  value exists but has zero length
234     */
235    bool isCompressible() {
236        if (mcbp::datatype::is_snappy(datatype) || !valuelen()) {
237            return false;
238        }
239        return value->isCompressible();
240    }
241
242    bool eligibleForEviction(EvictionPolicy policy) const {
243        // Pending SyncWrite are always resident
244        if (isPending()) {
245            return false;
246        }
247
248        if (policy == EvictionPolicy::Value) {
249            return isResident() && !isDirty();
250        } else {
251            return !isDirty();
252        }
253    }
254
255    /**
256     * Check if this item is expired or not.
257     *
258     * @param asOf the time to be compared with this item's expiry time
259     * @return true if this item's expiry time < asOf
260     */
261    bool isExpired(time_t asOf) const {
262        if (getExptime() != 0 && getExptime() < asOf) {
263            return true;
264        }
265        return false;
266    }
267
268    /**
269     * True if this item is for the given key.
270     *
271     * @param k the key we're checking
272     * @return true if this item's key is equal to k
273     */
274    bool hasKey(const DocKey& k) const {
275        return getKey() == k;
276    }
277
278    /**
279     * Get this item's key.
280     */
281    const SerialisedDocKey& getKey() const {
282        return *const_cast<const SerialisedDocKey*>(
283                const_cast<StoredValue&>(*this).key());
284    }
285
286    /**
287     * Get this item's value.
288     */
289    const value_t &getValue() const {
290        return value;
291    }
292
293    /**
294     * Get the expiration time of this item.
295     *
296     * @return the expiration time.
297     */
298    time_t getExptime() const {
299        return exptime;
300    }
301
302    void setExptime(time_t tim) {
303        exptime = tim;
304        markDirty();
305    }
306
307    /**
308     * Get the client-defined flags of this item.
309     *
310     * @return the flags.
311     */
312    uint32_t getFlags() const {
313        return flags;
314    }
315
316    /**
317     * Set the client-defined flags for this item.
318     */
319    void setFlags(uint32_t fl) {
320        flags = fl;
321    }
322
323    /**
324     * get the items datatype
325     */
326    protocol_binary_datatype_t getDatatype() const {
327        return datatype;
328    }
329
330    /**
331     * Set the items datatype
332     */
333    void setDatatype(protocol_binary_datatype_t type) {
334        datatype = type;
335    }
336
337    void setUncompressible() {
338        if (value) {
339            value->setUncompressible();
340        }
341    }
342
343    /**
344     * Set a new value for this item.
345     *
346     * @param itm the item with a new value
347     */
348    void setValue(const Item& itm);
349
350    void markDeleted(DeleteSource delSource) {
351        setDeletedPriv(true);
352        setDeletionSource(delSource);
353        markDirty();
354    }
355
356    /**
357     * Eject an item value from memory.
358     */
359    void ejectValue();
360
361    /**
362     * Restore the value for this item.
363     *
364     * @param itm the item to be restored
365     */
366    void restoreValue(const Item& itm);
367
368    /**
369     * Restore the metadata of of a temporary item upon completion of a
370     * background fetch assuming the hashtable bucket is locked.
371     *
372     * @param itm the Item whose metadata is being restored
373     */
374    void restoreMeta(const Item& itm);
375
376    /**
377     * Get this item's CAS identifier.
378     *
379     * @return the cas ID
380     */
381    uint64_t getCas() const {
382        return cas;
383    }
384
385    /**
386     * Set a new CAS ID.
387     */
388    void setCas(uint64_t c) {
389        cas = c;
390    }
391
392    /**
393     * Lock this item until the given time.
394     */
395    void lock(rel_time_t expiry) {
396        if (isDeleted()) {
397            // Cannot lock Deleted items.
398            throw std::logic_error(
399                    "StoredValue::lock: Called on Deleted item");
400        }
401        lock_expiry_or_delete_or_complete_time.lock_expiry = expiry;
402    }
403
404    /**
405     * Unlock this item.
406     */
407    void unlock() {
408        if (isDeleted()) {
409            // Deleted items are not locked - just skip.
410            return;
411        }
412        lock_expiry_or_delete_or_complete_time.lock_expiry = 0;
413    }
414
415    /**
416     * True if this item has an ID.
417     *
418     * An item always has an ID after it's been persisted.
419     */
420    bool hasBySeqno() {
421        return bySeqno > 0;
422    }
423
424    /**
425     * Get this item's ID.
426     *
427     * @return the ID for the item; 0 if the item has no ID
428     */
429    int64_t getBySeqno() const {
430        return bySeqno;
431    }
432
433    /**
434     * Set the ID for this item.
435     *
436     * This is used by the persistene layer.
437     *
438     * It is an error to set an ID on an item that already has one.
439     */
440    void setBySeqno(int64_t to) {
441        if (to <= 0) {
442            throw std::invalid_argument("StoredValue::setBySeqno: to "
443                    "(which is " + std::to_string(to) + ") must be positive");
444        }
445        bySeqno = to;
446    }
447
448    // Marks the stored item as temporarily deleted
449    void setTempDeleted()
450    {
451        bySeqno = state_deleted_key;
452    }
453
454    // Marks the stored item as non-existent.
455    void setNonExistent()
456    {
457        bySeqno = state_non_existent_key;
458    }
459
460    /**
461     * Marks that the item's sequence number is pending (valid, non-temp; but
462     * final value not yet known.
463     */
464    void setPendingSeqno() {
465        bySeqno = state_pending_seqno;
466    }
467
468    /**
469     * Is this a temporary item created for processing a get-meta request?
470     */
471    bool isTempItem() const {
472        return (isTempNonExistentItem() || isTempDeletedItem() ||
473                isTempInitialItem());
474
475     }
476
477    /**
478     * Is this an initial temporary item?
479     */
480     bool isTempInitialItem() const {
481         return bySeqno == state_temp_init;
482    }
483
484    /**
485     * Is this a temporary item created for a non-existent key?
486     */
487    bool isTempNonExistentItem() const {
488         return bySeqno == state_non_existent_key;
489    }
490
491    /**
492     * Is this a temporary item created for a deleted key?
493     */
494    bool isTempDeletedItem() const {
495        return bySeqno == state_deleted_key;
496
497     }
498
499    size_t valuelen() const {
500        if (!value) {
501            return 0;
502        }
503        return value->valueSize();
504    }
505
506    /**
507     * Returns the uncompressed value length (if resident); else zero.
508     * For uncompressed values this is the same as valuelen().
509     */
510    size_t uncompressedValuelen() const;
511
512    /**
513     * Get the total size of this item.
514     *
515     * @return the amount of memory used by this item.
516     */
517    size_t size() const {
518        return getObjectSize() + valuelen();
519    }
520
521    /**
522     * Get the total uncompressed size of this item.
523     *
524     * @returns the amount of memory which would be used by this item if is it
525     * was uncompressed.
526     * For uncompressed items this is the same as size().
527     */
528    size_t uncompressedSize() const {
529        return getObjectSize() + uncompressedValuelen();
530    }
531
532    size_t metaDataSize() const {
533        return getObjectSize();
534    }
535
536    /**
537     * Return true if this item is locked as of the given timestamp.
538     *
539     * @param curtime lock expiration marker (usually the current time)
540     * @return true if the item is locked
541     */
542    bool isLocked(rel_time_t curtime) const {
543        if (isDeleted() || isCompleted()) {
544            // Deleted items cannot be locked.
545            return false;
546        }
547
548        if (lock_expiry_or_delete_or_complete_time.lock_expiry == 0 ||
549            (curtime > lock_expiry_or_delete_or_complete_time.lock_expiry)) {
550            return false;
551        }
552        return true;
553    }
554
555    /**
556     * True if this value is resident in memory currently.
557     */
558    bool isResident() const {
559        return bits.test(residentIndex);
560    }
561
562    void markNotResident() {
563        resetValue();
564        setResident(false);
565    }
566
567    /**
568     * Discard the value
569     * Side effects are that the frequency counter is cleared but the
570     * StoredValue age is not changed (it can still be a candidate for defrag).
571     */
572    void resetValue() {
573        auto age = getAge();
574        value.reset();
575        setAge(age);
576    }
577
578    /**
579     * Replace the value with the given pointer, ownership of the pointer is
580     * given to the StoredValue.
581     * @param data The Blob to take-over
582     */
583    void replaceValue(std::unique_ptr<Blob> data) {
584        // Maintain the tag
585        auto tag = getValueTag();
586        value.reset(data.release());
587        setValueTag(tag);
588    }
589
590    /**
591     * Replace the value with the given value_t
592     * @param value replace current value with this one
593     */
594    void replaceValue(const value_t& value) {
595        // Maintain the tag
596        auto tag = getValueTag();
597        this->value = value;
598        setValueTag(tag);
599    }
600
601    /**
602     * True if this object is logically deleted.
603     */
604    bool isDeleted() const {
605        return bits.test(deletedIndex);
606    }
607
608    /**
609     * Logically delete this object
610     * @param delSource The source of the deletion
611     * @return true if the item was deleted
612     */
613    bool del(DeleteSource delSource);
614
615    uint64_t getRevSeqno() const {
616        return revSeqno;
617    }
618
619    /**
620     * Set a new revision sequence number.
621     */
622    void setRevSeqno(uint64_t s) {
623        revSeqno = s;
624    }
625
626    /**
627     * Generate a new Item out of this StoredValue.
628     *
629     * @param vbid The vbucket containing the new item
630     * @param hideLockedCas Whether the new item will hide the CAS (i.e., CAS is
631     *     locked, the new item will expose CAS=-1)
632     * @param includeValue Whether we are keeping or discarding the value
633     * @param durabilityReqs If the StoredValue is a pending SyncWrite this
634     *        specifies the durability requirements for the item.
635     *
636     * @throws std::logic_error if the object is a pending SyncWrite and
637     *         requirements is /not/ specified.
638     */
639    std::unique_ptr<Item> toItem(
640            Vbid vbid,
641            HideLockedCas hideLockedCas = HideLockedCas::No,
642            IncludeValue includeValue = IncludeValue::Yes,
643            boost::optional<cb::durability::Requirements> durabilityReqs = {})
644            const;
645
646    /**
647     * Generate a new durable-abort Item.
648     *
649     * Note that all the StoredValue->Item conversions are covered in
650     * StoredValue::toItem(), except for durable-abort that is covered here.
651     * The reason is that in general we have a 1-to-1 relationship between
652     * StoredValue::CommittedState and Item::queue_op. That general case is
653     * handled by StoredValue::toItem(), which maps from CommittedState to
654     * queue_op.
655     * That is not true for durable-abort (which is the only exception). There
656     * is no concept of "aborted StoredValue", as at abort we just remove the
657     * Prepare from the HashTable.
658     * I.e., there is no CommittedState::abort (or similar), so we need to
659     * handle durable-abort in a dedicated conversion function.
660     *
661     * @param vbid The vbucket containing the new item
662     */
663    std::unique_ptr<Item> toItemAbort(Vbid vbid) const;
664
665    /**
666     * Get an item_info from the StoredValue
667     *
668     * @param vbuuid a VB UUID to set in to the item_info
669     * @returns item_info populated with the StoredValue's state if the
670     *                    StoredValue is not a temporary item (!::isTempItem()).
671     *                    If the object is a temporary item the optional is not
672     *                    initialised.
673     */
674    boost::optional<item_info> getItemInfo(uint64_t vbuuid) const;
675
676    void setNext(UniquePtr&& nextSv) {
677        if (isStalePriv()) {
678            throw std::logic_error(
679                    "StoredValue::setNext: StoredValue is stale,"
680                    "cannot set chain next value");
681        }
682        chain_next_or_replacement = std::move(nextSv);
683    }
684
685    UniquePtr& getNext() {
686        if (isStalePriv()) {
687            throw std::logic_error(
688                    "StoredValue::getNext: StoredValue is stale,"
689                    "cannot get chain next value");
690        }
691        return chain_next_or_replacement;
692    }
693
694    /**
695     * The age is get/set via the fragmenter
696     * @return the age of the StoredValue since it was allocated
697     */
698    uint8_t getAge() const;
699
700    /**
701     * The age is get/set via the fragmenter
702     * @param age a value to change the age field to.
703     */
704    void setAge(uint8_t age);
705
706    /**
707     * Increment the StoredValue's age field, this is a no-op if the age is 255.
708     */
709    void incrementAge();
710
711    /*
712     * Values of the bySeqno attribute used by temporarily created StoredValue
713     * objects.
714     */
715
716    /**
717     * Represents the state when a StoredValue is in the process of having its
718     * seqno updated (and so _will be_ non-temporary; but we don't yet have the
719     * new sequence number. This is unfortunately required due to limitations
720     * in sequence number generation; where we need delete an item in the
721     * HashTable before we know what its sequence number is going to be.
722     */
723    static const int64_t state_pending_seqno;
724
725    /// Represents an item that's deleted from memory but present on disk.
726    static const int64_t state_deleted_key;
727
728    /// Represents a non existent item
729    static const int64_t state_non_existent_key;
730
731    /**
732     * Represents a placeholder item created to mark that a bg fetch operation
733     * is pending.
734     */
735    static const int64_t state_temp_init;
736
737    /**
738     * Return the size in byte of this object; both the fixed fields and the
739     * variable-length key. Doesn't include value size (allocated externally).
740     */
741    inline size_t getObjectSize() const;
742
743    /**
744     * Reallocates the dynamic members of StoredValue. Used as part of
745     * defragmentation.
746     */
747    void reallocate();
748
749    /**
750     * Returns pointer to the subclass OrderedStoredValue if it the object is
751     * of the type, if not throws a bad_cast.
752     *
753     * Equivalent to dynamic cast, but done manually as we wanted to avoid
754     * vptr per object.
755     */
756    OrderedStoredValue* toOrderedStoredValue();
757    const OrderedStoredValue* toOrderedStoredValue() const;
758
759    /**
760     * Check if the contents of the StoredValue is same as that of the other
761     * one. Does not consider the intrusive hash bucket link.
762     *
763     * @param other The StoredValue to be compared with
764     */
765    bool operator==(const StoredValue& other) const;
766
767    bool operator!=(const StoredValue& other) const;
768
769    /// Return how many bytes are need to store item given key as a StoredValue
770    static size_t getRequiredStorage(const DocKey& key);
771
772    /**
773     * @return the deletion source of the stored value
774     */
775    DeleteSource getDeletionSource() const {
776        if (!isDeleted()) {
777            throw std::logic_error(
778                    "StoredValue::getDeletionSource: Called on a non-Deleted "
779                    "item");
780        }
781        return static_cast<DeleteSource>(deletionSource);
782    }
783
784    /// Returns if the stored value is pending or committed.
785    CommittedState getCommitted() const {
786        return static_cast<CommittedState>(committed);
787    }
788
789    /// Sets the Committed state of the SV to the specified value.
790    void setCommitted(CommittedState value) {
791        committed = static_cast<uint8_t>(value);
792    }
793
794    /// Returns if the stored value is a Pending SyncWrite.
795    bool isPending() const {
796        return (getCommitted() == CommittedState::Pending) ||
797               (getCommitted() == CommittedState::PreparedMaybeVisible);
798    }
799
800    /**
801     * Returns true if this is a Prepared SyncWrite which may already be
802     * visible, and hence blocks read access to any previous Committed value.
803     */
804    bool isPreparedMaybeVisible() const {
805        return getCommitted() == CommittedState::PreparedMaybeVisible;
806    }
807
808    /**
809     * Returns true if the stored value is Committed (ViaMutation or
810     * ViaPrepare).
811     */
812    bool isCommitted() const {
813        return !isPending();
814    }
815
816    /**
817     * Returns true if the stored value is Completed (by Abort or Commit).
818     */
819    bool isCompleted() const {
820        return (getCommitted() == CommittedState::PrepareAborted) ||
821               (getCommitted() == CommittedState::PrepareCommitted);
822    }
823
824    /**
825     * Set the time the item was completed or deleted at to the specified time.
826     *
827     * Only applicable for an OSV so we do nothing for a normal SV.
828     */
829    void setCompletedOrDeletedTime(time_t time);
830
831protected:
832    /**
833     * Constructor - protected as allocation needs to be done via
834     * StoredValueFactory.
835     *
836     * @param itm Item to base this StoredValue on.
837     * @param n The StoredValue which will follow the new stored value in
838     *           the hash bucket chain, which this new item will take
839     *           ownership of. (Typically the top of the hash bucket into
840     *           which the new item is being inserted).
841     * @param stats EPStats to update for this new StoredValue
842     * @param isOrdered Are we constructing an OrderedStoredValue?
843     */
844    StoredValue(const Item& itm,
845                UniquePtr n,
846                EPStats& stats,
847                bool isOrdered);
848
849    // Destructor. protected, as needs to be carefully deleted (via
850    // StoredValue::Destructor) depending on the value of isOrdered flag.
851    ~StoredValue();
852
853    /**
854     * Copy constructor - protected as allocation needs to be done via
855     * StoredValueFactory.
856     *
857     * @param other StoredValue being copied
858     * @param n The StoredValue which will follow the new stored value in
859     *           the hash bucket chain, which this new item will take
860     *           ownership of. (Typically the top of the hash bucket into
861     *           which the new item is being inserted).
862     * @param stats EPStats to update for this new StoredValue
863     */
864    StoredValue(const StoredValue& other,
865                UniquePtr n,
866                EPStats& stats);
867
868    /* Do not allow assignment */
869    StoredValue& operator=(const StoredValue& other) = delete;
870
871    /**
872     * Get the address of item's key .
873     */
874    inline SerialisedDocKey* key();
875
876    /**
877     * Logically mark this SV as deleted.
878     * Implementation for StoredValue instances (dispatched to by del() based
879     * on isOrdered==false).
880     * @param delSource The source of the deletion.
881     */
882    bool deleteImpl(DeleteSource delSource);
883
884    /**
885     * Baseline StoredValue->Item conversion function. Used internally by public
886     * (and more specific) conversion functions.
887     *
888     * @param vbid The vbucket containing this item
889     * @param hideLockedCas Whether the new item hides or exposes the CAS
890     * @param includeValue Whether we are keeping or discarding the value
891     */
892    std::unique_ptr<Item> toItemBase(Vbid vbid,
893                                     HideLockedCas hideLockedCas,
894                                     IncludeValue includeValue) const;
895
896    /* Update the value for this SV from the given item.
897     * Implementation for StoredValue instances (dispatched to by setValue()).
898     */
899    void setValueImpl(const Item& itm);
900
901    // name clash with public OSV isStale
902    bool isStalePriv() const {
903        return bits.test(staleIndex);
904    }
905
906    void setStale(bool value) {
907        bits.set(staleIndex, value);
908    }
909
910    bool isOrdered() const {
911        return bits.test(orderedIndex);
912    }
913
914    void setOrdered(bool value) {
915        bits.set(orderedIndex, value);
916    }
917
918    void setDeletedPriv(bool value) {
919        bits.set(deletedIndex, value);
920    }
921
922    void setResident(bool value) {
923        bits.set(residentIndex, value);
924    }
925
926    void setDirty(bool value) {
927        bits.set(dirtyIndex, value);
928    }
929
930    void setNru(uint8_t nru) {
931        bits.set(nruIndex1, nru & 1);
932        bits.set(nruIndex2, (nru & 2) >> 1);
933    }
934
935    uint8_t getNru() const {
936        // Not atomic across the two bits, recommend holding the HBL
937        return uint8_t(bits.test(nruIndex1)) |
938               (uint8_t(bits.test(nruIndex2)) << 1);
939    }
940
941    void setDeletionSource(DeleteSource delSource) {
942        deletionSource = static_cast<uint8_t>(delSource);
943    }
944
945    friend class StoredValueFactory;
946
947    /**
948     * Granting friendship to StoredValueProtected test fixture to access
949     * protected elements in order to test the implementation of StoredValue.
950     */
951    template <typename T>
952    friend class StoredValueProtectedTest;
953
954    // layout for the value TaggedPtr, access with getValueTag/setValueTag
955    union value_ptr_tag {
956        value_ptr_tag() : raw{0} {
957        }
958        value_ptr_tag(uint16_t raw) : raw(raw) {
959        }
960        uint16_t raw;
961
962        struct value_ptr_tag_fields {
963            uint8_t frequencyCounter;
964            uint8_t age;
965        } fields;
966    };
967
968    /// @return the tag part of the value TaggedPtr
969    value_ptr_tag getValueTag() const {
970        return value.get().getTag();
971    }
972
973    /// set the tag part of the value TaggedPtr
974    void setValueTag(value_ptr_tag tag) {
975        value.unsafeGetPointer().setTag(tag.raw);
976    }
977
978    /// Tagged pointer; contains both a pointer to the value (Blob) and a tag
979    /// which stores the frequency counter for this SV.
980    value_t value;
981
982    // Serves two purposes -
983    // 1. Used to implement HashTable chaining (for elements hashing to the same
984    // bucket).
985    // 2. Once the stored value has been marked stale, this is used to point at
986    // the replacement stored value. In this case, *we do not have ownership*,
987    // so we release the ptr in the destructor. The replacement is needed to
988    // determine if it would also appear in a given rangeRead - we should return
989    // only the newer version if so.
990    // Note: Using the tag portion of this pointer for metadata is difficult
991    // as this UniquePtr is exposed outside of this class and modified e.g.
992    // code that calls getNext then reset()/swap() will lose the tag bits.
993    // @todo: Re-factoring of the UniquePtr management is needed to safely use
994    // the tag.
995    UniquePtr chain_next_or_replacement; // 8 bytes (2-byte tag, 6 byte address)
996    uint64_t           cas;            //!< CAS identifier.
997    // bySeqno is atomic primarily for TSAN, which would flag that we write/read
998    // this in ephemeral backfills with different locks (which is true, but the
999    // access is we believe actually safe)
1000    cb::RelaxedAtomic<int64_t> bySeqno; //!< By sequence id number
1001
1002    // For alive items: GETL lock expiration. For deleted items: delete time.
1003    // For prepared items: the time at which they were completed.
1004    // Note that lock expiry uses rel_time_t, whereas deleted and completed
1005    // uses (32bit) time_t; hence union of both types used to minimise space.
1006    union LockExpiryOrDeleteTimeOrCompleteTime {
1007        rel_time_t lock_expiry;
1008        uint32_t delete_or_complete_time;
1009        LockExpiryOrDeleteTimeOrCompleteTime() : lock_expiry{0} {
1010        }
1011    } lock_expiry_or_delete_or_complete_time;
1012
1013    uint32_t           exptime;        //!< Expiration time of this item.
1014    uint32_t           flags;          // 4 bytes
1015    cb::uint48_t revSeqno; //!< Revision id sequence number
1016    protocol_binary_datatype_t datatype; // 1 byte
1017
1018    /**
1019     * Compressed members live in the AtomicBitSet
1020     */
1021    static constexpr size_t dirtyIndex = 0;
1022    static constexpr size_t deletedIndex = 1;
1023    // Bit 2 of bits is currently unused and may be used for new purposes.
1024    // static constexpr size_t unused = 2;
1025    // ordered := true if this is an instance of OrderedStoredValue
1026    static constexpr size_t orderedIndex = 3;
1027    // 2 bit nru managed via setNru/getNru
1028    static constexpr size_t nruIndex1 = 4;
1029    static constexpr size_t nruIndex2 = 5;
1030    static constexpr size_t residentIndex = 6;
1031    // stale := Indicates if a newer instance of the item is added. Logically
1032    //          part of OSV, but is physically located in SV as there are spare
1033    //          bits here. Guarded by the SequenceList's writeLock.
1034    static constexpr size_t staleIndex = 7;
1035
1036    folly::AtomicBitSet<sizeof(uint8_t)> bits;
1037
1038    /**
1039     * Second byte of flags. These are stored in a plain packed bitfield as no
1040     * requirement for atomicity (i.e. either const or always modified under
1041     * HashBucketLock.
1042     */
1043
1044    /// If the stored value is deleted, this stores the source of its deletion.
1045    uint8_t deletionSource : 1;
1046    /// 3-bit value which encodes the CommittedState of the StoredValue
1047    uint8_t committed : 3;
1048
1049    friend std::ostream& operator<<(std::ostream& os, const StoredValue& sv);
1050    friend void to_json(nlohmann::json& json, const StoredValue& sv);
1051};
1052
1053void to_json(nlohmann::json& json, const StoredValue& sv);
1054std::ostream& operator<<(std::ostream& os, const StoredValue& sv);
1055
1056/**
1057 * Subclass of StoredValue which additionally supports sequence number ordering.
1058 *
1059 * See StoredValue for implementation details.
1060 */
1061class OrderedStoredValue : public StoredValue {
1062public:
1063    /* Do not allow assignment */
1064    OrderedStoredValue& operator=(const OrderedStoredValue& other) = delete;
1065    OrderedStoredValue& operator=(OrderedStoredValue&& other) = delete;
1066
1067    ~OrderedStoredValue() {
1068        if (isStalePriv()) {
1069            // This points to the replacement OSV which we do not actually own.
1070            // We are reusing a unique_ptr so we explicitly release it in this
1071            // case. We /do/ own the chain_next if we are not stale.
1072            chain_next_or_replacement.release();
1073        }
1074    }
1075
1076    /**
1077     * True if a newer version of the same key exists in the HashTable.
1078     * Note: Only true for OrderedStoredValues which are no longer in the
1079     *       HashTable (and only live in SequenceList)
1080     * @param writeGuard The locked SeqList writeLock which guards the stale
1081     * param.
1082     */
1083    bool isStale(std::lock_guard<std::mutex>& writeGuard) const {
1084        return isStalePriv();
1085    }
1086
1087    /**
1088     * Marks that newer instance of this item is added in the HashTable
1089     * @param writeLock The SeqList writeLock which guards the stale param.
1090     */
1091    void markStale(std::lock_guard<std::mutex>& writeGuard,
1092                   StoredValue* newSv) {
1093        // next is a UniquePtr which is up to this point was used for chaining
1094        // in the HashTable. Now this item is stale, we are reusing this to
1095        // point to the updated version of this StoredValue. _BUT_ we do not
1096        // own the new SV. At destruction, we must release this ptr if
1097        // we are stale.
1098        chain_next_or_replacement.reset(newSv);
1099        setStale(true);
1100    }
1101
1102    StoredValue* getReplacementIfStale(
1103            std::lock_guard<std::mutex>& writeGuard) const {
1104        if (!isStalePriv()) {
1105            return nullptr;
1106        }
1107
1108        return chain_next_or_replacement.get().get();
1109    }
1110
1111    /**
1112     * Return the time the item was deleted. Only valid for completed
1113     * (SyncWrites) or deleted items.
1114     */
1115    time_t getCompletedOrDeletedTime() const;
1116
1117    /**
1118     * Check if the contents of the StoredValue is same as that of the other
1119     * one. Does not consider the intrusive hash bucket link.
1120     *
1121     * @param other The StoredValue to be compared with
1122     */
1123    bool operator==(const OrderedStoredValue& other) const;
1124
1125    /// Return how many bytes are need to store item with given key as an
1126    /// OrderedStoredValue
1127    static size_t getRequiredStorage(const DocKey& key);
1128
1129    /**
1130     * C++14 will call the sized delete version, but we
1131     * allocate the object by using the new operator with a custom
1132     * size (the key is packed after the object). We need to use
1133     * the non-sized delete variant as the runtime don't know
1134     * the size of the allocated object.
1135     */
1136    static void operator delete(void* ptr) {
1137        ::operator delete(ptr);
1138    }
1139
1140    /**
1141     * Set the time the item was completed (SyncWrite) or deleted at to the
1142     * specified time.
1143     */
1144    void setCompletedOrDeletedTime(time_t time);
1145
1146    void setPrepareSeqno(int64_t prepareSeqno) {
1147        this->prepareSeqno = prepareSeqno;
1148    }
1149
1150protected:
1151    SerialisedDocKey* key() {
1152        return reinterpret_cast<SerialisedDocKey*>(this + 1);
1153    }
1154
1155    /**
1156     * Logically mark this OSV as deleted. Implementation for
1157     * OrderedStoredValue instances (dispatched to by del() based on
1158     * isOrdered==true).
1159     */
1160    bool deleteImpl(DeleteSource delSource);
1161
1162    /* Update the value for this OSV from the given item.
1163     * Implementation for OrderedStoredValue instances (dispatched to by
1164     * setValue()).
1165     */
1166    void setValueImpl(const Item& itm);
1167
1168private:
1169    // Constructor. Private, as needs to be carefully created via
1170    // OrderedStoredValueFactory.
1171    OrderedStoredValue(const Item& itm,
1172                       UniquePtr n,
1173                       EPStats& stats)
1174        : StoredValue(itm, std::move(n), stats, /*isOrdered*/ true) {
1175    }
1176
1177    // Copy Constructor. Private, as needs to be carefully created via
1178    // OrderedStoredValueFactory.
1179    //
1180    // Only StoredValue part (Hash Chain included) is copied. Hence the copied
1181    // StoredValue will be in the HashTable, but not in the ordered
1182    // data structure.
1183    OrderedStoredValue(const StoredValue& other,
1184                       UniquePtr n,
1185                       EPStats& stats)
1186        : StoredValue(other, std::move(n), stats) {
1187    }
1188
1189    // Prepare seqno of a commit or abort StoredValue.
1190    // @TODO perf. We should only store this for commits and aborts, not all
1191    // OrderedStoredValues
1192    cb::uint48_t prepareSeqno;
1193
1194    friend std::ostream& operator<<(std::ostream& os, const StoredValue& sv);
1195    friend void to_json(nlohmann::json& json, const StoredValue& sv);
1196
1197public:
1198    // Intrusive linked-list for sequence number ordering.
1199    // Guarded by the SequenceList's writeLock.
1200    // Logically private to the object, however Boost requires it to be public.
1201    boost::intrusive::list_member_hook<> seqno_hook;
1202
1203    // Grant friendship so our factory can call our (private) constructor.
1204    friend class OrderedStoredValueFactory;
1205
1206    // Grant friendship to base class so it can perform flag dispatch to our
1207    // overridden protected methods.
1208    friend class StoredValue;
1209};
1210
1211SerialisedDocKey* StoredValue::key() {
1212    // key is located immediately following the object.
1213    if (isOrdered()) {
1214        return static_cast<OrderedStoredValue*>(this)->key();
1215    } else {
1216        return reinterpret_cast<SerialisedDocKey*>(this + 1);
1217    }
1218}
1219
1220size_t StoredValue::getObjectSize() const {
1221    // Size of fixed part of OrderedStoredValue or StoredValue, plus size of
1222    // (variable) key.
1223    if (isOrdered()) {
1224        return sizeof(OrderedStoredValue) + getKey().getObjectSize();
1225    }
1226    return sizeof(*this) + getKey().getObjectSize();
1227}
1228