1 /* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  *     Copyright 2017 Couchbase, Inc
4  *
5  *   Licensed under the Apache License, Version 2.0 (the "License");
6  *   you may not use this file except in compliance with the License.
7  *   You may obtain a copy of the License at
8  *
9  *       http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *   Unless required by applicable law or agreed to in writing, software
12  *   distributed under the License is distributed on an "AS IS" BASIS,
13  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *   See the License for the specific language governing permissions and
15  *   limitations under the License.
16  */
17 
18 /**
19  * This header file contains the abstract base class for the classes that hold
20  * ordered sequence of items in memory. This is not generic, holds only ordered
21  * seq of OrderedStoredValue's. (OrderedStoredValue is the in-memory
22  * representation of an
23  * item)
24  */
25 
26 #pragma once
27 
28 #include <mutex>
29 #include <vector>
30 
31 #include "config.h"
32 #include "item.h"
33 #include "memcached/engine_error.h"
34 #include "stored-value.h"
35 
36 #include <boost/optional/optional.hpp>
37 
38 /* [EPHE TODO]: Check if uint64_t can be used instead */
39 using seqno_t = int64_t;
40 
41 /**
42  * SequenceList is the abstract base class for the classes that hold ordered
43  * sequence of items in memory. To store/retrieve items sequentially in memory,
44  * in our multi-threaded, monotonically increasing seq model, we need these
45  * classes around basic data structures like Linkedlist or Skiplist.
46  *
47  * 'OrderedStoredValue' is the in-memory representation of an item that can be
48  * stored sequentially and SequenceList derivatives store a sequence of
49  * 'OrderedStoredValues'. Based on the implementation of SequenceList,
50  * OrderedStoredValue might have to carry an intrusive component that
51  * is necessary for the list.
52  *
53  * SequenceList expects the module that contains it, takes the responsibility
54  * of serializing the OrderedStoredValue. SequenceList only guarantees to hold
55  * the OrderedStoredValue in the order they are sent to it. For this it forces
56  * the write calls to pass the seq lock held by them
57  *
58  * Note: These classes only handle the position of the 'OrderedStoredValue' in
59  *       the ordered sequence desired. They do not update a OrderedStoredValue.
60  *
61  *       In the file 'item', 'OrderedStoredValue' are used interchangeably
62  *
63  * [EPHE TODO]: create a documentation (with diagrams) explaining write, update,
64  *              rangeRead and Clean Up
65  */
66 class SequenceList {
67 protected:
68     /**
69      * Abstract base class for implementation of range iterators in derived
70      * classes of SequenceList.
71      *
72      * (a) The API is for unidirectional (forward only) iterator.
73      * (b) Iterator cannot be invalidated while in use.
74      * (c) Reading all the items from the iterator results in point-in-time
75      *     snapshot.
76      * (d) Only 1 iterator can be created for now.
77      * (e) Currently iterator can be created only from start till end
78      */
79     class RangeIteratorImpl {
80     public:
~RangeIteratorImpl()81         virtual ~RangeIteratorImpl() {
82         }
83 
84         /**
85          * Returns the stored item at iterator's position
86          */
87         virtual OrderedStoredValue& operator*() const = 0;
88 
89         /**
90          * Pre increment of the iterator position
91          *
92          * Note: We do not allow post increment for now as we support only
93          *       one iterator at a time. (hence, we don't create a temp copy
94          *       of the iterator obj)
95          */
96         virtual RangeIteratorImpl& operator++() = 0;
97 
98         /**
99          * Curr iterator position, indicated by the seqno of the item at that
100          * position
101          */
102         virtual seqno_t curr() const = 0;
103 
104         /**
105          * End position of iterator, indicated by the seqno > highest_seqno
106          * in the iterator range
107          */
108         virtual seqno_t end() const = 0;
109 
110         /**
111          * Seqno of the last item in the iterator
112          */
113         virtual seqno_t back() const = 0;
114 
115         /**
116          * Returns the number of items that can be iterated over by this
117          * (forward only) iterator at that instance
118          */
119         virtual uint64_t count() const = 0;
120 
121         /**
122          * Indicates the minimum seqno in the iterator that must be read to
123          * get a consistent read snapshot
124          */
125         virtual seqno_t getEarlySnapShotEnd() const = 0;
126     };
127 
128 public:
129     /**
130      * Indicates whether the updateListElem is successful or the list is
131      * allowing
132      * only appends at the moment.
133      */
134     enum class UpdateStatus { Success, Append };
135 
136     /**
137      * RangeIterator for a SequenceList objects.
138      *
139      * The iterator is unidirectional (forward only) and cannot be invalidated
140      * while in use. That means reading the items in the iterator results in a
141      * valid point-in-time snapshot. But this adds a caveat that while an
142      * iterator instance is active, certain invariants are to be met to get the
143      * snapshot, and this happens at the expense of increased memory usage.
144      *
145      * Implementation follows the pimpl idiom. This class serves as a wrapper
146      * for the abstract base class that defines the APIs for range iteration on
147      * the derived concrete classes of the SequenceList. It calls the range
148      * iterators of the concrete classes polymorphically at runtime. However
149      * clients get this statically typed "RangeIterator" object when they
150      * request for an iterator for a SequenceList object.
151      *
152      * Usage:
153      * 1. Create an iterator instance using SequenceList::makeRangeIterator()
154      * 2. Iterate (read) through all the list elements.
155      * 3. Delete the iterator.
156      * Note: (a) Do not hold the iterator for long, as it will result in stale
157      *           items in list and hence increased memory usage.
158      *       (b) Make sure to delete the iterator after using it.
159      *       (c) For now, we allow on one RangeIterator. Try to create more
160      *           iterators will result in the create call(s) being blocked.
161      */
162     class RangeIterator {
163     public:
164         RangeIterator(std::unique_ptr<RangeIteratorImpl> rangeIterImpl);
165 
166         /* Needed for MSVC.
167            MSVC does not do "return value optimization" if copy constructor is
168            defined before move constructor
169            (http://stackoverflow.com/questions/29459040)
170          */
RangeIterator(RangeIterator&& other)171         RangeIterator(RangeIterator&& other)
172             : rangeIterImpl(std::move(other.rangeIterImpl)) {
173         }
174 
operator =(RangeIterator&& other)175         RangeIterator& operator=(RangeIterator&& other) {
176             rangeIterImpl = std::move(other.rangeIterImpl);
177             return *this;
178         }
179 
180         RangeIterator(RangeIterator& other) = delete;
181 
182         /**
183          * Returns the stored item at iterator's position
184          */
185         OrderedStoredValue& operator*() const;
186 
187         /**
188          * Pre increment of the iterator position
189          */
190         RangeIterator& operator++();
191 
192         /**
193          * Curr iterator position, indicated by the seqno of the item at that
194          * position
195          */
196         seqno_t curr() const;
197 
198         /**
199          * End position of iterator, indicated by the seqno > highest_seqno
200          * in the iterator range
201          */
202         seqno_t end() const;
203 
204         /**
205          * Seqno of the last item in the iterator
206          */
207         seqno_t back() const;
208 
209         /**
210          * Returns the number of items that can be iterated over by this
211          * (forward only) iterator at that instance
212          */
213         uint64_t count() const;
214 
215         /**
216          * Indicates the minimum seqno in the iterator that must be read to
217          * get a consistent read snapshot
218          */
219         seqno_t getEarlySnapShotEnd() const;
220 
221     private:
222         /* Pointer to the abstract class of range iterator implementation */
223         std::unique_ptr<RangeIteratorImpl> rangeIterImpl;
224     };
225 
~SequenceList()226     virtual ~SequenceList() {
227     }
228 
229     /**
230      * Add a new item at the end of the list.
231      *
232      * @param seqLock A sequence lock the calling module is expected to hold.
233      * @param writeLock Write lock of the sequenceList from getListWriteLock()
234      * @param v Ref to orderedStoredValue which will placed into the linked list
235      *          Its intrusive list links will be updated.
236      */
237     virtual void appendToList(std::lock_guard<std::mutex>& seqLock,
238                               std::lock_guard<std::mutex>& writeLock,
239                               OrderedStoredValue& v) = 0;
240 
241     /**
242      * If possible, update an existing element the list and move it to end.
243      * If there is a range read in the position of the element being updated
244      * we do not allow the update and indicate the caller to do an append.
245      *
246      * @param seqLock A sequence lock the calling module is expected to hold.
247      * @param writeLock Write lock of the sequenceList from getListWriteLock()
248      * @param v Ref to orderedStoredValue which will placed into the linked list
249      *          Its intrusive list links will be updated.
250      *
251      * @return UpdateStatus::Success list element has been updated and moved to
252      *                               end.
253      *         UpdateStatus::Append list element is *not* updated. Caller must
254      *                              handle the append.
255      */
256     virtual SequenceList::UpdateStatus updateListElem(
257             std::lock_guard<std::mutex>& seqLock,
258             std::lock_guard<std::mutex>& writeLock,
259             OrderedStoredValue& v) = 0;
260 
261     /**
262      * Provides point-in-time snapshots which can be used for incremental
263      * replication.
264      *
265      * Copies the StoredValues as a vector of ref counterd items starting from
266      * 'start + 1' seqno into 'items' as a snapshot.
267      *
268      * Since we use monotonically increasing point-in-time snapshots we cannot
269      * guarantee that the snapshot ends at the requested end seqno. Due to
270      * dedups we may have to send till a higher seqno in the snapshot.
271      *
272      * @param start requested start seqno
273      * @param end requested end seqno
274      *
275      * @return ENGINE_SUCCESS, items in the snapshot and adjusted endSeqNo
276      *         ENGINE_ENOMEM on no memory to copy items
277      *         ENGINE_ERANGE on incorrect start and end
278      */
279     virtual std::tuple<ENGINE_ERROR_CODE, std::vector<UniqueItemPtr>, seqno_t>
280     rangeRead(seqno_t start, seqno_t end) = 0;
281 
282     /**
283      * Updates the highSeqno in the list. Since seqno is generated and managed
284      * outside the list, the module managing it must update this after the seqno
285      * is generated for the item already put in the list.
286      *
287      * @param listWriteLg Write lock of the sequenceList from getListWriteLock()
288      * @param v Ref to orderedStoredValue
289      */
290     virtual void updateHighSeqno(std::lock_guard<std::mutex>& listWriteLg,
291                                  const OrderedStoredValue& v) = 0;
292 
293     /**
294      * Updates the highestDedupedSeqno in the list. Since seqno is generated and
295      * managed outside the list, the module managing it must update this after
296      * the seqno is generated for the item already put in the list.
297      *
298      * @param listWriteLg Write lock of the sequenceList from getListWriteLock()
299      * @param v Ref to orderedStoredValue
300      *
301      */
302     virtual void updateHighestDedupedSeqno(
303             std::lock_guard<std::mutex>& listWriteLg,
304             const OrderedStoredValue& v) = 0;
305 
306     /**
307      * Mark an OrderedStoredValue stale and assumes its ownership. Stores ptr to
308      * the newer version in the OSV, or nullptr if there is no newer version
309      * (i.e., expired Tombstone)
310      * Note: It is upto the sequential data structure implementation how it
311      *       wants to own the OrderedStoredValue (as owned type vs non-owned
312      *       type)
313      *
314      * @param listWriteLg Write lock of the sequenceList from getListWriteLock()
315      * @param ownedSv StoredValue whose ownership is passed to the sequential
316      *                data structure.
317      * @param replacement StoredValue which supersedes ownedSv, or nullptr.
318      */
319     virtual void markItemStale(std::lock_guard<std::mutex>& listWriteLg,
320                                StoredValue::UniquePtr ownedSv,
321                                StoredValue* replacement) = 0;
322 
323     /**
324      * Remove from sequence list and delete all OSVs which are purgable.
325      * OSVs which can be purged are items which are outside the ReadRange and
326      * are Stale.
327      *
328      * @param purgeUpToSeqno Indicates the max seqno (inclusive) that could be
329      *                       purged
330      * @param shouldPause Callback function that indicates if tombstone purging
331      *                    should pause. This is called for every element in the
332      *                    sequence list when we iterate over the list during the
333      *                    purge. The caller should decide if the purge should
334      *                    continue or if it should be paused (in case it is
335      *                    running for a long time). By default, we assume that
336      *                    the tombstone purging need not be paused at all
337      *
338      * @return The number of items purged from the sequence list (and hence
339      *         deleted).
340      */
341     virtual size_t purgeTombstones(seqno_t purgeUpToSeqno,
342                                    std::function<bool()> shouldPause = []() {
343                                        return false;
344                                    }) = 0;
345 
346     /**
347      * Updates the number of deleted items in the sequence list whenever
348      * an item is modified.
349      *
350      * @param oldDeleted Was the old item deleted?
351      * @param newDeleted Was the new item replacing it deleted?
352      */
353     virtual void updateNumDeletedItems(bool oldDeleted, bool newDeleted) = 0;
354 
355     /**
356      * Returns the number of stale items in the list.
357      *
358      * @return count of stale items
359      */
360     virtual uint64_t getNumStaleItems() const = 0;
361 
362     /**
363      * Return the count of bytes of the values of stale items in the list.
364      */
365     virtual size_t getStaleValueBytes() const = 0;
366 
367     /**
368      * Return the count of bytes of the metadata of stale items in the list.
369      */
370     virtual size_t getStaleMetadataBytes() const = 0;
371 
372     /**
373      * Returns the number of deleted items in the list.
374      *
375      * @return count of deleted items
376      */
377     virtual uint64_t getNumDeletedItems() const = 0;
378 
379     /**
380      * Returns the number of items in the list.
381      *
382      * @return count of items
383      */
384     virtual uint64_t getNumItems() const = 0;
385 
386     /**
387      * Returns the highSeqno in the list.
388      */
389     virtual uint64_t getHighSeqno() const = 0;
390 
391     /**
392      * Returns the highest de-duplicated sequence number in the list.
393      */
394     virtual uint64_t getHighestDedupedSeqno() const = 0;
395 
396     /**
397      * Returns the highest purged Deleted sequence number in the list.
398      */
399     virtual seqno_t getHighestPurgedDeletedSeqno() const = 0;
400 
401     /**
402      * Returns the current range read begin sequence number.
403      */
404     virtual uint64_t getRangeReadBegin() const = 0;
405 
406     /**
407      * Returns the current range read end sequence number.
408      */
409     virtual uint64_t getRangeReadEnd() const = 0;
410 
411     /**
412      * Returns the lock which must be held to make append/update to the seqList
413      * + the updation of the corresponding highSeqno or the
414      * highestDedupedSeqno atomic
415      */
416     virtual std::mutex& getListWriteLock() const = 0;
417 
418     /**
419      * Creates a range iterator for the underlying SequenceList 'optionally'.
420      * Under scenarios like where we want to limit the number of range iterators
421      * the SequenceList, new range iterator will not be allowed
422      *
423      * @param isBackfill indicates if the iterator is for backfill (for debug)
424      *
425      * @return range iterator object when possible
426      *         null when not possible
427      */
428     virtual boost::optional<SequenceList::RangeIterator> makeRangeIterator(
429             bool isBackfill) = 0;
430 
431     /**
432      * Debug - prints a representation of the list to stderr.
433      */
434     virtual void dump() const = 0;
435 };
436