1 /* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  *     Copyright 2020 Couchbase, Inc
4  *
5  *   Licensed under the Apache License, Version 2.0 (the "License");
6  *   you may not use this file except in compliance with the License.
7  *   You may obtain a copy of the License at
8  *
9  *       http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *   Unless required by applicable law or agreed to in writing, software
12  *   distributed under the License is distributed on an "AS IS" BASIS,
13  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *   See the License for the specific language governing permissions and
15  *   limitations under the License.
16  */
17 
18 #pragma once
19 
20 #include "storeddockey_fwd.h"
21 
22 #include "ep_types.h"
23 
24 #include <memcached/dockey.h>
25 #include <gsl/gsl>
26 #include <limits>
27 #include <string>
28 #include <type_traits>
29 
30 class SerialisedDocKey;
31 
32 /**
33  * StoredDocKey is a container for key data
34  *
35  * Internally an n byte key is stored in a n + sizeof(CollectionID) std::string.
36  *  a) We zero terminate so that data() is safe for printing as a c-string.
37  *  b) The CollectionID is stored before the key string (using LEB128 encoding).
 *    This is because StoredDocKey typically ends up being written to disk and
 *    the CollectionID forms part of the on-disk key. Accounting for the
 *    CollectionID here means storage components don't have to create a new
 *    buffer into which they can lay out the CollectionID and key data.
42  *
43  * StoredDocKeyT is templated over an Allocator type. A StoredDocKey using
44  * declaration provides a StoredDocKeyT with std::allocator which is suitable
45  * for most purposes. The Allocator here allows us to track checkpoint memory
46  * overhead accurately when the key (std::string is the underlying type)
47  * requires a heap allocation. This varies based on platform but is typically
48  * keys over 16 or 24 bytes.
49  */
50 template <template <class> class Allocator>
51 class StoredDocKeyT : public DocKeyInterface<StoredDocKeyT<Allocator>> {
52 public:
53     using allocator_type = Allocator<std::string::value_type>;
54 
55     /**
56      * Construct empty - required for some std containers
57      */
58     StoredDocKeyT() = default;
59 
60     /**
61      * Create a StoredDocKey from a DocKey
62      *
63      * @param key DocKey that is to be copied-in
64      */
StoredDocKeyT(const DocKey& key)65     StoredDocKeyT(const DocKey& key) : StoredDocKeyT(key, allocator_type()) {
66     }
67 
68     /**
69      * Create a StoredDocKey from a DocKey
70      *
71      * @param key DocKey that is to be copied-in
72      */
73     StoredDocKeyT(const DocKey& key, allocator_type allocator);
74 
75     /**
76      * Create a StoredDocKey from a std::string (test code uses this)
77      *
78      * @param key std::string to be copied-in
79      * @param cid the CollectionID that the key applies to (and will be encoded
80      *        into the stored data)
81      */
82     StoredDocKeyT(const std::string& key, CollectionID cid);
83 
keyData() const84     const char* keyData() const {
85         return keydata.data();
86     }
87 
data() const88     const uint8_t* data() const {
89         return reinterpret_cast<const uint8_t*>(keydata.data());
90     }
91 
size() const92     size_t size() const {
93         return keydata.size();
94     }
95 
96     CollectionID getCollectionID() const;
97 
getEncoding() const98     DocKeyEncodesCollectionId getEncoding() const {
99         return DocKeyEncodesCollectionId::Yes;
100     }
101 
102     /**
103      * @return a DocKey that views this StoredDocKey but without any
104      * collection-ID prefix.
105      */
106     DocKey makeDocKeyWithoutCollectionID() const;
107 
108     /**
109      * Intended for debug use only
110      * @returns cid:key
111      */
112     std::string to_string() const;
113 
114     /**
115      * For tests only
116      * @returns the 'key' part of the StoredDocKey
117      */
118     const char* c_str() const;
119 
compare(const StoredDocKeyT& rhs) const120     int compare(const StoredDocKeyT& rhs) const {
121         return keydata.compare(rhs.keydata);
122     }
123 
operator ==(const StoredDocKeyT& rhs) const124     bool operator==(const StoredDocKeyT& rhs) const {
125         return keydata == rhs.keydata;
126     }
127 
operator !=(const StoredDocKeyT& rhs) const128     bool operator!=(const StoredDocKeyT& rhs) const {
129         return !(*this == rhs);
130     }
131 
operator <(const StoredDocKeyT& rhs) const132     bool operator<(const StoredDocKeyT& rhs) const {
133         return keydata < rhs.keydata;
134     }
135 
operator DocKey() const136     operator DocKey() const {
137         return {cb::const_char_buffer(keydata.data(), keydata.size()),
138                 DocKeyEncodesCollectionId::Yes};
139     }
140 
141 protected:
142     std::basic_string<std::string::value_type,
143                       std::string::traits_type,
144                       allocator_type>
145             keydata;
146 };
147 
148 std::ostream& operator<<(std::ostream& os, const StoredDocKey& key);
149 
150 static_assert(sizeof(CollectionID) == sizeof(uint32_t),
151               "StoredDocKey: CollectionID has changed size");
152 
/**
 * A hash function for StoredDocKey so they can be used in std::unordered_map
 * and friends.
 */
156 namespace std {
157 template <template <class> class Allocator>
158 struct hash<StoredDocKeyT<Allocator>> {
operator ()std::hash159     std::size_t operator()(const StoredDocKeyT<Allocator>& key) const {
160         return key.hash();
161     }
162 };
163 }
164 
// Forward declarations of the classes that SerialisedDocKey names as friends.
class MutationLogEntryV2;
class MutationLogEntryV3;
class StoredValue;
167 
168 /**
169  * SerialisedDocKey maintains the key data in an allocation that is not owned by
170  * the class. The class is essentially immutable, providing a "view" onto the
171  * larger block.
172  *
173  * For example where a StoredDocKey needs to exist as part of a bigger block of
174  * data, SerialisedDocKey is the class to use.
175  *
176  * A limited number of classes are friends and only those classes can construct
177  * a SerialisedDocKey.
178  */
179 class SerialisedDocKey : public DocKeyInterface<SerialisedDocKey> {
180 public:
181     /**
182      * The copy constructor is deleted due to the bytes living outside of the
183      * object.
184      */
185     SerialisedDocKey(const SerialisedDocKey& obj) = delete;
186 
data() const187     const uint8_t* data() const {
188         return bytes;
189     }
190 
size() const191     size_t size() const {
192         return length;
193     }
194 
195     CollectionID getCollectionID() const;
196 
getEncoding() const197     DocKeyEncodesCollectionId getEncoding() const {
198         return DocKeyEncodesCollectionId::Yes;
199     }
200 
201     bool operator==(const DocKey& rhs) const;
202 
203     /**
204      * Return how many bytes are (or need to be) allocated to this object
205      */
getObjectSize() const206     size_t getObjectSize() const {
207         return getObjectSize(length);
208     }
209 
210     /**
211      * Return how many bytes are needed to store the DocKey
212      * @param key a DocKey that needs to be stored in a SerialisedDocKey
213      */
getObjectSize(const DocKey key)214     static size_t getObjectSize(const DocKey key) {
215         return getObjectSize(key.size());
216     }
217 
218     /**
219      * Create a SerialisedDocKey and return a unique_ptr to the object.
220      * Note that the allocation is bigger than sizeof(SerialisedDocKey)
221      * @param key a DocKey to be stored as a SerialisedDocKey
222      */
223     struct SerialisedDocKeyDelete {
operator ()SerialisedDocKey::SerialisedDocKeyDelete224         void operator()(SerialisedDocKey* p) {
225             p->~SerialisedDocKey();
226             delete[] reinterpret_cast<uint8_t*>(p);
227         }
228     };
229 
operator DocKey() const230     operator DocKey() const {
231         return {bytes, length, DocKeyEncodesCollectionId::Yes};
232     }
233 
234     /**
235      * make a SerialisedDocKey and return a unique_ptr to it - this is used
236      * in test code only.
237      */
make( const StoredDocKey& key)238     static std::unique_ptr<SerialisedDocKey, SerialisedDocKeyDelete> make(
239             const StoredDocKey& key) {
240         std::unique_ptr<SerialisedDocKey, SerialisedDocKeyDelete> rval(
241                 reinterpret_cast<SerialisedDocKey*>(
242                         new uint8_t[getObjectSize(key)]));
243         new (rval.get()) SerialisedDocKey(key);
244         return rval;
245     }
246 
247 protected:
248     /**
249      * These following classes are "white-listed". They know how to allocate
250      * and construct this object so are allowed access to the constructor.
251      */
252     friend class MutationLogEntryV2;
253     friend class MutationLogEntryV3;
254     friend class StoredValue;
255 
SerialisedDocKey()256     SerialisedDocKey() : length(0), bytes() {
257     }
258 
259     /**
260      * Create a SerialisedDocKey from a DocKey. Protected constructor as
261      * this must be used by friends who know how to pre-allocate the object
262      * storage
263      * @param key a DocKey to be copied in
264      */
SerialisedDocKey(const DocKey& key)265     SerialisedDocKey(const DocKey& key)
266         : length(gsl::narrow_cast<uint8_t>(key.size())) {
267         if (key.getEncoding() == DocKeyEncodesCollectionId::Yes) {
268             std::copy(key.data(), key.data() + key.size(), reinterpret_cast<char*>(bytes));
269         } else {
270             // This key is for the default collection
271             bytes[0] = DefaultCollectionLeb128Encoded;
272             std::copy(key.data(), key.data() + key.size(), reinterpret_cast<char*>(bytes) + 1);
273             length++;
274         }
275     }
276 
277     /**
278      * Create a SerialisedDocKey from a byte_buffer that has no collection data
279      * and requires the caller to state the collection-ID
280      * This is used by MutationLogEntryV1/V2 to V3 upgrades
281      */
282     SerialisedDocKey(cb::const_byte_buffer key, CollectionID cid);
283 
284     /**
285      * Create a SerialisedDocKey from a byte_buffer that has collection data
286      */
SerialisedDocKey(cb::const_byte_buffer key)287     SerialisedDocKey(cb::const_byte_buffer key)
288         : length(gsl::narrow_cast<uint8_t>(key.size())) {
289         std::copy(key.begin(), key.end(), reinterpret_cast<char*>(bytes));
290     }
291 
292     /**
293      * Returns the size in bytes of this object - fixed size plus the variable
294      * length for the bytes making up the key.
295      */
getObjectSize(size_t len)296     static size_t getObjectSize(size_t len) {
297         return sizeof(SerialisedDocKey) +
298                (len - sizeof(SerialisedDocKey().bytes));
299     }
300 
301     uint8_t length{0};
302     uint8_t bytes[1];
303 };
304 
305 std::ostream& operator<<(std::ostream& os, const SerialisedDocKey& key);
306 
307 static_assert(std::is_standard_layout<SerialisedDocKey>::value,
308               "SeralisedDocKey: must satisfy is_standard_layout");
309