1 /* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  *     Copyright 2016 Couchbase, Inc
4  *
5  *   Licensed under the Apache License, Version 2.0 (the "License");
6  *   you may not use this file except in compliance with the License.
7  *   You may obtain a copy of the License at
8  *
9  *       http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *   Unless required by applicable law or agreed to in writing, software
12  *   distributed under the License is distributed on an "AS IS" BASIS,
13  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *   See the License for the specific language governing permissions and
15  *   limitations under the License.
16  */
17 
18 #pragma once
19 
20 #include "queue_op.h"
21 
22 #include <folly/lang/Assume.h>
23 #include <folly/lang/Bits.h>
24 #include <libcouchstore/couch_common.h>
25 #include <memcached/durability_spec.h>
26 #include <memcached/protocol_binary.h>
27 #include <memcached/types.h>
28 #include <platform/n_byte_integer.h>
29 
30 #include <memory>
31 #include <type_traits>
32 
// Bit layout of the flexCode byte stored inside MetaDataV1:
//   bits 0..6 - the flex code itself
//   bit  7    - the delete source
constexpr uint8_t flexCodeMask = 0x7F;
constexpr uint8_t deleteSourceMask = 0x80;
36 
37 // These classes are written to disk in couchstore, so we want to (a) have
38 // a stable binary layout and (b) minimise the space they take.
39 // Therefore turn on packing of structures. The resulting sizes are verified
40 // with static_assert()s below to ensure they are always fixed at the expected
41 // size.
42 #pragma pack(1)
43 class MetaData {
44 protected:
45     /*
46      * Declare the metadata formats in protected visibility.
47      *
48      * Each version generally extends the previous version, thus in memory you
49      * have
50      * [V0] or
51      * [V0][V1]
52      *
53      * V3 is special - given that V2 is no longer used (contained fields which
54      * were subsequently removed); it extends from V1 - i.e.
55      *
56      * [V0][V1][V3]
57      */
58     class MetaDataV0 {
59     public:
MetaDataV0()60         MetaDataV0() : cas(0), exptime(0), flags(0) {
61         }
62 
initialise(const char* raw)63         void initialise(const char* raw) {
64             std::memcpy(this, raw, sizeof(MetaDataV0));
65             // Re-write only cas/exptime to get them in the correct byte-order
66             cas = ntohll(cas);
67             exptime = ntohl(exptime);
68         }
69 
70         /*
71          * When V0 is persisted, the cas/exptime are in network byte order.
72          */
prepareForPersistence()73         void prepareForPersistence() {
74             cas = htonll(cas);
75             exptime = htonl(exptime);
76         }
77 
getCas() const78         uint64_t getCas() const {
79             return cas;
80         }
81 
setCas(uint64_t cas)82         void setCas(uint64_t cas) {
83             this->cas = cas;
84         }
85 
getExptime() const86         uint32_t getExptime() const {
87             return exptime;
88         }
89 
setExptime(uint32_t exptime)90         void setExptime(uint32_t exptime) {
91             this->exptime = exptime;
92         }
93 
getFlags() const94         uint32_t getFlags() const {
95             return flags;
96         }
97 
setFlags(uint32_t flags)98         void setFlags(uint32_t flags) {
99             this->flags = flags;
100         }
101 
copyToBuf(char* raw) const102         void copyToBuf(char* raw) const {
103             // cas and exptime need converting to network byte-order
104             uint64_t casNBO = htonll(cas);
105             uint32_t exptimeNBO = htonl(exptime);
106             std::memcpy(raw, &casNBO, sizeof(uint64_t));
107             std::memcpy(raw + sizeof(uint64_t), &exptimeNBO, sizeof(uint32_t));
108             std::memcpy(raw + sizeof(uint64_t) + sizeof(uint32_t),
109                         &flags,
110                         sizeof(uint32_t));
111         }
112 
113     private:
114         /*
115          * V0 knows about CAS, expiry time and flags.
116          */
117         uint64_t cas;
118 
119         /**
120          * For Alive documents, the time it should expire. For Deleted
121          * documents, the time the document was deleted.
122          * Expressed as seconds since unix epoch (time_t).
123          */
124         uint32_t exptime;
125         uint32_t flags;
126     };
127 
128     static_assert(sizeof(MetaDataV0) == 16,
129                   "MetaDataV0 is not the expected size.");
130 
131     class MetaDataV1 {
132     public:
MetaDataV1()133         MetaDataV1() : flexCode(0), dataType(PROTOCOL_BINARY_RAW_BYTES) {
134         }
135 
initialise(const char* raw)136         void initialise(const char* raw) {
137             flexCode = raw[0];
138             dataType = raw[1];
139 
140             if (getFlexCode() != FLEX_META_CODE) {
141                 std::invalid_argument(
142                         "MetaDataV1::initialise illegal "
143                         "flexCode \"" +
144                         std::to_string(flexCode) + "\"");
145             }
146         }
147 
setFlexCode()148         void setFlexCode() {
149             setFlexCode(FLEX_META_CODE);
150         }
151 
setFlexCode(uint8_t code)152         void setFlexCode(uint8_t code) {
153             auto codeIn = code & flexCodeMask;
154             flexCode = codeIn + (flexCode & deleteSourceMask);
155         }
156 
getFlexCode() const157         uint8_t getFlexCode() const {
158             return static_cast<uint8_t>(flexCode & flexCodeMask);
159         }
160 
setDataType(protocol_binary_datatype_t dataType)161         void setDataType(protocol_binary_datatype_t dataType) {
162             this->dataType = static_cast<uint8_t>(dataType);
163         }
164 
getDataType() const165         protocol_binary_datatype_t getDataType() const {
166             return static_cast<protocol_binary_datatype_t>(dataType);
167         }
168 
copyToBuf(char* raw) const169         void copyToBuf(char* raw) const {
170             raw[0] = flexCode;
171             raw[1] = dataType;
172         }
173 
setDeleteSource(DeleteSource source)174         void setDeleteSource(DeleteSource source) {
175             auto deleteInt = (static_cast<uint8_t>(source)) << 7;
176             flexCode = deleteInt + (flexCode & flexCodeMask);
177         }
178 
getDeleteSource() const179         DeleteSource getDeleteSource() const {
180             auto deleteBit = flexCode >> 7;
181             return static_cast<DeleteSource>(deleteBit);
182         }
183 
184     private:
185         /*
186          * V1 is a 2 byte extension storing datatype
187          *   0 - flexCode (which also holds deleteSource in bit 7)
188          *   1 - dataType
189          */
190         uint8_t flexCode;
191         uint8_t dataType;
192     };
193 
194     static_assert(sizeof(MetaDataV1) == 2,
195                   "MetaDataV1 is not the expected size.");
196 
197     class MetaDataV2 {
198     public:
199         MetaDataV2() = default;
200 
initialise(const char* raw)201         void initialise(const char* raw) {
202             confResMode = raw[0];
203         }
204 
205     private:
206         /*
207          * V2 is a 1 byte extension storing the conflict resolution mode.
208          * This 1 byte extension is never stored out anymore, but may
209          * exist from down-level ep-engine.
210          */
211         uint8_t confResMode = 0;
212     };
213 
214     static_assert(sizeof(MetaDataV2) == 1,
215                   "MetaDataV2 is not the expected size.");
216 
217     /*
218      * V3 is a 2 byte[1] extension storing Synchronous Replication state.
219      *
220      * [1] It /could/ fit all required state into a single byte, however that
221      * would mean it's the same size as V2 (1Byte) and we use metadata size to
222      * distinguish between the different metadata versions. As such 2bytes are
223      * used which logically wastes 1 byte per SyncWrite. If / when we
224      * restructure MetaData to say use variable-length encoding with explicit
225      * versions (a la flex framing extras) the size could be reduced.
226      */
227     class MetaDataV3 {
228     public:
229         // The Operation this represents - maps to queue_op types:
230         enum class Operation : uint8_t {
231             // A pending sync write. 'level' field defines the durability_level.
232             // Present in the DurabilityPrepare namespace.
233             Pending = 0,
234             // A committed SyncWrite.
235             // This exists so we can correctly backfill from disk a Committed
236             // mutation and sent out as a DCP_COMMIT to sync_replication
237             // enabled DCP clients.
238             // Present in the 'normal' (committed) namespace.
239             Commit = 1,
240             // An aborted SyncWrite.
241             // This exists so we can correctly backfill from disk an Aborted
242             // mutation and sent out as a DCP_ABORT to sync_replication
243             // enabled DCP clients.
244             // Present in the DurabilityPrepare namespace.
245             Abort = 2,
246         };
247 
248         MetaDataV3() = default;
249 
initialise(const char* raw)250         void initialise(const char* raw) {
251             operation = Operation(raw[0]);
252             uint64_t buf;
253             std::memcpy(&buf, &raw[1], sizeof(cb::uint48_t));
254             details.raw = cb::uint48_t(buf).ntoh();
255         };
256 
copyToBuf(char* raw) const257         void copyToBuf(char* raw) const {
258             raw[0] = char(operation);
259             std::memcpy(&raw[1], &details.raw, sizeof(cb::uint48_t));
260         }
261 
setDurabilityOp(queue_op op)262         void setDurabilityOp(queue_op op) {
263             switch (op) {
264             case queue_op::pending_sync_write:
265                 operation = Operation::Pending;
266                 break;
267             case queue_op::commit_sync_write:
268                 operation = Operation::Commit;
269                 break;
270             case queue_op::abort_sync_write:
271                 operation = Operation::Abort;
272                 break;
273             default:
274                 throw std::invalid_argument(
275                         "MetaDataV3::setDurabilityOp: Unsupported op " +
276                         to_string(op));
277             }
278         }
279 
getDurabilityOp() const280         queue_op getDurabilityOp() const {
281             switch (operation) {
282             case Operation::Pending:
283                 return queue_op::pending_sync_write;
284             case Operation::Commit:
285                 return queue_op::commit_sync_write;
286             case Operation::Abort:
287                 return queue_op::abort_sync_write;
288             default:
289                 throw std::invalid_argument(
290                         "MetaDataV3::getDurabiltyOp: Unsupported op " +
291                         std::to_string(int(operation)));
292             }
293         }
294 
getDurabilityLevel() const295         cb::durability::Level getDurabilityLevel() const {
296             Expects(operation == Operation::Pending);
297             return static_cast<cb::durability::Level>(details.pending.level);
298         }
299 
setDurabilityLevel(cb::durability::Level level_)300         void setDurabilityLevel(cb::durability::Level level_) {
301             Expects(operation == Operation::Pending);
302             details.pending.level = static_cast<char>(level_);
303         }
304 
isPreparedDelete() const305         bool isPreparedDelete() const {
306             Expects(operation == Operation::Pending);
307             return details.pending.isDelete == 1;
308         }
309 
setPreparedDelete(bool isPreparedSyncDelete)310         void setPreparedDelete(bool isPreparedSyncDelete) {
311             Expects(operation == Operation::Pending);
312             details.pending.isDelete = isPreparedSyncDelete;
313         }
314 
getPrepareSeqno() const315         cb::uint48_t getPrepareSeqno() const {
316             Expects(operation == Operation::Commit ||
317                     operation == Operation::Abort);
318             return details.completed.prepareSeqno;
319         }
320 
setPrepareSeqno(cb::uint48_t prepareSeqno)321         void setPrepareSeqno(cb::uint48_t prepareSeqno) {
322             Expects(operation == Operation::Commit ||
323                     operation == Operation::Abort);
324             details.completed.prepareSeqno = prepareSeqno;
325         }
326 
prepareForPersistence()327         void prepareForPersistence() {
328             details.raw = details.raw.hton();
329         }
330 
isCommit() const331         bool isCommit() const {
332             return operation == Operation::Commit;
333         }
334 
isPrepare() const335         bool isPrepare() const {
336             return operation == Operation::Pending;
337         }
338 
339     private:
340         // Assigning a whole byte to this (see MetaDataV3 class comment)
341         // although only currently need 2 bits.
342         Operation operation;
343 
344         // [[if Pending]] Properties of the pending SyncWrite.
345         // Currently using 3 bits out of the available 8 in this byte.
346         union detailsUnion {
347             // Need to supply a default constructor or the compiler will
348             // complain about cb::uint48_t
detailsUnion()349             detailsUnion() : raw(0){};
350             struct {
351                 // 0:pendingSyncWrite, 1:pendingSyncDelete.
352                 uint8_t isDelete : 1;
353                 // cb::durability::Level
354                 uint8_t level : 2;
355             } pending;
356             struct completedDetails {
357                 // prepareSeqno of the completed Sync Write
358                 cb::uint48_t prepareSeqno;
359             } completed;
360 
361             cb::uint48_t raw;
362         } details;
363     };
364 
365     static_assert(sizeof(MetaDataV3) == 7,
366                   "MetaDataV3 is not the expected size.");
367 
368 public:
369     enum class Version {
370         V0, // Cas/Exptime/Flags
371         V1, // Flex code and datatype
372         V2, // Conflict Resolution Mode - not stored, but can be read
373         V3, // Synchronous Replication state.
374         /*
375          * !!MetaData Warning!!
376          * Sherlock began storing the V2 MetaData.
377          * Watson stops storing the V2 MetaData (now storing V1)
378          *
379          * Any new MetaData (e.g a V3) we wish to store may cause trouble if it
380          * has the size of V2, code assumes the version from the size.
381          */
382     };
383 
MetaData()384     MetaData() {
385     }
386 
387     /*
388      * Construct metadata from a sized_buf, the assumption is that the
389      * data has come back from couchstore.
390      */
MetaData(const sized_buf& in)391     MetaData(const sized_buf& in) : initVersion(Version::V0) {
392         // Expect metadata to be V0, V1, V2 or V3
393         // V2 part is ignored, but valid to find in storage.
394         if (in.size < getMetaDataSize(Version::V0) ||
395             in.size > getMetaDataSize(Version::V3)) {
396             throw std::invalid_argument("MetaData::MetaData in.size \"" +
397                                         std::to_string(in.size) +
398                                         "\" is out of range.");
399         }
400 
401         // Initialise at least the V0 metadata
402         allMeta.v0.initialise(in.buf);
403 
404         // The rest depends on in.size
405         if (in.size >= (sizeof(MetaDataV0) + sizeof(MetaDataV1))) {
406             // The size extends enough to include V1 meta, initialise that.
407             allMeta.v1.initialise(in.buf + sizeof(MetaDataV0));
408             initVersion = Version::V1;
409         }
410 
411         // Not initialising V2 from 'in' as V2 is ignored.
412 
413         if (in.size >=
414             (sizeof(MetaDataV0) + sizeof(MetaDataV1) + sizeof(MetaDataV3))) {
415             // The size extends enough to include V3 meta, initialise that.
416             allMeta.v3.initialise(in.buf + sizeof(MetaDataV0) +
417                                   sizeof(MetaDataV1));
418             initVersion = Version::V3;
419         }
420     }
421 
422     /*
423      * The reverse of MetaData(const sized_buf& in), copy the data out
424      * to a pre-allocated sized_buf ready for passing to couchstore.
425      */
copyToBuf(sized_buf& out) const426     void copyToBuf(sized_buf& out) const {
427         if ((out.size != getMetaDataSize(Version::V1) &&
428              (out.size != getMetaDataSize(Version::V3)))) {
429             throw std::invalid_argument(
430                     "MetaData::copyToBuf out.size \"" +
431                     std::to_string(out.size) +
432                     "\" incorrect size (only V1 and V3 supported)");
433         }
434         // Copy the V0/V1 meta data holders to the output buffer
435         allMeta.v0.copyToBuf(out.buf);
436         allMeta.v1.copyToBuf(out.buf + sizeof(MetaDataV0));
437 
438         // We can write either V1 or V3 at present (V3 contains metadata which
439         // is only applicable to SyncWrites, so we use V1 for non-SyncWrites
440         // and V3 for SyncWrites.
441         if (out.size == getMetaDataSize(Version::V3)) {
442             allMeta.v3.copyToBuf(out.buf + sizeof(MetaDataV0) +
443                                  sizeof(MetaDataV1));
444         }
445     }
446 
447     /*
448      * Prepare the metadata for persistence (byte-swap certain fields) and
449      * return a pointer to the metadata ready for persistence.
450      */
prepareAndGetForPersistence()451     char* prepareAndGetForPersistence() {
452         allMeta.v0.prepareForPersistence();
453         allMeta.v3.prepareForPersistence();
454         return reinterpret_cast<char*>(&allMeta);
455     }
456 
setCas(uint64_t cas)457     void setCas(uint64_t cas) {
458         allMeta.v0.setCas(cas);
459     }
460 
getCas() const461     uint64_t getCas() const {
462         return allMeta.v0.getCas();
463     }
464 
setExptime(uint32_t exptime)465     void setExptime(uint32_t exptime) {
466         allMeta.v0.setExptime(exptime);
467     }
468 
getExptime() const469     uint32_t getExptime() const {
470         return allMeta.v0.getExptime();
471     }
472 
setFlags(uint32_t flags)473     void setFlags(uint32_t flags) {
474         allMeta.v0.setFlags(flags); // flags are not byteswapped
475     }
476 
getFlags() const477     uint32_t getFlags() const {
478         return allMeta.v0.getFlags(); // flags are not byteswapped
479     }
480 
setFlexCode()481     void setFlexCode() {
482         allMeta.v1.setFlexCode();
483     }
484 
setFlexCode(uint8_t code)485     void setFlexCode(uint8_t code) {
486         allMeta.v1.setFlexCode(code);
487     }
488 
getFlexCode() const489     uint8_t getFlexCode() const {
490         return allMeta.v1.getFlexCode();
491     }
492 
setDeleteSource(DeleteSource source)493     void setDeleteSource(DeleteSource source) {
494         allMeta.v1.setDeleteSource(source);
495     }
496 
getDeleteSource() const497     DeleteSource getDeleteSource() const {
498         return allMeta.v1.getDeleteSource();
499     }
500 
501     /*
502      * Note that setting the data type will also set the flex code.
503      */
setDataType(protocol_binary_datatype_t dataType)504     void setDataType(protocol_binary_datatype_t dataType) {
505         setFlexCode();
506         allMeta.v1.setDataType(dataType);
507     }
508 
getDataType() const509     protocol_binary_datatype_t getDataType() const {
510         return allMeta.v1.getDataType();
511     }
512 
setDurabilityOp(queue_op op)513     void setDurabilityOp(queue_op op) {
514         allMeta.v3.setDurabilityOp(op);
515     }
516 
getDurabilityOp() const517     queue_op getDurabilityOp() const {
518         return allMeta.v3.getDurabilityOp();
519     }
520 
setPrepareProperties(cb::durability::Level level, bool isSyncDelete)521     void setPrepareProperties(cb::durability::Level level, bool isSyncDelete) {
522         allMeta.v3.setDurabilityLevel(level);
523         allMeta.v3.setPreparedDelete(isSyncDelete);
524     }
525 
setCompletedProperties(cb::uint48_t prepareSeqno)526     void setCompletedProperties(cb::uint48_t prepareSeqno) {
527         allMeta.v3.setPrepareSeqno(prepareSeqno);
528     }
529 
getDurabilityLevel() const530     cb::durability::Level getDurabilityLevel() const {
531         return allMeta.v3.getDurabilityLevel();
532     }
533 
isPreparedSyncDelete() const534     bool isPreparedSyncDelete() const {
535         return allMeta.v3.isPreparedDelete();
536     }
537 
isCommit() const538     bool isCommit() const {
539         return getVersionInitialisedFrom() != MetaData::Version::V3 ||
540                allMeta.v3.isCommit();
541     }
542 
isPrepare() const543     bool isPrepare() const {
544         return getVersionInitialisedFrom() == MetaData::Version::V3 &&
545                allMeta.v3.isPrepare();
546     }
547 
getPrepareSeqno() const548     cb::uint48_t getPrepareSeqno() const {
549         return allMeta.v3.getPrepareSeqno();
550     }
551 
getVersionInitialisedFrom() const552     Version getVersionInitialisedFrom() const {
553         return initVersion;
554     }
555 
getMetaDataSize(Version version)556     static size_t getMetaDataSize(Version version) {
557         switch (version) {
558         case Version::V0:
559             return sizeof(MetaDataV0);
560         case Version::V1:
561             return sizeof(MetaDataV0) + sizeof(MetaDataV1);
562         case Version::V2:
563             return sizeof(MetaDataV0) + sizeof(MetaDataV1) + sizeof(MetaDataV2);
564         case Version::V3:
565             return sizeof(MetaDataV0) + sizeof(MetaDataV1) + sizeof(MetaDataV3);
566         }
567         folly::assume_unreachable();
568     }
569 
570 protected:
571     class AllMetaData {
572     public:
573         MetaDataV0 v0;
574         MetaDataV1 v1;
575         // V2 is essentially a dead version, we no longer write it. Therefore
576         // V3 (and upwards) do not include it, and instead just extend V1.
577         MetaDataV3 v3;
578     } allMeta;
579     Version initVersion;
580 };
581 #pragma pack()
582 
583 /*
584  * Create the appropriate MetaData container.
585  */
586 class MetaDataFactory {
587 public:
createMetaData(sized_buf metadata)588     static std::unique_ptr<MetaData> createMetaData(sized_buf metadata) {
589         return std::unique_ptr<MetaData>(new MetaData(metadata));
590     }
591 
createMetaData()592     static std::unique_ptr<MetaData> createMetaData() {
593         return std::unique_ptr<MetaData>(new MetaData());
594     }
595 };
596