xref: /3.0.3-GA/ep-engine/src/kvstore.h (revision 4664d274)
1/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/*
3 *     Copyright 2010 Couchbase, Inc
4 *
5 *   Licensed under the Apache License, Version 2.0 (the "License");
6 *   you may not use this file except in compliance with the License.
7 *   You may obtain a copy of the License at
8 *
9 *       http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *   Unless required by applicable law or agreed to in writing, software
12 *   distributed under the License is distributed on an "AS IS" BASIS,
13 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *   See the License for the specific language governing permissions and
15 *   limitations under the License.
16 */
17
18#ifndef SRC_KVSTORE_H_
19#define SRC_KVSTORE_H_ 1
20
21#include "config.h"
22
#include <cstdlib>
#include <cstring>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
28
29#include "configuration.h"
30#include "stats.h"
31#include "tasks.h"
32#include "vbucket.h"
33
34/**
35 * Result of database mutation operations.
36 *
37 * This is a pair where .first is the number of rows affected, and
38 * .second is true if it is an insertion.
39 *
40 * .first will be -1 if there was an error performing the update.
41 *
42 * .first will be 0 if the update did not error, but did not occur.
43 * This would generally be considered a fatal condition (in practice,
44 * it requires you to be firing an update at a missing rowid).
45 */
46typedef std::pair<int, bool> mutation_result;
47
48typedef std::pair<ENGINE_ERROR_CODE, uint64_t> rollback_error_code;
49
50struct vbucket_state {
51    vbucket_state() { }
52    vbucket_state(vbucket_state_t _state, uint64_t _chkid,
53                  uint64_t _maxDelSeqNum, int64_t _highSeqno) :
54        state(_state), checkpointId(_chkid), maxDeletedSeqno(_maxDelSeqNum),
55        highSeqno(_highSeqno) { }
56
57    vbucket_state_t state;
58    uint64_t checkpointId;
59    uint64_t maxDeletedSeqno;
60    int64_t highSeqno;
61    std::string failovers;
62    uint64_t purgeSeqno;
63};
64
65/**
66 * Type of vbucket map.
67 *
68 * key is the vbucket identifier.
 * value is the vbucket_state persisted for that vbucket (its state,
 * checkpoint id, sequence numbers and failover information).
71 */
72typedef std::map<uint16_t, vbucket_state> vbucket_map_t;
73
74/**
 * Properties of the storage layer.
76 *
77 * If concurrent filesystem access is possible, maxConcurrency() will
78 * be greater than one.  One will need to determine whether more than
79 * one writer is possible as well as whether more than one reader is
80 * possible.
81 */
class StorageProperties {
public:

    /**
     * @param evb  value reported by hasEfficientVBDump()
     * @param evd  value reported by hasEfficientVBDeletion()
     * @param pd   value reported by hasPersistedDeletions()
     * @param eget value reported by hasEfficientGet()
     */
    StorageProperties(bool evb, bool evd, bool pd, bool eget) :
        efficientVBDump(evb),
        efficientVBDeletion(evd),
        persistedDeletions(pd),
        efficientGet(eget)
    {
    }

    //! True if a single vbucket can be dumped efficiently.
    bool hasEfficientVBDump() const {
        return efficientVBDump;
    }

    //! True if a whole vbucket can be deleted efficiently in one go.
    bool hasEfficientVBDeletion() const {
        return efficientVBDeletion;
    }

    //! True if deletions can be persisted to disk.
    bool hasPersistedDeletions() const {
        return persistedDeletions;
    }

    //! True if multiple get operations can be batch-processed at once.
    bool hasEfficientGet() const {
        return efficientGet;
    }

private:
    // Capability flags, fixed at construction time.
    bool efficientVBDump;
    bool efficientVBDeletion;
    bool persistedDeletions;
    bool efficientGet;
};
106
107/**
108 * Database strategy
109 */
enum db_type {
    //! one database
    single_db,
    //! several databases
    multi_db,
    //! one database holding several tables
    single_mt_db,
    //! several databases, each holding several tables
    multi_mt_db,
    //! several databases with several tables, sharded by vbucket
    multi_mt_vb_db
};
117
118class RollbackCB;
119class AllKeysCB;
120
121/**
122 * Base class representing kvstore operations.
123 */
124class KVStore {
125public:
126    KVStore(bool read_only = false) : readOnly(read_only) { }
127
128    virtual ~KVStore() {}
129
130    virtual size_t getEstimatedItemCount(std::vector<uint16_t> &vbs);
131
132
133    /**
134     * Allow the kvstore to add extra statistics information
135     * back to the client
136     * @param prefix prefix to use for the stats
137     * @param add_stat the callback function to add statistics
138     * @param c the cookie to pass to the callback function
139     */
140    virtual void addStats(const std::string &prefix, ADD_STAT add_stat, const void *c) {
141        (void)prefix;
142        (void)add_stat;
143        (void)c;
144    }
145
146    /**
147     * Show kvstore specific timing stats.
148     *
149     * @param prefix prefix to use for the stats
150     * @param add_stat the callback function to add statistics
151     * @param c the cookie to pass to the callback function
152     */
153    virtual void addTimingStats(const std::string &, ADD_STAT, const void *) {
154    }
155
156    /**
157     * Resets kvstore specific stats
158     */
159     virtual void resetStats() {
160     }
161
162    /**
163     * Reset the store to a clean state.
164     */
165    virtual void reset(uint16_t shardId) = 0;
166
167    /**
168     * Begin a transaction (if not already in one).
169     *
170     * @return false if we cannot begin a transaction
171     */
172    virtual bool begin() = 0;
173
174    /**
175     * Commit a transaction (unless not currently in one).
176     *
177     * @return false if the commit fails
178     */
179    virtual bool commit(Callback<kvstats_ctx> *cb) = 0;
180
181    /**
182     * Rollback the current transaction.
183     */
184    virtual void rollback() = 0;
185
186    /**
187     * Get the properties of the underlying storage.
188     */
189    virtual StorageProperties getStorageProperties() = 0;
190
191    /**
192     * Set an item into the kv store.
193     */
194    virtual void set(const Item &item,
195                     Callback<mutation_result> &cb) = 0;
196
197    /**
198     * Get an item from the kv store.
199     */
200    virtual void get(const std::string &key, uint64_t rowid,
201                     uint16_t vb,
202                     Callback<GetValue> &cb, bool fetchDelete = false) = 0;
203
204    virtual void getWithHeader(void *dbHandle, const std::string &key,
205                               uint16_t vb, Callback<GetValue> &cb,
206                               bool fetchDelete = false) = 0;
207
208    /**
209     * Get multiple items if supported by the kv store
210     */
211    virtual void getMulti(uint16_t vb, vb_bgfetch_queue_t &itms) {
212        (void) itms; (void) vb;
213        throw std::runtime_error("Backend does not support getMulti()");
214    }
215
216    /**
217     * Delete an item from the kv store.
218     */
219    virtual void del(const Item &itm, Callback<int> &cb) = 0;
220
221    /**
222     * Delete a given vbucket database.
223     */
224    virtual bool delVBucket(uint16_t vbucket, bool recreate = false) = 0;
225
226    /**
227     * Get a list of all persisted vbuckets (with their states).
228     */
229    virtual vbucket_map_t listPersistedVbuckets(void) = 0;
230
231    /**
232     * Get a list of all persisted engine and tap stats. This API is mainly
233     * invoked during warmup to get the engine stats from the previous session.
234     *
235     * @param stats map instance where the engine stats from the previous
236     * session is stored.
237     */
238    virtual void getPersistedStats(std::map<std::string, std::string> &stats) {
239        (void) stats;
240    }
241
242    /**
243     * Persist a snapshot of a collection of stats.
244     */
245    virtual bool snapshotStats(const std::map<std::string, std::string> &m) = 0;
246
247    /**
248     * Snapshot vbucket states.
249     */
250    virtual bool snapshotVBuckets(const vbucket_map_t &m,
251                                  Callback<kvstats_ctx> *cb) = 0;
252
253    /**
254     * Compact a vbucket file.
255     */
256    virtual bool compactVBucket(const uint16_t vbid,
257                                compaction_ctx *c,
258                                Callback<compaction_ctx> &cb,
259                                Callback<kvstats_ctx> &kvcb) = 0;
260
261    /**
262     * Pass all stored data for specified keys through the given callback.
263     */
264    virtual void dump(std::vector<uint16_t> &vbids,
265                      shared_ptr<Callback<GetValue> > cb,
266                      shared_ptr<Callback<CacheLookup> > cl) = 0;
267
268    /**
269     * Pass all stored data for the given vbucket through the given
270     * callback.
271     */
272    virtual void dump(uint16_t vbid, uint64_t stSeqno,
273                      shared_ptr<Callback<GetValue> > cb,
274                      shared_ptr<Callback<CacheLookup> > cl,
275                      shared_ptr<Callback<SeqnoRange> > sr) = 0;
276
277    /**
278     * Check if the kv-store supports a dumping all of the keys
279     * @return true you may call dumpKeys() to do a prefetch
280     *              of the keys
281     */
282    virtual bool isKeyDumpSupported() {
283        return false;
284    }
285
286    /**
287     * Dump the keys from a given set of vbuckets
288     * @param vbids the vbuckets to dump
289     * @param cb the callback to fire for each document
290     */
291    virtual void dumpKeys(std::vector<uint16_t> &vbids, shared_ptr<Callback<GetValue> > cb) {
292        (void)vbids; (void)cb;
293        throw std::runtime_error("Backed does not support dumpKeys()");
294    }
295
296    virtual void dumpDeleted(uint16_t vbid, uint64_t stSeqno, uint64_t enSeqno,
297                             shared_ptr<Callback<GetValue> > cb) {
298        (void) vbid; (void) cb;
299        throw std::runtime_error("Backend does not support dumpDeleted()");
300    }
301
302    virtual size_t getNumPersistedDeletes(uint16_t) {
303        return 0;
304    }
305
306    virtual size_t getNumItems(uint16_t) {
307        return 0;
308    }
309
310    virtual size_t getNumItems(uint16_t, uint64_t, uint64_t) {
311        return 0;
312    }
313
314    virtual rollback_error_code rollback(uint16_t vbid,
315                                         uint64_t rollbackseqno,
316                                         shared_ptr<RollbackCB> cb) = 0;
317
318    /**
319     * This method is called before persisting a batch of data if you'd like to
320     * do stuff to them that might improve performance at the IO layer.
321     */
322    virtual void optimizeWrites(std::vector<queued_item> &items) {
323        (void)items;
324        // EMPTY
325    }
326
327    bool isReadOnly(void) {
328        return readOnly;
329    }
330
331    virtual ENGINE_ERROR_CODE getAllKeys(uint16_t vbid,
332                                         std::string &start_key, uint32_t count,
333                                         AllKeysCB *cb) = 0;
334
335protected:
336    bool readOnly;
337
338};
339
340/**
341 * The KVStoreFactory creates the correct KVStore instance(s) when
342 * needed by EPStore.
343 */
344class KVStoreFactory {
345public:
346
347    /**
348     * Create a KVStore with the given type.
349     *
350     * @param stats the engine stats
351     * @param config the engine configuration
352     * @param read_only true if the kvstore instance is for read operations only
353     */
354    static KVStore *create(EPStats &stats, Configuration &config,
355                           bool read_only = false);
356};
357
358/**
359 * Callback class used by UprConsumer, for rollback operation
360 */
361class RollbackCB : public Callback<GetValue> {
362public:
363    RollbackCB(EventuallyPersistentEngine& e) :
364        engine_(e), dbHandle(NULL) { }
365
366    void setDbHeader(void *db) {
367        dbHandle = db;
368    }
369
370    void callback(GetValue &val);
371
372private:
373    EventuallyPersistentEngine& engine_;
374    void *dbHandle;
375};
376
377/**
378 * Callback class used by AllKeysAPI, for caching fetched keys
379 *
380 * As by default (or in most cases), number of keys is 1000,
381 * and an average key could be 32B in length, initialize buffersize of
382 * allKeys to 34000 (1000 * 32 + 1000 * 2), the additional 2 bytes per
383 * key is for the keylength.
384 *
385 * This initially allocated buffersize is doubled whenever the length
386 * of the buffer holding all the keys, crosses the buffersize.
387 */
class AllKeysCB {
public:
    /**
     * Start with an empty key list backed by a 34000 byte buffer.
     *
     * NOTE(review): the result of malloc() is not checked here, matching
     * the previous behaviour; getAllKeysPtr() returns NULL if the
     * allocation failed.
     */
    AllKeysCB() :
        length(0),
        buffersize(34000),
        buffer(static_cast<char *>(malloc(buffersize))) { }

    ~AllKeysCB() {
        free(buffer);
    }

    /**
     * Append one key to the buffer; defined outside this header.
     *
     * @param len length of the key in bytes
     * @param buf pointer to the key bytes
     */
    void addtoAllKeys (uint16_t len, char *buf);

    //! Start of the buffer holding the collected keys (owned by this object).
    char* getAllKeysPtr() const { return buffer; }
    //! Current value of the length counter (0 right after construction).
    uint64_t getAllKeysLen() const { return length; }

private:
    // The object owns `buffer` and frees it in the destructor, so a
    // member-wise copy would lead to a double free. Copying is therefore
    // disabled (declared private and intentionally left undefined).
    AllKeysCB(const AllKeysCB &);
    AllKeysCB &operator=(const AllKeysCB &);

    uint64_t length;      //!< length of the buffer contents
    uint64_t buffersize;  //!< size of the allocation behind `buffer`
    char *buffer;         //!< malloc()ed storage, released with free()

};
411
412#endif  // SRC_KVSTORE_H_
413