130b18fb0SJim Walker/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
230b18fb0SJim Walker/*
330b18fb0SJim Walker *     Copyright 2015 Couchbase, Inc.
430b18fb0SJim Walker *
530b18fb0SJim Walker *   Licensed under the Apache License, Version 2.0 (the "License");
630b18fb0SJim Walker *   you may not use this file except in compliance with the License.
730b18fb0SJim Walker *   You may obtain a copy of the License at
830b18fb0SJim Walker *
930b18fb0SJim Walker *       http://www.apache.org/licenses/LICENSE-2.0
1030b18fb0SJim Walker *
1130b18fb0SJim Walker *   Unless required by applicable law or agreed to in writing, software
1230b18fb0SJim Walker *   distributed under the License is distributed on an "AS IS" BASIS,
1330b18fb0SJim Walker *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1430b18fb0SJim Walker *   See the License for the specific language governing permissions and
1530b18fb0SJim Walker *   limitations under the License.
1630b18fb0SJim Walker */
1730b18fb0SJim Walker
1830b18fb0SJim Walker//
1930b18fb0SJim Walker// couch_create, a program for 'offline' generation of Couchbase compatible
2030b18fb0SJim Walker// couchstore files.
2130b18fb0SJim Walker//
2230b18fb0SJim Walker
2330b18fb0SJim Walker#include <getopt.h>
2430b18fb0SJim Walker#include <inttypes.h>
2530b18fb0SJim Walker#include <stddef.h>
2630b18fb0SJim Walker#include <stdint.h>
2730b18fb0SJim Walker#include <atomic>
2830b18fb0SJim Walker#include <chrono>
2930b18fb0SJim Walker#include <climits>
3030b18fb0SJim Walker#include <condition_variable>
3130b18fb0SJim Walker#include <cstdlib>
3230b18fb0SJim Walker#include <cstring>
3330b18fb0SJim Walker#include <deque>
3430b18fb0SJim Walker#include <exception>
3530b18fb0SJim Walker#include <iostream>
3630b18fb0SJim Walker#include <memory>
3730b18fb0SJim Walker#include <mutex>
3830b18fb0SJim Walker#include <set>
3930b18fb0SJim Walker#include <sstream>
4030b18fb0SJim Walker#include <string>
4130b18fb0SJim Walker#include <thread>
4230b18fb0SJim Walker#include <vector>
4330b18fb0SJim Walker#include "crc32.h"
4430b18fb0SJim Walker
4530b18fb0SJim Walker#include "libcouchstore/couch_db.h"
4630b18fb0SJim Walker
// State assigned to each vbucket slot tracked by ProgramParameters.
enum VBucketState {
    VB_ACTIVE = 0, // managed, written as an active vbucket
    VB_REPLICA = 1, // managed, written as a replica vbucket
    VB_UNMANAGED = 2 // not handled by this run
};
4830b18fb0SJim Walker
// Kind of document body to generate; selects the couchstore content_meta
// flags that are stored with each document.
enum DocType {
    BINARY_DOC = 0, // non-JSON body
    BINARY_DOC_COMPRESSED = 1, // non-JSON body, flagged as compressed
    JSON_DOC = 2, // JSON body
    JSON_DOC_COMPRESSED = 3 // JSON body, flagged as compressed
};
5530b18fb0SJim Walker
5630b18fb0SJim Walker//
5730b18fb0SJim Walker// ProgramParameters:
5830b18fb0SJim Walker// An object to process argv/argc and carry around the parameters to operate
5930b18fb0SJim Walker// with.
6030b18fb0SJim Walker//
6130b18fb0SJim Walkerclass ProgramParameters {
6230b18fb0SJim Walkerpublic:
6330b18fb0SJim Walker    // Define all program defaults as static
6430b18fb0SJim Walker    static const bool reuse_couch_files_default = false;
6530b18fb0SJim Walker    static const int vbc_default = 1024;
6630b18fb0SJim Walker    static const uint64_t key_count_default = 0;
6730b18fb0SJim Walker    static const int keys_per_flush_default = 512;
6830b18fb0SJim Walker    static const int doc_len_default = 256;
6930b18fb0SJim Walker    static const int keys_per_vbucket_default = false;
7030b18fb0SJim Walker    static const uint64_t start_key_default = 0;
7130b18fb0SJim Walker    static const bool low_compression_default = false;
7230b18fb0SJim Walker    static const DocType doc_type_default = BINARY_DOC_COMPRESSED;
7330b18fb0SJim Walker    static const int flusher_count_default = 8;
7430b18fb0SJim Walker
7530b18fb0SJim Walker    //
7630b18fb0SJim Walker    // Construct a program parameters, all parameters assigned default settings
7730b18fb0SJim Walker    //
7830b18fb0SJim Walker    ProgramParameters()
7930b18fb0SJim Walker        : reuse_couch_files(reuse_couch_files_default),
8030b18fb0SJim Walker          vbc(vbc_default),
8130b18fb0SJim Walker          key_count(key_count_default),
8230b18fb0SJim Walker          keys_per_vbucket(keys_per_vbucket_default),
8330b18fb0SJim Walker          keys_per_flush(keys_per_flush_default),
8430b18fb0SJim Walker          doc_len(doc_len_default),
8530b18fb0SJim Walker          doc_type(doc_type_default),
8630b18fb0SJim Walker          vbuckets(vbc_default),
8730b18fb0SJim Walker          vbuckets_managed(0),
8830b18fb0SJim Walker          start_key(start_key_default),
8930b18fb0SJim Walker          low_compression(low_compression_default),
9030b18fb0SJim Walker          flusher_count(flusher_count_default) {
9130b18fb0SJim Walker        fill(vbuckets.begin(), vbuckets.end(), VB_UNMANAGED);
9230b18fb0SJim Walker    }
9330b18fb0SJim Walker
9430b18fb0SJim Walker    void load(int argc, char** argv) {
9530b18fb0SJim Walker        const int KEYS_PER_VBUCKET = 1000;
9630b18fb0SJim Walker        while (1) {
9730b18fb0SJim Walker            static struct option long_options[] = {
9830b18fb0SJim Walker                    {"reuse", no_argument, 0, 'r'},
9930b18fb0SJim Walker                    {"vbc", required_argument, 0, 'v'},
10030b18fb0SJim Walker                    {"keys", required_argument, 0, 'k'},
10130b18fb0SJim Walker                    {"keys-per-vbucket", no_argument, 0, KEYS_PER_VBUCKET},
10230b18fb0SJim Walker                    {"keys-per-flush", required_argument, 0, 'f'},
10330b18fb0SJim Walker                    {"doc-len", required_argument, 0, 'd'},
10430b18fb0SJim Walker                    {"doc-type", required_argument, 0, 't'},
10530b18fb0SJim Walker                    {"start-key", required_argument, 0, 's'},
10630b18fb0SJim Walker                    {"low-compression", no_argument, 0, 'l'},
10730b18fb0SJim Walker                    {0, 0, 0, 0}};
10830b18fb0SJim Walker            /* getopt_long stores the option index here. */
10930b18fb0SJim Walker            int option_index = 0;
11030b18fb0SJim Walker
11130b18fb0SJim Walker            int c = getopt_long(
11230b18fb0SJim Walker                    argc, argv, "s:v:k:f:d:t:rl", long_options, &option_index);
11330b18fb0SJim Walker
11430b18fb0SJim Walker            /* Detect the end of the options. */
11530b18fb0SJim Walker            if (c == -1) {
11630b18fb0SJim Walker                break;
11730b18fb0SJim Walker            }
11830b18fb0SJim Walker
11930b18fb0SJim Walker            switch (c) {
12030b18fb0SJim Walker            case 'v': {
12130b18fb0SJim Walker                vbc = static_cast<int16_t>(atoi(optarg));
12230b18fb0SJim Walker                vbuckets.resize(vbc);
12330b18fb0SJim Walker                break;
12430b18fb0SJim Walker            }
12530b18fb0SJim Walker
12630b18fb0SJim Walker            case 'k': {
12730b18fb0SJim Walker                key_count = strtoull(optarg, 0, 10);
12830b18fb0SJim Walker                break;
12930b18fb0SJim Walker            }
13030b18fb0SJim Walker
13130b18fb0SJim Walker            case 'f': {
13230b18fb0SJim Walker                keys_per_flush = atoi(optarg);
13330b18fb0SJim Walker                break;
13430b18fb0SJim Walker            }
13530b18fb0SJim Walker
13630b18fb0SJim Walker            case 'd': {
13730b18fb0SJim Walker                doc_len = atoi(optarg);
13830b18fb0SJim Walker                break;
13930b18fb0SJim Walker            }
14030b18fb0SJim Walker
14130b18fb0SJim Walker            case 'r': {
14230b18fb0SJim Walker                reuse_couch_files = true;
14330b18fb0SJim Walker                break;
14430b18fb0SJim Walker            }
14530b18fb0SJim Walker
14630b18fb0SJim Walker            case 'l': {
14730b18fb0SJim Walker                low_compression = true;
14830b18fb0SJim Walker                break;
14930b18fb0SJim Walker            }
15030b18fb0SJim Walker
15130b18fb0SJim Walker            case 's': {
15230b18fb0SJim Walker                start_key = strtoull(optarg, 0, 10);
15330b18fb0SJim Walker                break;
15430b18fb0SJim Walker            }
15530b18fb0SJim Walker
15630b18fb0SJim Walker            case 't': {
15730b18fb0SJim Walker                if (strcmp(optarg, "binary") == 0) {
15830b18fb0SJim Walker                    doc_type = BINARY_DOC;
15930b18fb0SJim Walker                } else if (strcmp(optarg, "binarycompressed") == 0) {
16030b18fb0SJim Walker                    doc_type = BINARY_DOC_COMPRESSED;
16130b18fb0SJim Walker                }
16230b18fb0SJim Walker                break;
16330b18fb0SJim Walker            }
16430b18fb0SJim Walker
16530b18fb0SJim Walker            case KEYS_PER_VBUCKET: {
16630b18fb0SJim Walker                keys_per_vbucket = true;
16730b18fb0SJim Walker                break;
16830b18fb0SJim Walker            }
16930b18fb0SJim Walker
17030b18fb0SJim Walker            default: { usage(1); }
17130b18fb0SJim Walker            }
17230b18fb0SJim Walker        } // end of option parsing
17330b18fb0SJim Walker
17430b18fb0SJim Walker        // Now are we managing all vbuckets, or a list?
17530b18fb0SJim Walker        if (optind < argc) {
17630b18fb0SJim Walker            while (optind < argc) {
17730b18fb0SJim Walker                int i = atoi(argv[optind]);
17830b18fb0SJim Walker                if (i < vbc) {
17930b18fb0SJim Walker                    // a or r present?
18030b18fb0SJim Walker                    VBucketState s = VB_ACTIVE;
18130b18fb0SJim Walker                    for (size_t i = 0; i < strlen(argv[optind]); i++) {
18230b18fb0SJim Walker                        if (argv[optind][i] == 'a') {
18330b18fb0SJim Walker                            s = VB_ACTIVE;
18430b18fb0SJim Walker                        } else if (argv[optind][i] == 'r') {
18530b18fb0SJim Walker                            s = VB_REPLICA;
18630b18fb0SJim Walker                        }
18730b18fb0SJim Walker                    }
18830b18fb0SJim Walker                    vbuckets[i] = s;
18930b18fb0SJim Walker                    vbuckets_managed++; // keep track of how many we are
19030b18fb0SJim Walker                    // managing
19130b18fb0SJim Walker                    std::cout << "Managing VB " << i;
19230b18fb0SJim Walker                    if (s == VB_ACTIVE) {
19330b18fb0SJim Walker                        std::cout << " active" << std::endl;
19430b18fb0SJim Walker                    } else {
19530b18fb0SJim Walker                        std::cout << " replica" << std::endl;
19630b18fb0SJim Walker                    }
19730b18fb0SJim Walker
19830b18fb0SJim Walker                    optind++;
19930b18fb0SJim Walker                }
20030b18fb0SJim Walker            }
20130b18fb0SJim Walker        } else {
20230b18fb0SJim Walker            for (int i = 0; i < vbc; i++) {
20330b18fb0SJim Walker                vbuckets[i] = VB_ACTIVE;
20430b18fb0SJim Walker                vbuckets_managed++;
20530b18fb0SJim Walker            }
20630b18fb0SJim Walker        }
20730b18fb0SJim Walker    }
20830b18fb0SJim Walker
20930b18fb0SJim Walker    //
21030b18fb0SJim Walker    // return true if the current parameters are good, else print an error and
21130b18fb0SJim Walker    // return false.
21230b18fb0SJim Walker    //
21330b18fb0SJim Walker    bool validate() const {
21430b18fb0SJim Walker        if (vbc <= 0) {
21530b18fb0SJim Walker            std::cerr << "Error: vbc less than or equal to 0 - " << vbc
21630b18fb0SJim Walker                      << std::endl;
21730b18fb0SJim Walker            return false;
21830b18fb0SJim Walker        }
21930b18fb0SJim Walker
22030b18fb0SJim Walker        // this ensures that the program doesn't run away with no args...
22130b18fb0SJim Walker        if (key_count == 0) {
22230b18fb0SJim Walker            std::cerr << "Key count 0 or not specified, use -k to set key "
22330b18fb0SJim Walker                         "count to "
22430b18fb0SJim Walker                         "greater than 0"
22530b18fb0SJim Walker                      << std::endl;
22630b18fb0SJim Walker            return false;
22730b18fb0SJim Walker        }
22830b18fb0SJim Walker        return true;
22930b18fb0SJim Walker    }
23030b18fb0SJim Walker
23130b18fb0SJim Walker    int16_t get_vbc() const {
23230b18fb0SJim Walker        return vbc;
23330b18fb0SJim Walker    }
23430b18fb0SJim Walker
23530b18fb0SJim Walker    uint64_t get_key_count() const {
23630b18fb0SJim Walker        return key_count;
23730b18fb0SJim Walker    }
23830b18fb0SJim Walker
23930b18fb0SJim Walker    int get_keys_per_flush() const {
24030b18fb0SJim Walker        return keys_per_flush;
24130b18fb0SJim Walker    }
24230b18fb0SJim Walker
24330b18fb0SJim Walker    int get_doc_len() const {
24430b18fb0SJim Walker        return doc_len;
24530b18fb0SJim Walker    }
24630b18fb0SJim Walker
24730b18fb0SJim Walker    bool get_reuse_couch_files() const {
24830b18fb0SJim Walker        return reuse_couch_files;
24930b18fb0SJim Walker    }
25030b18fb0SJim Walker
25130b18fb0SJim Walker    std::string get_doc_type_string() const {
25230b18fb0SJim Walker        switch (doc_type) {
25330b18fb0SJim Walker        case BINARY_DOC: {
25430b18fb0SJim Walker            return std::string("binary");
25530b18fb0SJim Walker            break;
25630b18fb0SJim Walker        }
25730b18fb0SJim Walker        case BINARY_DOC_COMPRESSED: {
25830b18fb0SJim Walker            return std::string("binary compressed");
25930b18fb0SJim Walker            break;
26030b18fb0SJim Walker        }
26130b18fb0SJim Walker        case JSON_DOC: {
26230b18fb0SJim Walker            return std::string("JSON");
26330b18fb0SJim Walker            break;
26430b18fb0SJim Walker        }
26530b18fb0SJim Walker        case JSON_DOC_COMPRESSED: {
26630b18fb0SJim Walker            return std::string("JSON compressed");
26730b18fb0SJim Walker            break;
26830b18fb0SJim Walker        }
26930b18fb0SJim Walker        }
27030b18fb0SJim Walker        return std::string("getDocTypeString failure");
27130b18fb0SJim Walker    }
27230b18fb0SJim Walker
27330b18fb0SJim Walker    DocType get_doc_type() const {
27430b18fb0SJim Walker        return doc_type;
27530b18fb0SJim Walker    }
27630b18fb0SJim Walker
27730b18fb0SJim Walker    bool is_keys_per_vbucket() const {
27830b18fb0SJim Walker        return keys_per_vbucket;
27930b18fb0SJim Walker    }
28030b18fb0SJim Walker
28130b18fb0SJim Walker    bool is_vbucket_managed(int vb) const {
28230b18fb0SJim Walker        if (vb > vbc) {
28330b18fb0SJim Walker            return false;
28430b18fb0SJim Walker        }
28530b18fb0SJim Walker        return vbuckets[vb] != VB_UNMANAGED;
28630b18fb0SJim Walker    }
28730b18fb0SJim Walker
28830b18fb0SJim Walker    int get_vbuckets_managed() {
28930b18fb0SJim Walker        return vbuckets_managed;
29030b18fb0SJim Walker    }
29130b18fb0SJim Walker
29230b18fb0SJim Walker    uint64_t get_start_key() {
29330b18fb0SJim Walker        return start_key;
29430b18fb0SJim Walker    }
29530b18fb0SJim Walker
29630b18fb0SJim Walker    VBucketState get_vbucket_state(int vb) const {
29730b18fb0SJim Walker        return vbuckets[vb];
29830b18fb0SJim Walker    }
29930b18fb0SJim Walker
30030b18fb0SJim Walker    void disable_vbucket(int vb) {
30130b18fb0SJim Walker        static std::mutex lock;
30230b18fb0SJim Walker        std::unique_lock<std::mutex> lck(lock);
30330b18fb0SJim Walker        vbuckets[vb] = VB_UNMANAGED;
30430b18fb0SJim Walker        vbuckets_managed--;
30530b18fb0SJim Walker    }
30630b18fb0SJim Walker
30730b18fb0SJim Walker    bool is_low_compression() {
30830b18fb0SJim Walker        return low_compression;
30930b18fb0SJim Walker    }
31030b18fb0SJim Walker
31130b18fb0SJim Walker    int get_flusher_count() {
31230b18fb0SJim Walker        return flusher_count;
31330b18fb0SJim Walker    }
31430b18fb0SJim Walker
31530b18fb0SJim Walker    static void usage(int exit_code) {
31630b18fb0SJim Walker        std::cerr << std::endl;
31730b18fb0SJim Walker        std::cerr << "couch_create <options> <vbucket list>" << std::endl;
31830b18fb0SJim Walker        std::cerr << "options:" << std::endl;
31930b18fb0SJim Walker        std::cerr << "    --reuse,-r: Reuse couch-files (any re-used file must "
32030b18fb0SJim Walker                     "have "
32130b18fb0SJim Walker                     "a vbstate document) (default "
32230b18fb0SJim Walker                  << reuse_couch_files_default << ")." << std::endl;
32330b18fb0SJim Walker        std::cerr << "    --vbc, -v <integer>:  Number of vbuckets (default "
32430b18fb0SJim Walker                  << vbc_default << ")." << std::endl;
32530b18fb0SJim Walker        std::cerr << "    --keys, -k <integer>:  Number of keys to create "
32630b18fb0SJim Walker                     "(default "
32730b18fb0SJim Walker                  << key_count_default << ")." << std::endl;
32830b18fb0SJim Walker        std::cerr << "    --keys-per-vbucket:  The keys value is how many keys "
32930b18fb0SJim Walker                     "for "
33030b18fb0SJim Walker                     "each vbucket default "
33130b18fb0SJim Walker                  << keys_per_vbucket_default << ")." << std::endl;
33230b18fb0SJim Walker        std::cerr << "    --keys-per-flush, -f <integer>:  Number of keys per "
33330b18fb0SJim Walker                     "vbucket before committing to disk (default "
33430b18fb0SJim Walker                  << keys_per_flush_default << ")." << std::endl;
33530b18fb0SJim Walker        std::cerr << "    --doc-len,-d <integer>:  Number of bytes for the "
33630b18fb0SJim Walker                     "document "
33730b18fb0SJim Walker                     "body (default "
33830b18fb0SJim Walker                  << doc_len_default << ")." << std::endl;
33930b18fb0SJim Walker        std::cerr << "    --doc-type,-t <binary|binarycompressed>:  Document "
34030b18fb0SJim Walker                     "type."
34130b18fb0SJim Walker                  << std::endl;
34230b18fb0SJim Walker        std::cerr << "    --start-key,-s <integer>:  Specify the first key "
34330b18fb0SJim Walker                     "number "
34430b18fb0SJim Walker                     "(default "
34530b18fb0SJim Walker                  << start_key_default << ")." << std::endl;
34630b18fb0SJim Walker        std::cerr << "    --low-compression,-l: Generate documents that don't "
34730b18fb0SJim Walker                     "compress well (default "
34830b18fb0SJim Walker                  << low_compression_default << ")." << std::endl;
34930b18fb0SJim Walker
35030b18fb0SJim Walker        std::cerr << std::endl
35130b18fb0SJim Walker                  << "vbucket list (optional space separated values):"
35230b18fb0SJim Walker                  << std::endl;
35330b18fb0SJim Walker        std::cerr
35430b18fb0SJim Walker                << "    Specify a list of vbuckets to manage and optionally "
35530b18fb0SJim Walker                   "the "
35630b18fb0SJim Walker                   "state. "
35730b18fb0SJim Walker                << std::endl
35830b18fb0SJim Walker                << "E.g. VB 1 can be specified as '1' (defaults to active when "
35930b18fb0SJim Walker                   "creating vbuckets) or '1a' (for active) or '1r' (for "
36030b18fb0SJim Walker                   "replica)."
36130b18fb0SJim Walker                << std::endl
36230b18fb0SJim Walker                << "Omiting the vbucket list means all vbuckets will be "
36330b18fb0SJim Walker                   "created."
36430b18fb0SJim Walker                << std::endl;
36530b18fb0SJim Walker
36630b18fb0SJim Walker        std::cerr
36730b18fb0SJim Walker                << "Two modes of operation:" << std::endl
36830b18fb0SJim Walker                << "    1) Re-use vbuckets (--reuse or -r) \"Automatic mode\":"
36930b18fb0SJim Walker                << std::endl
37030b18fb0SJim Walker                << "    In this mode of operation the program will only write "
37130b18fb0SJim Walker                   "key/values into vbucket files it finds in the current "
37230b18fb0SJim Walker                   "directory."
37330b18fb0SJim Walker                << std::endl
37430b18fb0SJim Walker                << "    Ideally the vbucket files are empty of documents, but "
37530b18fb0SJim Walker                   "must have a vbstate local doc."
37630b18fb0SJim Walker                << std::endl
37730b18fb0SJim Walker                << "    The intent of this mode is for a cluster and bucket to "
37830b18fb0SJim Walker                   "be "
37930b18fb0SJim Walker                   "pre-created, but empty and then to simply "
38030b18fb0SJim Walker                << std::endl
38130b18fb0SJim Walker                << "    populate the files found on each node without having "
38230b18fb0SJim Walker                   "to "
38330b18fb0SJim Walker                   "consider which are active/replica."
38430b18fb0SJim Walker                << std::endl
38530b18fb0SJim Walker                << std::endl;
38630b18fb0SJim Walker        ;
38730b18fb0SJim Walker
38830b18fb0SJim Walker        std::cerr
38930b18fb0SJim Walker                << "    2) Create vbuckets:" << std::endl
39030b18fb0SJim Walker                << "    In this mode of operation the program will create new "
39130b18fb0SJim Walker                   "vbucket files. The user must make the decision about what "
39230b18fb0SJim Walker                   "is "
39330b18fb0SJim Walker                   "active/replica"
39430b18fb0SJim Walker                << std::endl
39530b18fb0SJim Walker                << std::endl;
39630b18fb0SJim Walker
39730b18fb0SJim Walker        std::cerr << "Examples: " << std::endl;
39830b18fb0SJim Walker        std::cerr
39930b18fb0SJim Walker                << "  Create 1024 active vbuckets containing 10,000, 256 byte "
40030b18fb0SJim Walker                   "binary documents."
40130b18fb0SJim Walker                << std::endl;
40230b18fb0SJim Walker        std::cerr << "    > ./couch_create -k 10000" << std::endl << std::endl;
40330b18fb0SJim Walker        std::cerr << "  Iterate over 10,000 keys, but only generate vbuckets "
40430b18fb0SJim Walker                     "0, 1, "
40530b18fb0SJim Walker                     "2 and 3 with a mix of active/replica"
40630b18fb0SJim Walker                  << std::endl;
40730b18fb0SJim Walker        std::cerr << "    > ./couch_create -k 10000 0a 1r 2a 3r" << std::endl
40830b18fb0SJim Walker                  << std::endl;
40930b18fb0SJim Walker        std::cerr
41030b18fb0SJim Walker                << "  Iterate over 10,000 keys and re-use existing couch-files"
41130b18fb0SJim Walker                << std::endl;
41230b18fb0SJim Walker        std::cerr << "    > ./couch_create -k 10000 -r" << std::endl
41330b18fb0SJim Walker                  << std::endl;
41430b18fb0SJim Walker        std::cerr << "  Create 10000 keys for each vbucket and re-use existing "
41530b18fb0SJim Walker                     "couch-files"
41630b18fb0SJim Walker                  << std::endl;
41730b18fb0SJim Walker        std::cerr << "    > ./couch_create -k 10000 --keys-per-vbucket -r"
41830b18fb0SJim Walker                  << std::endl
41930b18fb0SJim Walker                  << std::endl;
42030b18fb0SJim Walker
42130b18fb0SJim Walker        exit(exit_code);
42230b18fb0SJim Walker    }
42330b18fb0SJim Walker
42430b18fb0SJim Walkerprivate:
42530b18fb0SJim Walker    bool reuse_couch_files;
42630b18fb0SJim Walker    int16_t vbc;
42730b18fb0SJim Walker    uint64_t key_count;
42830b18fb0SJim Walker    bool keys_per_vbucket;
42930b18fb0SJim Walker    int keys_per_flush;
43030b18fb0SJim Walker    int doc_len;
43130b18fb0SJim Walker    DocType doc_type;
43230b18fb0SJim Walker    std::vector<VBucketState> vbuckets;
43330b18fb0SJim Walker    int vbuckets_managed;
43430b18fb0SJim Walker    uint64_t start_key;
43530b18fb0SJim Walker    bool low_compression;
43630b18fb0SJim Walker    int flusher_count;
43730b18fb0SJim Walker};
43830b18fb0SJim Walker
43930b18fb0SJim Walker//
44030b18fb0SJim Walker// Class representing a single couchstore document
44130b18fb0SJim Walker//
44230b18fb0SJim Walkerclass Document {
44330b18fb0SJim Walker    class Meta {
44430b18fb0SJim Walker    public:
44530b18fb0SJim Walker        // Create the meta, cas is a millisecond timestamp
44630b18fb0SJim Walker        Meta(std::chrono::time_point<std::chrono::high_resolution_clock>
44730b18fb0SJim Walker                     casTime,
44830b18fb0SJim Walker             uint32_t e,
44930b18fb0SJim Walker             uint32_t f)
45030b18fb0SJim Walker            : cas(std::chrono::duration_cast<std::chrono::microseconds>(
45130b18fb0SJim Walker                          casTime.time_since_epoch())
45230b18fb0SJim Walker                          .count() &
45330b18fb0SJim Walker                  0xFFFF),
45430b18fb0SJim Walker              exptime(e),
45530b18fb0SJim Walker              flags(f),
45630b18fb0SJim Walker              flex_meta_code(0x01),
45730b18fb0SJim Walker              flex_value(0x0) {
45830b18fb0SJim Walker        }
45930b18fb0SJim Walker
46030b18fb0SJim Walker        void set_exptime(uint32_t exptime) {
46130b18fb0SJim Walker            this->exptime = exptime;
46230b18fb0SJim Walker        }
46330b18fb0SJim Walker
46430b18fb0SJim Walker        void set_flags(uint32_t flags) {
46530b18fb0SJim Walker            this->flags = flags;
46630b18fb0SJim Walker        }
46730b18fb0SJim Walker
46830b18fb0SJim Walker        size_t get_size() const {
46930b18fb0SJim Walker            // Not safe to use sizeof(Meta) due to trailing padding
47030b18fb0SJim Walker            return sizeof(cas) + sizeof(exptime) + sizeof(flags) +
47130b18fb0SJim Walker                   sizeof(flex_meta_code) + sizeof(flex_value);
47230b18fb0SJim Walker        }
47330b18fb0SJim Walker
47430b18fb0SJim Walker    public:
47530b18fb0SJim Walker        uint64_t cas;
47630b18fb0SJim Walker        uint32_t exptime;
47730b18fb0SJim Walker        uint32_t flags;
47830b18fb0SJim Walker        uint8_t flex_meta_code;
47930b18fb0SJim Walker        uint8_t flex_value;
48030b18fb0SJim Walker    };
48130b18fb0SJim Walker
48230b18fb0SJim Walkerpublic:
48330b18fb0SJim Walker    Document(const char* k, int klen, ProgramParameters& params, int dlen)
48430b18fb0SJim Walker        : meta(std::chrono::high_resolution_clock::now(), 0, 0),
48530b18fb0SJim Walker          key_len(klen),
48630b18fb0SJim Walker          key(NULL),
48730b18fb0SJim Walker          data_len(dlen),
48830b18fb0SJim Walker          data(NULL),
48930b18fb0SJim Walker          parameters(params),
49030b18fb0SJim Walker          doc_created(0) {
49130b18fb0SJim Walker        key = new char[klen];
49230b18fb0SJim Walker        data = new char[dlen];
49330b18fb0SJim Walker        set_doc(k, klen, dlen);
49430b18fb0SJim Walker        memset(&doc_info, 0, sizeof(DocInfo));
49530b18fb0SJim Walker        memset(&doc, 0, sizeof(Doc));
49630b18fb0SJim Walker        doc.id.buf = key;
49730b18fb0SJim Walker        doc.id.size = klen;
49830b18fb0SJim Walker        doc.data.buf = data;
49930b18fb0SJim Walker        doc.data.size = dlen;
50030b18fb0SJim Walker        doc_info.id = doc.id;
50130b18fb0SJim Walker        doc_info.size = doc.data.size;
50230b18fb0SJim Walker        doc_info.db_seq = 0; // db_seq;
50330b18fb0SJim Walker        doc_info.rev_seq = 1; // ++db_seq;
50430b18fb0SJim Walker
50530b18fb0SJim Walker        if (params.get_doc_type() == BINARY_DOC_COMPRESSED) {
50630b18fb0SJim Walker            doc_info.content_meta =
50730b18fb0SJim Walker                    COUCH_DOC_NON_JSON_MODE | COUCH_DOC_IS_COMPRESSED;
50830b18fb0SJim Walker        } else if (params.get_doc_type() == BINARY_DOC) {
50930b18fb0SJim Walker            doc_info.content_meta = COUCH_DOC_NON_JSON_MODE;
51030b18fb0SJim Walker        } else if (params.get_doc_type() == JSON_DOC_COMPRESSED) {
51130b18fb0SJim Walker            doc_info.content_meta = COUCH_DOC_IS_JSON | COUCH_DOC_IS_COMPRESSED;
51230b18fb0SJim Walker        } else if (params.get_doc_type() == JSON_DOC) {
51330b18fb0SJim Walker            doc_info.content_meta = COUCH_DOC_IS_JSON;
51430b18fb0SJim Walker        } else {
51530b18fb0SJim Walker            doc_info.content_meta = COUCH_DOC_NON_JSON_MODE;
51630b18fb0SJim Walker        }
51730b18fb0SJim Walker
51830b18fb0SJim Walker        doc_info.rev_meta.buf = reinterpret_cast<char*>(&meta);
51930b18fb0SJim Walker        doc_info.rev_meta.size = meta.get_size();
52030b18fb0SJim Walker        doc_info.deleted = 0;
52130b18fb0SJim Walker    }
52230b18fb0SJim Walker
52330b18fb0SJim Walker    ~Document() {
52430b18fb0SJim Walker        delete[] key;
52530b18fb0SJim Walker        delete[] data;
52630b18fb0SJim Walker    }
52730b18fb0SJim Walker
52830b18fb0SJim Walker    void set_doc(const char* k, int klen, int dlen) {
52930b18fb0SJim Walker        if (klen > key_len) {
53030b18fb0SJim Walker            delete key;
53130b18fb0SJim Walker            key = new char[klen];
53230b18fb0SJim Walker            doc.id.buf = key;
53330b18fb0SJim Walker            doc.id.size = klen;
53430b18fb0SJim Walker            doc_info.id = doc.id;
53530b18fb0SJim Walker        }
53630b18fb0SJim Walker        if (dlen > data_len) {
53730b18fb0SJim Walker            delete data;
53830b18fb0SJim Walker            data = new char[dlen];
53930b18fb0SJim Walker            doc.data.buf = data;
54030b18fb0SJim Walker            doc.data.size = dlen;
54130b18fb0SJim Walker        }
54230b18fb0SJim Walker
54330b18fb0SJim Walker        memcpy(key, k, klen);
54430b18fb0SJim Walker        // generate doc body only if size has changed.
54530b18fb0SJim Walker        if (doc_created != dlen) {
54630b18fb0SJim Walker            if (parameters.is_low_compression()) {
54730b18fb0SJim Walker                srand(0);
54830b18fb0SJim Walker                for (int data_index = 0; data_index < dlen; data_index++) {
54930b18fb0SJim Walker                    char data_value = (rand() % 255) % ('Z' - '0');
55030b18fb0SJim Walker                    data[data_index] = data_value + '0';
55130b18fb0SJim Walker                }
55230b18fb0SJim Walker            } else {
55330b18fb0SJim Walker                char data_value = 0;
55430b18fb0SJim Walker                for (int data_index = 0; data_index < dlen; data_index++) {
55530b18fb0SJim Walker                    data[data_index] = data_value + '0';
55630b18fb0SJim Walker                    data_value = (data_value + 1) % ('Z' - '0');
55730b18fb0SJim Walker                }
55830b18fb0SJim Walker            }
55930b18fb0SJim Walker            doc_created = dlen;
56030b18fb0SJim Walker        }
56130b18fb0SJim Walker    }
56230b18fb0SJim Walker
56330b18fb0SJim Walker    Doc* get_doc() {
56430b18fb0SJim Walker        return &doc;
56530b18fb0SJim Walker    }
56630b18fb0SJim Walker
56730b18fb0SJim Walker    DocInfo* get_doc_info() {
56830b18fb0SJim Walker        return &doc_info;
56930b18fb0SJim Walker    }
57030b18fb0SJim Walker
57130b18fb0SJim Walkerprivate:
57230b18fb0SJim Walker    Doc doc;
57330b18fb0SJim Walker    DocInfo doc_info;
57430b18fb0SJim Walker    Meta meta;
57530b18fb0SJim Walker
57630b18fb0SJim Walker    int key_len;
57730b18fb0SJim Walker    char* key;
57830b18fb0SJim Walker    int data_len;
57930b18fb0SJim Walker    char* data;
58030b18fb0SJim Walker    ProgramParameters& parameters;
58130b18fb0SJim Walker    int doc_created;
58230b18fb0SJim Walker    static uint64_t db_seq;
58330b18fb0SJim Walker};
58430b18fb0SJim Walker
58530b18fb0SJim Walkeruint64_t Document::db_seq = 0;
58630b18fb0SJim Walker
58730b18fb0SJim Walker//
58830b18fb0SJim Walker// A class representing a VBucket.
58930b18fb0SJim Walker// This object holds a queue of key/values (documents) and manages their writing
59030b18fb0SJim Walker// to the couch-file.
59130b18fb0SJim Walker//
59230b18fb0SJim Walkerclass VBucket {
59330b18fb0SJim Walkerpublic:
59430b18fb0SJim Walker    class Exception1 : public std::exception {
59530b18fb0SJim Walker        virtual const char* what() const throw() {
59630b18fb0SJim Walker            return "Found an existing couch-file with vbstate and --reuse/-r "
59730b18fb0SJim Walker                   "is not set.";
59830b18fb0SJim Walker        }
59930b18fb0SJim Walker    } exception1;
60030b18fb0SJim Walker
60130b18fb0SJim Walker    class Exception2 : public std::exception {
60230b18fb0SJim Walker        virtual const char* what() const throw() {
60330b18fb0SJim Walker            return "Didn't find valid couch-file (or found file with no "
60430b18fb0SJim Walker                   "vbstate) and --reuse/-r is set.";
60530b18fb0SJim Walker        }
60630b18fb0SJim Walker    } exception2;
60730b18fb0SJim Walker
60830b18fb0SJim Walker    class Exception3 : public std::exception {
60930b18fb0SJim Walker        virtual const char* what() const throw() {
61030b18fb0SJim Walker            return "Error opening couch_file (check ulimit -n).";
61130b18fb0SJim Walker        }
61230b18fb0SJim Walker    } exception3;
61330b18fb0SJim Walker
61430b18fb0SJim Walker    //
61530b18fb0SJim Walker    // Constructor opens file and validates the state.
61630b18fb0SJim Walker    // throws exceptions if not safe to continue
61730b18fb0SJim Walker    //
61830b18fb0SJim Walker    VBucket(char* filename,
61930b18fb0SJim Walker            int vb,
62030b18fb0SJim Walker            uint64_t& saved_counter,
62130b18fb0SJim Walker            ProgramParameters& params_ref)
62230b18fb0SJim Walker        : handle(NULL),
62330b18fb0SJim Walker          next_free_doc(0),
62430b18fb0SJim Walker          flush_threshold(params_ref.get_keys_per_flush()),
62530b18fb0SJim Walker          docs(params_ref.get_keys_per_flush()),
62630b18fb0SJim Walker          pending_documents(0),
62730b18fb0SJim Walker          documents_saved(saved_counter),
62830b18fb0SJim Walker          params(params_ref),
62930b18fb0SJim Walker          vbid(vb),
63030b18fb0SJim Walker          doc_count(0),
63130b18fb0SJim Walker          got_vbstate(false),
63230b18fb0SJim Walker          vb_seq(0),
63330b18fb0SJim Walker          ok_to_set_vbstate(true) {
63430b18fb0SJim Walker        int flags = params.get_reuse_couch_files()
63530b18fb0SJim Walker                            ? COUCHSTORE_OPEN_FLAG_RDONLY
63630b18fb0SJim Walker                            : COUCHSTORE_OPEN_FLAG_CREATE;
63730b18fb0SJim Walker
63830b18fb0SJim Walker        couchstore_error_t err = couchstore_open_db(filename, flags, &handle);
639