xref: /6.6.0/couchstore/src/dbdump.cc (revision b87ff9fe)
1/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2#include "couchstore_config.h"
3
4#include "bitfield.h"
5#include "couch_btree.h"
6#include "flatbuffers/idl.h"
7#include "internal.h"
8#include "node_types.h"
9#include "tracking_file_ops.h"
10#include "util.h"
11#include "views/index_header.h"
12#include "views/util.h"
13#include "views/view_group.h"
14#include <collections/kvstore_generated.h>
15#include <inttypes.h>
16#include <libcouchstore/couch_db.h>
17#include <mcbp/protocol/unsigned_leb128.h>
18#include <memcached/protocol_binary.h>
19#include <nlohmann/json.hpp>
20#include <platform/cb_malloc.h>
21#include <platform/cbassert.h>
22#include <platform/sized_buffer.h>
23#include <snappy-c.h>
24#include <stdbool.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <sys/types.h>
29#include <unistd.h>
30#include <xattr/blob.h>
31#include <xattr/utils.h>
32
33#include <iostream>
34
35#define MAX_HEADER_SIZE (64 * 1024)
36
/// Selects what process_vbucket_file() dumps from a couchstore file.
enum DumpMode {
    /// Walk the by-sequence index.
    DumpBySequence,
    /// Walk the by-ID index.
    DumpByID,
    /// Print the local (non-replicated) documents.
    DumpLocals,
    /// Visit every index so the tracking file ops can map the file layout.
    DumpFileMap,
};
43
44static DumpMode mode = DumpBySequence;
45static bool dumpTree = false;
46static bool dumpJson = false;
47static bool dumpHex = false;
48static bool oneKey = false;
49static bool dumpBody = true;
50static bool decodeVbucket = true;
51static bool decodeIndex = false;
52static bool decodeNamespace = true;
53static bool iterateHeaders = false;
54static sized_buf dumpKey;
55
56typedef struct {
57    raw_64 cas;
58    raw_32 expiry;
59    raw_32 flags;
60} CouchbaseRevMeta;
61
// Additional Couchbase V1 metadata. foldprint() reads it from the bytes that
// immediately follow CouchbaseRevMeta inside rev_meta.
struct CouchbaseRevMetaV1 {
    // Flex-metadata code; foldprint() reports an error when it is below 0x01.
    uint8_t flex_code;
    // Document datatype bits, rendered via mcbp::datatype::to_string().
    uint8_t datatype;
};
67
// Additional Couchbase V2 metadata. foldprint() reads it from the byte that
// follows CouchbaseRevMetaV1 when rev_meta has exactly the V0+V1+V2 size.
struct CouchbaseRevMetaV2 {
    // Conflict resolution mode, printed as a plain integer.
    uint8_t confResMode;
};
72
// Additional Couchbase V3 metadata - SyncReplication state.
// Both fields are raw codes; the accessors translate them to display names.
struct CouchbaseRevMetaV3 {
    uint8_t operation;
    uint8_t level;

    /// @return the name of the sync-write operation code, or "<INVALID>"
    ///         when the code is not one of the three known values.
    const char* getOperationName() const {
        static const char* const kNames[] = {"pending", "commit", "abort"};
        if (operation < sizeof(kNames) / sizeof(kNames[0])) {
            return kNames[operation];
        }
        return "<INVALID>";
    }

    /// @return the name of the durability level code, or "<INVALID>" when
    ///         the code is not one of the four known values.
    const char* getLevelName() const {
        static const char* const kNames[] = {"none",
                                             "majority",
                                             "majorityAndPersistOnMaster",
                                             "persistToMajority"};
        if (level < sizeof(kNames) / sizeof(kNames[0])) {
            return kNames[level];
        }
        return "<INVALID>";
    }
};
105
106extern const std::string vbucket_serialised_manifest_entry_raw_schema;
107extern const std::string collections_kvstore_schema;
108
109static int view_btree_cmp(const sized_buf *key1, const sized_buf *key2)
110{
111    return view_key_cmp(key1, key2, NULL);
112}
113
114static void printsb(const sized_buf *sb)
115{
116    if (sb->buf == NULL) {
117        printf("null\n");
118        return;
119    }
120    printf("%.*s\n", (int) sb->size, sb->buf);
121}
122
123static void printsbhexraw(const sized_buf* sb) {
124    size_t ii;
125    for (ii = 0; ii < sb->size; ++ii) {
126        printf("%.02x", (uint8_t)sb->buf[ii]);
127    }
128}
129
130static void printsbhex(const sized_buf *sb, int with_ascii)
131{
132    size_t i;
133
134    if (sb->buf == NULL) {
135        printf("null\n");
136        return;
137    }
138    printf("{");
139    for (i = 0; i < sb->size; ++i) {
140        printf("%.02x", (uint8_t)sb->buf[i]);
141        if (i % 4 == 3) {
142            printf(" ");
143        }
144    }
145    printf("}");
146    if (with_ascii) {
147        printf("  (\"");
148        for (i = 0; i < sb->size; ++i) {
149            uint8_t ch = sb->buf[i];
150            if (ch < 32 || ch >= 127) {
151                ch = '?';
152            }
153            printf("%c", ch);
154        }
155        printf("\")");
156    }
157    printf("\n");
158}
159
160static void printjquote(const sized_buf *sb)
161{
162    const char* i = sb->buf;
163    const char* end = sb->buf + sb->size;
164    if (sb->buf == NULL) {
165        return;
166    }
167    for (; i < end; i++) {
168        if (*i > 31 && *i != '\"' && *i != '\\') {
169            fputc(*i, stdout);
170        } else {
171            fputc('\\', stdout);
172            switch(*i)
173            {
174                case '\\': fputc('\\', stdout);break;
175                case '\"': fputc('\"', stdout);break;
176                case '\b': fputc('b', stdout);break;
177                case '\f': fputc('f', stdout);break;
178                case '\n': fputc('n', stdout);break;
179                case '\r': fputc('r', stdout);break;
180                case '\t': fputc('t', stdout);break;
181                default:
182                           printf("u00%.02x", *i);
183            }
184        }
185    }
186}
187
/// Print a comma-separated datatype description as the JSON member
/// "datatype_as_text": an array with one quoted string per comma-separated
/// token. A string without commas yields a single-element array.
static void print_datatype_as_json(const std::string& datatype) {
    printf("\"datatype_as_text\":[");

    std::string::size_type tokenStart = 0;
    for (;;) {
        const auto comma = datatype.find(',', tokenStart);
        const bool last = (comma == std::string::npos);

        // tokenStart only moves past zero once a comma has been consumed,
        // i.e. once an earlier element has been printed.
        if (tokenStart != 0) {
            printf(",");
        }

        const auto token = last ? datatype.substr(tokenStart)
                                : datatype.substr(tokenStart,
                                                  comma - tokenStart);
        printf("\"%s\"", token.c_str());

        if (last) {
            break;
        }
        tokenStart = comma + 1;
    }

    printf("]");
}
211
/// Map the numeric namespace / collection-ID prefix of a key to the label
/// shown in the dump.
///
/// @param ns decoded leb128 prefix of the key
/// @return a fixed label for the values 0, 1 and 2, otherwise
///         "collection:0x" followed by the value in lowercase hex
static std::string getNamespaceString(uint32_t ns) {
    if (ns == 0) {
        return "collection:0x0:default";
    }
    if (ns == 1) {
        return "system-event-key:";
    }
    if (ns == 2) {
        return "prepare:";
    }

    std::stringstream label;
    label << "collection:0x" << std::hex << ns;
    return label.str();
}
226
227static void printDocId(const char* prefix, const sized_buf* sb) {
228    if (decodeNamespace && sb->size >= sizeof(uint32_t)) {
229        // Decode the collection-ID of the key
230        auto decoded =
231            cb::mcbp::decode_unsigned_leb128<uint32_t>
232                ({reinterpret_cast<uint8_t*>(sb->buf), sb->size});
233
234        // Load the key
235        std::string key(reinterpret_cast<const char*>(decoded.second.data()),
236                        decoded.second.size());
237
238        auto name = getNamespaceString(decoded.first);
239
240        if (decoded.first == 2) {
241            // Synchronous Replication 'Prepare' namespace prefix.
242            // Decode again.
243            decoded =
244                    cb::mcbp::decode_unsigned_leb128<uint32_t>(decoded.second);
245            key = std::string(reinterpret_cast<const char*>(decoded.second.data()),
246                            decoded.second.size());
247            name = name + getNamespaceString(decoded.first);
248        }
249
250        // Some keys in the system event namespace have a format we can decode:
251        // \1_collection:<affected collection-id leb128>
252        // \1_scope:<affected scope-id leb128>
253        std::string collectionsPrefix("_collection:");
254        std::string scopePrefix("_scope:");
255
256        if (decoded.first == 1) {
257            // System event namespace
258            if (std::mismatch(collectionsPrefix.begin(),
259                              collectionsPrefix.end(),
260                              key.begin())
261                        .first == collectionsPrefix.end()) {
262                uint32_t affectedCid =
263                        cb::mcbp::decode_unsigned_leb128<uint32_t>(
264                                {reinterpret_cast<const uint8_t*>(
265                                         decoded.second.data() +
266                                         collectionsPrefix.size()),
267                                 decoded.second.size() -
268                                         collectionsPrefix.size()})
269                                .first;
270                std::stringstream ss;
271                ss << name << "collection:0x" << std::hex << affectedCid;
272                name = ss.str();
273            } else if (std::mismatch(scopePrefix.begin(),
274                                     scopePrefix.end(),
275                                     key.begin())
276                               .first == scopePrefix.end()) {
277                uint32_t affectedSid =
278                        cb::mcbp::decode_unsigned_leb128<uint32_t>(
279                                {reinterpret_cast<const uint8_t*>(
280                                         decoded.second.data() +
281                                         scopePrefix.size()),
282                                 decoded.second.size() - scopePrefix.size()})
283                                .first;
284                std::stringstream ss;
285                ss << name << "scope:0x" << std::hex << affectedSid;
286                name = ss.str();
287            }
288        }
289        printf("%s(%s) %s\n",
290               prefix,
291               name.c_str(),
292               key.c_str());
293    } else {
294        printf("%s%.*s\n", prefix, (int)sb->size, sb->buf);
295    }
296}
297
298static int foldprint(Db *db, DocInfo *docinfo, void *ctx)
299{
300    int *count = (int *) ctx;
301    Doc *doc = NULL;
302    uint64_t cas;
303    uint32_t expiry, flags;
304    protocol_binary_datatype_t datatype = PROTOCOL_BINARY_RAW_BYTES;
305    bool ttl_delete = false;
306    couchstore_error_t docerr;
307    (*count)++;
308
309    if (dumpJson) {
310        printf("{\"seq\":%" PRIu64 ",\"id\":\"", docinfo->db_seq);
311        printjquote(&docinfo->id);
312        printf("\",");
313    } else {
314        if (mode == DumpBySequence) {
315            printf("Doc seq: %" PRIu64 "\n", docinfo->db_seq);
316            printDocId("     id: ", &docinfo->id);
317        } else {
318            printDocId("  Doc ID: ", &docinfo->id);
319            if (docinfo->db_seq > 0) {
320                printf("     seq: %" PRIu64 "\n", docinfo->db_seq);
321            }
322        }
323    }
324    if (docinfo->bp == 0 && docinfo->deleted == 0 && !dumpJson) {
325        printf("         ** This b-tree node is corrupt; raw node value follows:*\n");
326        printf("    raw: ");
327        printsbhex(&docinfo->rev_meta, 1);
328        return 0;
329    }
330    if (dumpJson) {
331        printf("\"rev\":%" PRIu64 ",\"content_meta\":%d,", docinfo->rev_seq,
332                                                         docinfo->content_meta);
333        printf("\"physical_size\":%" PRIu64 ",", (uint64_t)docinfo->size);
334    } else {
335        printf("     rev: %" PRIu64 "\n", docinfo->rev_seq);
336        printf("     content_meta: %d\n", docinfo->content_meta);
337        printf("     size (on disk): %" PRIu64 "\n", (uint64_t)docinfo->size);
338    }
339
340    if (docinfo->rev_meta.size >= sizeof(CouchbaseRevMeta)) {
341        const CouchbaseRevMeta* meta = (const CouchbaseRevMeta*)docinfo->rev_meta.buf;
342        cas = decode_raw64(meta->cas);
343        expiry = decode_raw32(meta->expiry);
344        flags = decode_raw32(meta->flags);
345        if (dumpJson) {
346            printf("\"cas\":\"%" PRIu64 "\",\"expiry\":%" PRIu32
347                   ",\"flags\":%" PRIu32,
348                   cas,
349                   expiry,
350                   flags);
351        } else {
352            printf("     cas: %" PRIu64 ", expiry: %" PRIu32
353                   ", flags: %" PRIu32,
354                   cas,
355                   expiry,
356                   flags);
357        }
358    }
359
360    if (docinfo->rev_meta.size >=
361        sizeof(CouchbaseRevMeta) + sizeof(CouchbaseRevMetaV1)) {
362        // 18 bytes of rev_meta indicates CouchbaseRevMetaV1 - adds
363        // flex_meta_code (1B) and datatype (1B)
364        if (docinfo->rev_meta.size <
365            sizeof(CouchbaseRevMeta) + sizeof(CouchbaseRevMetaV1)) {
366            printf("     Error parsing the document: Possible corruption\n");
367            return 1;
368        }
369        const auto* metaV1 =
370                (const CouchbaseRevMetaV1*)(docinfo->rev_meta.buf +
371                                            sizeof(CouchbaseRevMeta));
372
373        if (metaV1->flex_code < 0x01) {
374            printf("     Error: Flex code mismatch (bad code: %d)\n",
375                   metaV1->flex_code);
376            return 1;
377        }
378        ttl_delete = ((metaV1->flex_code << 7) & 0x1) == 1;
379
380        datatype = metaV1->datatype;
381        const auto datatype_string = mcbp::datatype::to_string(datatype);
382
383        if (dumpJson) {
384            printf(",\"datatype\":%d,", datatype);
385            print_datatype_as_json(datatype_string);
386        } else {
387            printf(", datatype: 0x%02x (%s)",
388                   datatype,
389                   datatype_string.c_str());
390        }
391    }
392
393    if (docinfo->rev_meta.size == sizeof(CouchbaseRevMeta) +
394                                          sizeof(CouchbaseRevMetaV1) +
395                                          sizeof(CouchbaseRevMetaV2)) {
396        // 19 bytes of rev_meta indicates CouchbaseRevMetaV2 - adds
397        // resolution flag (1B).
398        // Note: This is no longer written since Watson; but could still
399        // exist in old files.
400        const auto* metaV2 =
401                (const CouchbaseRevMetaV2*)(docinfo->rev_meta.buf +
402                                            sizeof(CouchbaseRevMeta) +
403                                            sizeof(CouchbaseRevMetaV1));
404
405        const auto conf_res_mode = metaV2->confResMode;
406
407        if (dumpJson) {
408            printf(",\"conflict_resolution_mode\":%d", conf_res_mode);
409        } else {
410            printf(", conflict_resolution_mode: %d", conf_res_mode);
411        }
412    }
413
414    if (docinfo->rev_meta.size == sizeof(CouchbaseRevMeta) +
415                                          sizeof(CouchbaseRevMetaV1) +
416                                          sizeof(CouchbaseRevMetaV3)) {
417        // 21 bytes of rev_meta indicates CouchbaseRevMetaV3 - adds
418        // Synchronous Replication state.
419        const auto* metaV3 =
420                (const CouchbaseRevMetaV3*)(docinfo->rev_meta.buf +
421                                            sizeof(CouchbaseRevMeta) +
422                                            sizeof(CouchbaseRevMetaV1));
423
424        if (dumpJson) {
425            printf(",\"sync_write\":\"%s\"", metaV3->getOperationName());
426            if (metaV3->operation == 0 /*Pending*/) {
427                printf(",\"level\":\"%s\"", metaV3->getLevelName());
428            }
429        } else {
430            printf(", sync_write: %s", metaV3->getOperationName());
431            if (metaV3->operation == 0 /*Pending*/) {
432                printf(" [level: %s]", metaV3->getLevelName());
433            }
434        }
435    }
436
437    if (!dumpJson) {
438        printf("\n");
439    }
440
441    if (docinfo->deleted) {
442        const char* deleteSource = ttl_delete ? "TTL" : "explicit";
443        if (dumpJson) {
444            printf(",\"deleted\":\"%s\"", deleteSource);
445        } else {
446            printf("     doc deleted (%s)\n", deleteSource);
447        }
448    }
449
450    if (dumpBody) {
451        docerr = couchstore_open_doc_with_docinfo(db, docinfo, &doc, DECOMPRESS_DOC_BODIES);
452        if (docerr != COUCHSTORE_SUCCESS) {
453            if (dumpJson) {
454                printf(",\"body\":null}\n");
455            } else {
456                printf("     could not read document body: %s\n", couchstore_strerror(docerr));
457            }
458        } else if (doc) {
459            std::string xattrs;
460            sized_buf body = doc->data;
461
462            // If datatype is snappy (and not marked compressed) we must inflate
463            cb::compression::Buffer inflated;
464            if (mcbp::datatype::is_snappy(datatype) &&
465                !(docinfo->content_meta & COUCH_DOC_IS_COMPRESSED)) {
466                // Inflate the entire document so we can work with it
467                if (!cb::compression::inflate(
468                            cb::compression::Algorithm::Snappy,
469                            {doc->data.buf, doc->data.size},
470                            inflated)) {
471                    if (dumpJson) {
472                        printf(",\"body\":null}\n");
473                    } else {
474                        printf("     could not inflate document body\n");
475                    }
476                    return 0;
477                }
478
479                body = _sized_buf{inflated.data(), inflated.size()};
480            }
481
482            if (mcbp::datatype::is_xattr(datatype)) {
483                cb::xattr::Blob blob({body.buf, body.size}, false);
484                xattrs = blob.to_json().dump();
485                body = _sized_buf{body.buf + blob.size(),
486                                  body.size - blob.size()};
487            }
488
489            if (dumpJson) {
490                printf(",\"size\":%" PRIu64 ",", (uint64_t)doc->data.size);
491                if (docinfo->content_meta & COUCH_DOC_IS_COMPRESSED) {
492                    printf("\"snappy\":true,\"display\":\"inflated\",");
493                }
494
495                if (xattrs.size() > 0) {
496                    sized_buf xa{const_cast<char*>(xattrs.data()), xattrs.size()};
497                    printf("\"xattr\":\"");
498                    printjquote(&xa);
499                    printf("\",");
500                }
501
502                printf("\"body\":\"");
503                printjquote(&body);
504                printf("\"}\n");
505            } else {
506                printf("     size: %" PRIu64 "\n", (uint64_t)doc->data.size);
507                if (xattrs.size() > 0) {
508                    printf("     xattrs: ");
509                    sized_buf xa{const_cast<char*>(xattrs.data()), xattrs.size()};
510                    if (dumpHex) {
511                        printsbhexraw(&xa);
512                        printf("\n");
513                    } else {
514                        printsb(&xa);
515                    }
516                }
517                printf("     data: ");
518
519                if (docinfo->content_meta & COUCH_DOC_IS_COMPRESSED) {
520                    printf("(snappy) ");
521                }
522
523                if (dumpHex) {
524                    printsbhexraw(&body);
525                    printf("\n");
526                } else {
527                    printsb(&body);
528                }
529            }
530        }
531    } else {
532        if (dumpJson) {
533            printf("\"body\":null}\n");
534        } else {
535            printf("\n");
536        }
537    }
538
539    couchstore_free_document(doc);
540    return 0;
541}
542
543
544static int visit_node(Db *db,
545                      int depth,
546                      const DocInfo* docinfo,
547                      uint64_t subtreeSize,
548                      const sized_buf* reduceValue,
549                      void *ctx)
550{
551    int i;
552    (void) db;
553
554    for (i = 0; i < depth; ++i)
555        printf("  ");
556    if (reduceValue) {
557        /* This is a tree node: */
558        printf("+ (%" PRIu64 ") ", subtreeSize);
559        printsbhex(reduceValue, 0);
560    } else if (docinfo->bp > 0) {
561        int *count;
562        /* This is a document: */
563        printf("%c (%" PRIu64 ") ", (docinfo->deleted ? 'x' : '*'),
564               (uint64_t)docinfo->size);
565        if (mode == DumpBySequence) {
566            printf("#%" PRIu64 " ", docinfo->db_seq);
567        }
568        printDocId(" id:", &docinfo->id);
569
570        count = (int *) ctx;
571        (*count)++;
572    } else {
573        /* Document, but not in a known format: */
574        printf("**corrupt?** ");
575        printsbhex(&docinfo->rev_meta, 1);
576    }
577    return 0;
578}
579
580/// Visitor function for filemap mode - just trigger a read of the document
581/// so the FileMap ops can record where they reside on disk.
582static int filemap_visit(Db* db,
583                         int depth,
584                         const DocInfo* docinfo,
585                         uint64_t subtreeSize,
586                         const sized_buf* reduceValue,
587                         void* ctx) {
588    if (docinfo == nullptr) {
589        // Tree node.
590        return 0;
591    }
592    Doc* doc = nullptr;
593    ScopedFileTag tag(db->file.ops, db->file.handle, FileTag::Document);
594    couchstore_open_doc_with_docinfo(db, docinfo, &doc, DECOMPRESS_DOC_BODIES);
595    couchstore_free_document(doc);
596    return 0;
597}
598
599static int noop_visit(Db* db,
600                      int depth,
601                      const DocInfo* docinfo,
602                      uint64_t subtreeSize,
603                      const sized_buf* reduceValue,
604                      void* ctx) {
605    return 0;
606}
607
608
609template<class RootType>
610static couchstore_error_t read_collection_flatbuffer_collections(
611        const std::string& name,
612        const std::string& rootType,
613        const sized_buf* v,
614        std::string& out) {
615    flatbuffers::Verifier verifier(reinterpret_cast<uint8_t*>(v->buf), v->size);
616    if (!verifier.VerifyBuffer<RootType>(nullptr)) {
617        std::cerr << "WARNING: \"" << name
618                  << "\" root:" << rootType << ", contains invalid "
619                     "flatbuffers data of size:"
620                  << v->size << std::endl;
621        ;
622        return COUCHSTORE_ERROR_CORRUPT;
623    }
624
625    // Use flatbuffers::Parser to generate JSON output of the binary blob
626    flatbuffers::IDLOptions idlOptions;
627
628    // Configure IDL
629    // strict_json:true adds quotes to keys
630    // indent_step < 0: no indent and no newlines, external tools can format
631    idlOptions.strict_json = true;
632    idlOptions.indent_step = -1;
633    flatbuffers::Parser parser(idlOptions);
634    parser.Parse(collections_kvstore_schema.c_str());
635    parser.SetRootType(rootType.c_str());
636    std::string jsongen;
637    GenerateText(parser, v->buf, &out);
638    return COUCHSTORE_SUCCESS;
639}
640
641static couchstore_error_t read_collection_leb128_metadata(const sized_buf* v,
642                                                          std::string& out) {
643    uint64_t count = 0;
644    uint64_t seqno = 0;
645
646    auto decoded1 = cb::mcbp::decode_unsigned_leb128<uint64_t>(
647            {reinterpret_cast<uint8_t*>(v->buf), v->size});
648    count = decoded1.first;
649
650    if (decoded1.second.size()) {
651        seqno = cb::mcbp::decode_unsigned_leb128<uint64_t>(decoded1.second)
652                        .first;
653    }
654
655    std::stringstream ss;
656    ss << R"({"item_count":)" << count << R"(, "high_seqno":)" << seqno << "}";
657    out = ss.str();
658
659    return COUCHSTORE_SUCCESS;
660}
661
662static couchstore_error_t maybe_decode_local_doc(const sized_buf* id,
663                                                 const sized_buf* v,
664                                                 std::string& decodedData) {
665    // Check for known non-JSON meta-data documents
666    if (strncmp(id->buf, "_local/collections/open", id->size) == 0) {
667        return read_collection_flatbuffer_collections<Collections::KVStore::OpenCollections>(
668                id->buf, "OpenCollections", v, decodedData);
669    } else if (strncmp(id->buf, "_local/collections/dropped", id->size) == 0) {
670        return read_collection_flatbuffer_collections<Collections::KVStore::DroppedCollections>(
671                id->buf, "DroppedCollections", v, decodedData);
672    } else if (strncmp(id->buf, "_local/scope/open", id->size) == 0) {
673        return read_collection_flatbuffer_collections<Collections::KVStore::Scopes>(
674                id->buf, "Scopes", v, decodedData);
675    } else if (strncmp(id->buf, "_local/collections/manifest", id->size) == 0) {
676        return read_collection_flatbuffer_collections<Collections::KVStore::CommittedManifest>(
677                id->buf, "CommittedManifest", v, decodedData);
678    } else if (id->buf[0] == '|') {
679        return read_collection_leb128_metadata(v, decodedData);
680    }
681
682    // Nothing todo
683    return COUCHSTORE_SUCCESS;
684}
685
686static couchstore_error_t local_doc_print(couchfile_lookup_request *rq,
687                                          const sized_buf *k,
688                                          const sized_buf *v)
689{
690    int* count = (int*) rq->callback_ctx;
691    if (!v) {
692        return COUCHSTORE_ERROR_DOC_NOT_FOUND;
693    }
694    (*count)++;
695    sized_buf* id = (sized_buf*)k;
696    sized_buf value = {v->buf, v->size};
697
698    printf("Key: ");
699    printsb(id);
700
701    std::string decodedData;
702    auto rv = maybe_decode_local_doc(k, v, decodedData);
703
704    if (rv != COUCHSTORE_SUCCESS) {
705        return rv;
706    }
707
708    if (!decodedData.empty()) {
709        value.buf = const_cast<char*>(decodedData.data());
710        value.size = decodedData.size();
711    }
712
713    printf("Value: ");
714    printsb(&value);
715    printf("\n");
716
717    return COUCHSTORE_SUCCESS;
718}
719
720static couchstore_error_t local_doc_print_json(couchfile_lookup_request* rq,
721                                               const sized_buf* k,
722                                               const sized_buf* v) {
723    int* count = (int*)rq->callback_ctx;
724    if (!v) {
725        return COUCHSTORE_ERROR_DOC_NOT_FOUND;
726    }
727    (*count)++;
728    sized_buf value = {v->buf, v->size};
729
730    std::string decodedData;
731    auto rv = maybe_decode_local_doc(k, v, decodedData);
732
733    if (rv != COUCHSTORE_SUCCESS) {
734        return rv;
735    }
736
737    if (!decodedData.empty()) {
738        value.buf = const_cast<char*>(decodedData.data());
739        value.size = decodedData.size();
740    }
741
742    nlohmann::json parsed;
743     parsed["id"] = std::string(k->buf, k->size);
744    try {
745        parsed["value"] = nlohmann::json::parse(value.buf, value.buf + value.size);
746    } catch (const nlohmann::json::exception& e) {
747        std::cerr << "WARNING: Failed nlohmann::json::parse of id:";
748        std::cerr.write(k->buf, k->size);
749        std::cerr << " with value:";
750        std::cerr.write(value.buf, value.size);
751        std::cerr << std::endl;
752        return COUCHSTORE_ERROR_CORRUPT;
753    }
754
755
756    std::cout << parsed.dump() << std::endl;
757
758    return COUCHSTORE_SUCCESS;
759}
760
761static couchstore_error_t local_doc_ignore(couchfile_lookup_request* rq,
762                                           const sized_buf* k,
763                                           const sized_buf* v) {
764    return COUCHSTORE_SUCCESS;
765}
766
767typedef couchstore_error_t (*fetch_callback_fn)(
768        struct couchfile_lookup_request* rq,
769        const sized_buf* k,
770        const sized_buf* v);
771
772static couchstore_error_t couchstore_print_local_docs(
773        Db* db, fetch_callback_fn fetch_cb, int* count) {
774    sized_buf key;
775    sized_buf *keylist = &key;
776    couchfile_lookup_request rq;
777    couchstore_error_t errcode;
778
779    if (db->header.local_docs_root == NULL) {
780        if (oneKey) {
781            return COUCHSTORE_ERROR_DOC_NOT_FOUND;
782        } else {
783            return COUCHSTORE_SUCCESS;
784        }
785    }
786
787    key.buf = (char *)"\0";
788    key.size = 0;
789
790    rq.cmp.compare = ebin_cmp;
791    rq.file = &db->file;
792    rq.num_keys = 1;
793    rq.keys = &keylist;
794    rq.callback_ctx = count;
795    rq.fetch_callback = fetch_cb;
796    rq.node_callback = NULL;
797    rq.fold = 1;
798
799    if (oneKey) {
800        rq.fold = 0;
801        key = dumpKey;
802    }
803
804    errcode = btree_lookup(&rq, db->header.local_docs_root->pointer);
805    return errcode;
806}
807
808static int process_vbucket_file(const char *file, int *total)
809{
810    Db *db;
811    couchstore_error_t errcode;
812    int count = 0;
813
814    TrackingFileOps* trackingFileOps = nullptr;
815    couchstore_open_flags flags = COUCHSTORE_OPEN_FLAG_RDONLY;
816    if (mode == DumpFileMap) {
817        flags |= COUCHSTORE_OPEN_FLAG_UNBUFFERED;
818        trackingFileOps = new TrackingFileOps();
819        errcode = couchstore_open_db_ex(file, flags, trackingFileOps, &db);
820    } else {
821        errcode = couchstore_open_db(file, flags, &db);
822    }
823    if (errcode != COUCHSTORE_SUCCESS) {
824        fprintf(stderr, "Failed to open \"%s\": %s\n",
825                file, couchstore_strerror(errcode));
826        return -1;
827    } else {
828        printf("Dumping \"%s\":\n", file);
829    }
830
831next_header:
832    switch (mode) {
833    case DumpBySequence:
834        if (dumpTree) {
835            errcode = couchstore_walk_seq_tree(
836                    db, 0, COUCHSTORE_TOLERATE_CORRUPTION,
837                    visit_node, &count);
838        } else {
839            errcode = couchstore_changes_since(
840                    db, 0, COUCHSTORE_TOLERATE_CORRUPTION,
841                    foldprint, &count);
842        }
843        break;
844    case DumpByID:
845        if (dumpTree) {
846            errcode = couchstore_walk_id_tree(
847                    db, NULL, COUCHSTORE_TOLERATE_CORRUPTION,
848                    visit_node, &count);
849        } else if (oneKey) {
850            DocInfo* info;
851            errcode = couchstore_docinfo_by_id(db, dumpKey.buf, dumpKey.size, &info);
852            if (errcode == COUCHSTORE_SUCCESS) {
853                foldprint(db, info, &count);
854                couchstore_free_docinfo(info);
855            }
856        } else {
857            errcode = couchstore_all_docs(
858                    db, NULL, COUCHSTORE_TOLERATE_CORRUPTION,
859                    foldprint, &count);
860        }
861        break;
862    case DumpLocals:
863        if (dumpJson) {
864            errcode = couchstore_print_local_docs(
865                    db, local_doc_print_json, &count);
866        } else {
867            errcode = couchstore_print_local_docs(db, local_doc_print, &count);
868        }
869        break;
870
871    case DumpFileMap:
872        // Visit all three indexes in the file. Note we don't actually need to
873        // do anything in the callback; the map is built up using a custom
874        // FileOps class and annotations in couchstore itself to tag the
875        // different structures.
876        cb_assert(trackingFileOps != nullptr);
877        trackingFileOps->setTree(db->file.handle,
878                                 TrackingFileOps::Tree::Sequence);
879        couchstore_walk_seq_tree(
880                db, 0, COUCHSTORE_TOLERATE_CORRUPTION, filemap_visit, &count);
881
882        // Note for the ID tree we specify a different (noop) callback; as we
883        // don't want or need to read the document bodies again.
884        trackingFileOps->setTree(db->file.handle, TrackingFileOps::Tree::Id);
885        couchstore_walk_id_tree(
886                db, NULL, COUCHSTORE_TOLERATE_CORRUPTION, noop_visit, &count);
887
888        trackingFileOps->setTree(db->file.handle, TrackingFileOps::Tree::Local);
889        int dummy = 0;
890        couchstore_print_local_docs(db, local_doc_ignore, &dummy);
891
892        // Mark that we are now on old headers
893        trackingFileOps->setHistoricData(db->file.handle, true);
894        break;
895    }
896    if (iterateHeaders) {
897        if (couchstore_rewind_db_header(db) == COUCHSTORE_SUCCESS) {
898            printf("\n");
899            goto next_header;
900        }
901    } else { /* rewind_db_header does its own cleanup on failure */
902        couchstore_close_file(db);
903        couchstore_free_db(db);
904    }
905
906    if (errcode < 0) {
907        fprintf(stderr, "Failed to dump database \"%s\": %s\n",
908                file, couchstore_strerror(errcode));
909        return -1;
910    }
911
912    *total += count;
913    return 0;
914}
915
916static couchstore_error_t lookup_callback(couchfile_lookup_request *rq,
917                                          const sized_buf *k,
918                                          const sized_buf *v)
919{
920    const uint16_t json_key_len = decode_raw16(*((raw_16 *) k->buf));
921    sized_buf json_key;
922    sized_buf json_value;
923
924    json_key.buf = k->buf + sizeof(uint16_t);
925    json_key.size = json_key_len;
926
927    json_value.size = v->size - sizeof(raw_kv_length);
928    json_value.buf = v->buf + sizeof(raw_kv_length);
929
930    if (dumpJson) {
931        printf("{\"id\":\"");
932        printjquote(&json_key);
933        printf("\",\"data\":\"");
934        printjquote(&json_value);
935        printf("\"}\n");
936    } else {
937        printf("Doc ID: ");
938        printsb(&json_key);
939        printf("data: ");
940        printsb(&json_value);
941    }
942
943    printf("\n");
944    rq->num_keys++;
945
946    return COUCHSTORE_SUCCESS;
947}
948
949static couchstore_error_t find_view_header_at_pos(view_group_info_t *info,
950                                                cs_off_t pos)
951{
952    couchstore_error_t errcode = COUCHSTORE_SUCCESS;
953    uint8_t buf;
954    ssize_t readsize = info->file.ops->pread(&info->file.lastError,
955                                            info->file.handle,
956                                            &buf, 1, pos);
957    error_unless(readsize == 1, static_cast<couchstore_error_t>(readsize));
958    if (buf == 0) {
959        return COUCHSTORE_ERROR_NO_HEADER;
960    } else if (buf != 1) {
961        return COUCHSTORE_ERROR_CORRUPT;
962    }
963
964    info->header_pos = pos;
965
966    return COUCHSTORE_SUCCESS;
967
968cleanup:
969    return errcode;
970}
971
972static couchstore_error_t find_view_header(view_group_info_t *info,
973                                        int64_t start_pos)
974{
975    couchstore_error_t last_header_errcode = COUCHSTORE_ERROR_NO_HEADER;
976    int64_t pos = start_pos;
977    pos -= pos % COUCH_BLOCK_SIZE;
978    for (; pos >= 0; pos -= COUCH_BLOCK_SIZE) {
979        couchstore_error_t errcode = find_view_header_at_pos(info, pos);
980        switch(errcode) {
981            case COUCHSTORE_SUCCESS:
982                // Found it!
983                return COUCHSTORE_SUCCESS;
984            case COUCHSTORE_ERROR_NO_HEADER:
985                // No header here, so keep going
986                break;
987            case COUCHSTORE_ERROR_ALLOC_FAIL:
988                // Fatal error
989                return errcode;
990            default:
991                // Invalid header; continue, but remember the last error
992                last_header_errcode = errcode;
993                break;
994        }
995    }
996    return last_header_errcode;
997}
998
999static int process_view_file(const char *file, int *total)
1000{
1001    view_group_info_t *info;
1002    couchstore_error_t errcode;
1003    index_header_t *header = NULL;
1004    char *header_buf = NULL;
1005    int header_len;
1006
1007    info = (view_group_info_t *)cb_calloc(1, sizeof(view_group_info_t));
1008    if (info == NULL) {
1009        fprintf(stderr, "Unable to allocate memory\n");
1010        return -1;
1011    }
1012    info->type = VIEW_INDEX_TYPE_MAPREDUCE;
1013
1014    errcode = open_view_group_file(file, COUCHSTORE_OPEN_FLAG_RDONLY, &info->file);
1015    if (errcode != COUCHSTORE_SUCCESS) {
1016        fprintf(stderr, "Failed to open \"%s\": %s\n",
1017                file, couchstore_strerror(errcode));
1018        return -1;
1019    } else {
1020        printf("Dumping \"%s\":\n", file);
1021    }
1022
1023    info->file.pos = info->file.ops->goto_eof(&info->file.lastError,
1024                                              info->file.handle);
1025
1026    errcode = find_view_header(info, info->file.pos - 2);
1027    if (errcode != COUCHSTORE_SUCCESS) {
1028        fprintf(stderr, "Unable to find header position \"%s\": %s\n",
1029                file, couchstore_strerror(errcode));
1030        return -1;
1031    }
1032
1033    header_len = pread_header(&info->file, (cs_off_t)info->header_pos, &header_buf,
1034                            MAX_HEADER_SIZE);
1035
1036    if (header_len < 0) {
1037        return -1;
1038    }
1039
1040    errcode = decode_index_header(header_buf, (size_t) header_len, &header);
1041    if (errcode != COUCHSTORE_SUCCESS) {
1042        fprintf(stderr, "Unable to decode header \"%s\": %s\n",
1043                file, couchstore_strerror(errcode));
1044        return -1;
1045    }
1046    cb_free(header_buf);
1047    printf("Num views: %d\n", header->num_views);
1048
1049    for (int i = 0; i < header->num_views; ++i) {
1050        printf("\nKV pairs from index: %d\n", i);
1051        sized_buf nullkey = {NULL, 0};
1052        sized_buf *lowkeys = &nullkey;
1053        couchfile_lookup_request rq;
1054
1055        rq.cmp.compare = view_btree_cmp;
1056        rq.file = &info->file;
1057        rq.num_keys = 1;
1058        rq.keys = &lowkeys;
1059        rq.callback_ctx = NULL;
1060        rq.fetch_callback = lookup_callback;
1061        rq.node_callback = NULL;
1062        rq.fold = 1;
1063
1064        errcode = btree_lookup(&rq, header->view_states[i]->pointer);
1065        if (errcode != COUCHSTORE_SUCCESS) {
1066            return -1;
1067        }
1068        *total = rq.num_keys - 1;
1069    }
1070    return 0;
1071}
1072
/**
 * Print the command line help text to stdout and terminate the process
 * with EXIT_FAILURE. Never returns.
 */
static void usage(void) {
    // One entry per output line; emitted verbatim in order.
    static const char* const helpLines[] = {
            "USAGE: couch_dbdump [options] file.couch [main_xxxx.view.X ...]\n",
            "\nOptions:\n",
            "    --vbucket <vb_file> decode vbucket file\n",
            "    --view <view_file> decode view index file\n",
            "    --key <key>  dump only the specified document\n",
            "    --hex-body   convert document body data to hex (for binary data)\n",
            "    --no-body    don't retrieve document bodies (metadata only, faster)\n",
            "    --byid       sort output by document ID\n",
            "    --byseq      sort output by document sequence number (default)\n",
            "    --json       dump data as JSON objects (one per line)\n",
            "    --no-namespace  don't decode namespaces\n",
            "    --iterate-headers  Iterate through all headers\n",
            "\nAlternate modes:\n",
            "    --tree       show file b-tree structure instead of data\n",
            "    --local      dump local documents\n",
            "    --map        dump block map \n",
    };

    for (const char* text : helpLines) {
        fputs(text, stdout);
    }
    exit(EXIT_FAILURE);
}
1092
1093int main(int argc, char **argv)
1094{
1095    int error = 0;
1096    int count = 0;
1097    int ii = 1;
1098
1099    if (argc < 2) {
1100        usage();
1101    }
1102
1103    while (ii < argc && strncmp(argv[ii], "-", 1) == 0) {
1104        if (strcmp(argv[ii], "--view") == 0) {
1105            decodeIndex = true;
1106        } else if (strcmp(argv[ii], "--vbucket") == 0) {
1107            decodeVbucket = true;
1108        } else if (strcmp(argv[ii], "--byid") == 0) {
1109            mode = DumpByID;
1110        } else if (strcmp(argv[ii], "--byseq") == 0) {
1111            mode = DumpBySequence;
1112        } else if (strcmp(argv[ii], "--tree") == 0) {
1113            dumpTree = true;
1114        } else if (strcmp(argv[ii], "--json") == 0) {
1115            dumpJson = true;
1116        } else if (strcmp(argv[ii], "--hex-body") == 0) {
1117            dumpHex = true;
1118        } else if (strcmp(argv[ii], "--no-body") == 0) {
1119            dumpBody = false;
1120        } else if (strcmp(argv[ii], "--no-namespace") == 0) {
1121            decodeNamespace = false;
1122        } else if (strcmp(argv[ii], "--key") == 0) {
1123            if (argc < (ii + 1)) {
1124                usage();
1125            }
1126            oneKey = true;
1127            dumpKey.buf = argv[ii+1];
1128            dumpKey.size = strlen(argv[ii+1]);
1129            if (mode == DumpBySequence) {
1130                mode = DumpByID;
1131            }
1132            ii++;
1133        } else if (strcmp(argv[ii], "--local") == 0) {
1134            mode = DumpLocals;
1135        } else if (strcmp(argv[ii], "--map") == 0) {
1136            mode = DumpFileMap;
1137        } else if (strcmp(argv[ii], "--iterate-headers") == 0) {
1138            iterateHeaders = true;
1139        } else {
1140            usage();
1141        }
1142        ++ii;
1143    }
1144
1145    if (ii >= argc || (mode == DumpLocals && dumpTree)) {
1146        usage();
1147    }
1148
1149    for (; ii < argc; ++ii) {
1150        if (decodeIndex) {
1151            error += process_view_file(argv[ii], &count);
1152        } else if (decodeVbucket) {
1153            error += process_vbucket_file(argv[ii], &count);
1154        } else {
1155            usage();
1156        }
1157    }
1158
1159    printf("\nTotal docs: %d\n", count);
1160    if (error) {
1161        exit(EXIT_FAILURE);
1162    } else {
1163        exit(EXIT_SUCCESS);
1164    }
1165}
1166