xref: /4.0.0/couchstore/src/dbdump.c (revision dfab1c7a)
1/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2#include "config.h"
3#include <string.h>
4#include <stdbool.h>
5#include <stdio.h>
6#include <stdlib.h>
7#include <inttypes.h>
8#include <libcouchstore/couch_db.h>
9#include <snappy-c.h>
10#include "couch_btree.h"
11#include "util.h"
12#include "bitfield.h"
13#include "internal.h"
14
/* Selects which part of the database file process_file() dumps. */
typedef enum {
    DumpBySequence = 0, /* documents via the by-sequence index (initial mode) */
    DumpByID = 1,       /* documents via the by-id index */
    DumpLocals = 2      /* the local documents tree */
} DumpMode;
20
21static DumpMode mode = DumpBySequence;
22static bool dumpTree = false;
23static bool dumpJson = false;
24static bool dumpHex = false;
25static bool oneKey = false;
26static bool noBody = false;
27static sized_buf dumpKey;
28
29typedef struct {
30    raw_64 cas;
31    raw_32 expiry;
32    raw_32 flags;
33} CouchbaseRevMeta;
34
35static void printsb(const sized_buf *sb)
36{
37    if (sb->buf == NULL) {
38        printf("null\n");
39        return;
40    }
41    printf("%.*s\n", (int) sb->size, sb->buf);
42}
43
44static void printsbhexraw(const sized_buf* sb) {
45    size_t ii;
46    for (ii = 0; ii < sb->size; ++ii) {
47        printf("%.02x", (uint8_t)sb->buf[ii]);
48    }
49}
50
51static void printsbhex(const sized_buf *sb, int with_ascii)
52{
53    size_t i;
54
55    if (sb->buf == NULL) {
56        printf("null\n");
57        return;
58    }
59    printf("{");
60    for (i = 0; i < sb->size; ++i) {
61        printf("%.02x", (uint8_t)sb->buf[i]);
62        if (i % 4 == 3) {
63            printf(" ");
64        }
65    }
66    printf("}");
67    if (with_ascii) {
68        printf("  (\"");
69        for (i = 0; i < sb->size; ++i) {
70            uint8_t ch = sb->buf[i];
71            if (ch < 32 || ch >= 127) {
72                ch = '?';
73            }
74            printf("%c", ch);
75        }
76        printf("\")");
77    }
78    printf("\n");
79}
80
81static void printjquote(const sized_buf *sb)
82{
83    const char* i = sb->buf;
84    const char* end = sb->buf + sb->size;
85    if (sb->buf == NULL) {
86        return;
87    }
88    for (; i < end; i++) {
89        if (*i > 31 && *i != '\"' && *i != '\\') {
90            fputc(*i, stdout);
91        } else {
92            fputc('\\', stdout);
93            switch(*i)
94            {
95                case '\\': fputc('\\', stdout);break;
96                case '\"': fputc('\"', stdout);break;
97                case '\b': fputc('b', stdout);break;
98                case '\f': fputc('f', stdout);break;
99                case '\n': fputc('n', stdout);break;
100                case '\r': fputc('r', stdout);break;
101                case '\t': fputc('t', stdout);break;
102                default:
103                           printf("u00%.02x", *i);
104            }
105        }
106    }
107}
108
109static int foldprint(Db *db, DocInfo *docinfo, void *ctx)
110{
111    int *count = (int *) ctx;
112    Doc *doc = NULL;
113    uint64_t cas;
114    uint32_t expiry, flags;
115    uint8_t datatype = 0x00, flex_code = 0x01, conf_res_mode = 0x00;
116    couchstore_error_t docerr;
117    (*count)++;
118
119    if (dumpJson) {
120        printf("{\"seq\":%"PRIu64",\"id\":\"", docinfo->db_seq);
121        printjquote(&docinfo->id);
122        printf("\",");
123    } else {
124        if (mode == DumpBySequence) {
125            printf("Doc seq: %"PRIu64"\n", docinfo->db_seq);
126            printf("     id: ");
127            printsb(&docinfo->id);
128        } else {
129            printf("  Doc ID: ");
130            printsb(&docinfo->id);
131            if (docinfo->db_seq > 0) {
132                printf("     seq: %"PRIu64"\n", docinfo->db_seq);
133            }
134        }
135    }
136    if (docinfo->bp == 0 && docinfo->deleted == 0 && !dumpJson) {
137        printf("         ** This b-tree node is corrupt; raw node value follows:*\n");
138        printf("    raw: ");
139        printsbhex(&docinfo->rev_meta, 1);
140        return 0;
141    }
142    if (dumpJson) {
143        printf("\"rev\":%"PRIu64",\"content_meta\":%d,", docinfo->rev_seq,
144                                                         docinfo->content_meta);
145        printf("\"physical_size\":%"PRIu64",", (uint64_t)docinfo->size);
146    } else {
147        printf("     rev: %"PRIu64"\n", docinfo->rev_seq);
148        printf("     content_meta: %d\n", docinfo->content_meta);
149        printf("     size (on disk): %"PRIu64"\n", (uint64_t)docinfo->size);
150    }
151    if (docinfo->rev_meta.size >= sizeof(CouchbaseRevMeta)) {
152        const CouchbaseRevMeta* meta = (const CouchbaseRevMeta*)docinfo->rev_meta.buf;
153        cas = decode_raw64(meta->cas);
154        expiry = decode_raw32(meta->expiry);
155        flags = decode_raw32(meta->flags);
156        if (docinfo->rev_meta.size > sizeof(CouchbaseRevMeta)) {
157            // 18 bytes of rev_meta indicates CouchbaseRevMeta along with
158            // flex_meta_code (1B) and datatype (1B)
159            if (docinfo->rev_meta.size < sizeof(CouchbaseRevMeta) + 2) {
160                printf("     Error parsing the document: Possible corruption\n");
161                return 1;
162            }
163            flex_code = *((uint8_t *)(docinfo->rev_meta.buf + sizeof(CouchbaseRevMeta)));
164            if (flex_code < 0x01) {
165                printf("     Error: Flex code mismatch (bad code: %d)\n",
166                       flex_code);
167                return 1;
168            }
169            datatype = *((uint8_t *)(docinfo->rev_meta.buf + sizeof(CouchbaseRevMeta) +
170                        sizeof(uint8_t)));
171            if (docinfo->rev_meta.size > sizeof(CouchbaseRevMeta) + 2) {
172                // 19 bytes of rev_meta indicates CouchbaseRevMeta along with
173                // flex_meta_code (1B) and datatype (1B), along with the conflict
174                // resolution flag (1B).
175                conf_res_mode = *((uint8_t *)(docinfo->rev_meta.buf +
176                                  sizeof(CouchbaseRevMeta) + sizeof(uint8_t) +
177                                  sizeof(uint8_t)));
178
179                if (dumpJson) {
180                    printf("\"cas\":\"%"PRIu64"\",\"expiry\":%"PRIu32",\"flags\":%"PRIu32","
181                           "\"datatype\":%d,\"conflict_resolution_mode\":%d,",
182                            cas, expiry, flags, datatype, conf_res_mode);
183                } else {
184                    printf("     cas: %"PRIu64", expiry: %"PRIu32", flags: %"PRIu32", "
185                           "datatype: %d, conflict_resolution_mode: %d\n",
186                           cas, expiry, flags, datatype, conf_res_mode);
187                }
188            } else {
189                if (dumpJson) {
190                    printf("\"cas\":\"%"PRIu64"\",\"expiry\":%"PRIu32",\"flags\":%"PRIu32","
191                           "\"datatype\":%d,",
192                            cas, expiry, flags, datatype);
193                } else {
194                    printf("     cas: %"PRIu64", expiry: %"PRIu32", flags: %"PRIu32", "
195                           "datatype: %d\n",
196                           cas, expiry, flags, datatype);
197                }
198            }
199        } else {
200            if (dumpJson) {
201                printf("\"cas\":\"%"PRIu64"\",\"expiry\":%"PRIu32",\"flags\":%"PRIu32",",
202                        cas, expiry, flags);
203            } else {
204                printf("     cas: %"PRIu64", expiry: %"PRIu32", flags: %"PRIu32"\n",
205                        cas, expiry, flags);
206            }
207        }
208    }
209    if (docinfo->deleted) {
210        if (dumpJson) {
211            printf("\"deleted\":true,");
212        } else {
213            printf("     doc deleted\n");
214        }
215    }
216
217    if (!noBody) {
218        docerr = couchstore_open_doc_with_docinfo(db, docinfo, &doc, DECOMPRESS_DOC_BODIES);
219        if (docerr != COUCHSTORE_SUCCESS) {
220            if (dumpJson) {
221                printf("\"body\":null}\n");
222            } else {
223                printf("     could not read document body: %s\n", couchstore_strerror(docerr));
224            }
225        } else if (doc) {
226            sized_buf new_body;
227            if (datatype >= 0x02) {
228                size_t rlen;
229                snappy_uncompressed_length(doc->data.buf, doc->data.size, &rlen);
230                char *decbuf = (char *) malloc(rlen);
231                size_t new_len;
232                snappy_uncompress(doc->data.buf, doc->data.size, decbuf, &new_len);
233                new_body.size = new_len;
234                new_body.buf = decbuf;
235            } else {
236                new_body = doc->data;
237            }
238            if (dumpJson) {
239                printf("\"size\":%"PRIu64",", (uint64_t)new_body.size);
240                if (docinfo->content_meta & COUCH_DOC_IS_COMPRESSED) {
241                    printf("\"snappy\":true,\"body\":\"");
242                } else {
243                    printf("\"body\":\"");
244                }
245                printjquote(&new_body);
246                printf("\"}\n");
247            } else {
248                printf("     size: %"PRIu64"\n", (uint64_t)new_body.size);
249                printf("     data:%s",
250                       docinfo->content_meta & COUCH_DOC_IS_COMPRESSED ?
251                       " (snappy) " : " ");
252                if (dumpHex) {
253                    printsbhexraw(&new_body);
254                    printf("\n");
255                } else {
256                    printsb(&new_body);
257                }
258            }
259            if (datatype >= 0x02) {
260                free (new_body.buf);
261            }
262        }
263    } else {
264        if (dumpJson) {
265            printf("\"body\":null}\n");
266        } else {
267            printf("\n");
268        }
269    }
270
271    couchstore_free_document(doc);
272    return 0;
273}
274
275
276static int visit_node(Db *db,
277                      int depth,
278                      const DocInfo* docinfo,
279                      uint64_t subtreeSize,
280                      const sized_buf* reduceValue,
281                      void *ctx)
282{
283    int i;
284    (void) db;
285
286    for (i = 0; i < depth; ++i)
287        printf("  ");
288    if (reduceValue) {
289        /* This is a tree node: */
290        printf("+ (%"PRIu64") ", subtreeSize);
291        printsbhex(reduceValue, 0);
292    } else if (docinfo->bp > 0) {
293        int *count;
294        /* This is a document: */
295        printf("%c (%"PRIu64") ", (docinfo->deleted ? 'x' : '*'),
296               (uint64_t)docinfo->size);
297        if (mode == DumpBySequence) {
298            printf("#%"PRIu64" ", docinfo->db_seq);
299        }
300        printsb(&docinfo->id);
301
302        count = (int *) ctx;
303        (*count)++;
304    } else {
305        /* Document, but not in a known format: */
306        printf("**corrupt?** ");
307        printsbhex(&docinfo->rev_meta, 1);
308    }
309    return 0;
310}
311
312static couchstore_error_t local_doc_print(couchfile_lookup_request *rq,
313                                          const sized_buf *k,
314                                          const sized_buf *v)
315{
316    int* count = (int*) rq->callback_ctx;
317    if (!v) {
318        return COUCHSTORE_ERROR_DOC_NOT_FOUND;
319    }
320    (*count)++;
321    sized_buf *id = (sized_buf *) k;
322    if (dumpJson) {
323        printf("{\"id\":\"");
324        printjquote(id);
325        printf("\",");
326    } else {
327        printf("Key: ");
328        printsb(id);
329    }
330
331    if (dumpJson) {
332        printf("\"value\":\"");
333        printjquote(v);
334        printf("\"}\n");
335    } else {
336        printf("Value: ");
337        printsb(v);
338        printf("\n");
339    }
340
341    return COUCHSTORE_SUCCESS;
342}
343
344static couchstore_error_t couchstore_print_local_docs(Db *db, int *count)
345{
346    sized_buf key;
347    sized_buf *keylist = &key;
348    couchfile_lookup_request rq;
349    couchstore_error_t errcode;
350
351    if (db->header.local_docs_root == NULL) {
352        if (oneKey) {
353            return COUCHSTORE_ERROR_DOC_NOT_FOUND;
354        } else {
355            return COUCHSTORE_SUCCESS;
356        }
357    }
358
359    key.buf = (char *)"\0";
360    key.size = 0;
361
362    rq.cmp.compare = ebin_cmp;
363    rq.file = &db->file;
364    rq.num_keys = 1;
365    rq.keys = &keylist;
366    rq.callback_ctx = count;
367    rq.fetch_callback = local_doc_print;
368    rq.node_callback = NULL;
369    rq.fold = 1;
370
371    if (oneKey) {
372        rq.fold = 0;
373        key = dumpKey;
374    }
375
376    errcode = btree_lookup(&rq, db->header.local_docs_root->pointer);
377    return errcode;
378}
379
380static int process_file(const char *file, int *total)
381{
382    Db *db;
383    couchstore_error_t errcode;
384    int count = 0;
385
386    errcode = couchstore_open_db(file, COUCHSTORE_OPEN_FLAG_RDONLY, &db);
387    if (errcode != COUCHSTORE_SUCCESS) {
388        fprintf(stderr, "Failed to open \"%s\": %s\n",
389                file, couchstore_strerror(errcode));
390        return -1;
391    } else {
392        printf("Dumping \"%s\":\n", file);
393    }
394
395    switch (mode) {
396        case DumpBySequence:
397            if (dumpTree) {
398                errcode = couchstore_walk_seq_tree(db, 0, COUCHSTORE_INCLUDE_CORRUPT_DOCS,
399                                                   visit_node, &count);
400            } else {
401                errcode = couchstore_changes_since(db, 0, COUCHSTORE_INCLUDE_CORRUPT_DOCS,
402                                                   foldprint, &count);
403            }
404            break;
405        case DumpByID:
406            if (dumpTree) {
407                errcode = couchstore_walk_id_tree(db, NULL, COUCHSTORE_INCLUDE_CORRUPT_DOCS,
408                                                  visit_node, &count);
409            } else if (oneKey) {
410                DocInfo* info;
411                errcode = couchstore_docinfo_by_id(db, dumpKey.buf, dumpKey.size, &info);
412                if (errcode == COUCHSTORE_SUCCESS) {
413                    foldprint(db, info, &count);
414                    couchstore_free_docinfo(info);
415                }
416            } else {
417                errcode = couchstore_all_docs(db, NULL, COUCHSTORE_INCLUDE_CORRUPT_DOCS,
418                                              foldprint, &count);
419            }
420            break;
421        case DumpLocals:
422            errcode = couchstore_print_local_docs(db, &count);
423            break;
424    }
425    (void)couchstore_close_db(db);
426
427    if (errcode < 0) {
428        fprintf(stderr, "Failed to dump database \"%s\": %s\n",
429                file, couchstore_strerror(errcode));
430        return -1;
431    }
432
433    *total += count;
434    return 0;
435}
436
/*
 * Print the command-line help to stdout and terminate the process with
 * EXIT_FAILURE. Does not return.
 */
static void usage(void) {
    static const char *const help[] = {
        "USAGE: couch_dbdump [options] file.couch [file2.couch ...]\n",
        "\nOptions:\n",
        "       --key <key>  dump only the specified document\n",
        "       --hex-body   convert document body data to hex (for binary data)\n",
        "       --no-body    don't retrieve document bodies (metadata only, faster)\n",
        "       --byid       sort output by document ID\n",
        "       --byseq      sort output by document sequence number (default)\n",
        "       --json       dump data as JSON objects (one per line)\n",
        "\nAlternate modes:\n",
        "       --tree       show file b-tree structure instead of data\n",
        "       --local      dump local documents\n"
    };
    size_t idx;

    for (idx = 0; idx < sizeof(help) / sizeof(help[0]); ++idx) {
        fputs(help[idx], stdout);
    }
    exit(EXIT_FAILURE);
}
451
452int main(int argc, char **argv)
453{
454    int error = 0;
455    int count = 0;
456    int ii = 1;
457
458    if (argc < 2) {
459        usage();
460    }
461
462    while (ii < argc && strncmp(argv[ii], "-", 1) == 0) {
463        if (strcmp(argv[ii], "--byid") == 0) {
464            mode = DumpByID;
465        } else if (strcmp(argv[ii], "--byseq") == 0) {
466            mode = DumpBySequence;
467        } else if (strcmp(argv[ii], "--tree") == 0) {
468            dumpTree = true;
469        } else if (strcmp(argv[ii], "--json") == 0) {
470            dumpJson = true;
471        } else if (strcmp(argv[ii], "--hex-body") == 0) {
472            dumpHex = true;
473        } else if (strcmp(argv[ii], "--no-body") == 0) {
474            noBody = true;
475        } else if (strcmp(argv[ii], "--key") == 0) {
476            if (argc < (ii + 1)) {
477                usage();
478            }
479            oneKey = true;
480            dumpKey.buf = argv[ii+1];
481            dumpKey.size = strlen(argv[ii+1]);
482            if (mode == DumpBySequence) {
483                mode = DumpByID;
484            }
485            ii++;
486        } else if (strcmp(argv[ii], "--local") == 0) {
487            mode = DumpLocals;
488        } else {
489            usage();
490        }
491        ++ii;
492    }
493
494    if (ii >= argc || (mode == DumpLocals && dumpTree)) {
495        usage();
496    }
497
498    for (; ii < argc; ++ii) {
499        error += process_file(argv[ii], &count);
500    }
501
502    printf("\nTotal docs: %d\n", count);
503    if (error) {
504        exit(EXIT_FAILURE);
505    } else {
506        exit(EXIT_SUCCESS);
507    }
508}
509