xref: /5.5.2/couchstore/programs/dbdiff/dbdiff.cc (revision 16455239)
1/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2#include "config.h"
3
4#include "bitfield.h"
5
6#include <libcouchstore/couch_db.h>
7#include <platform/compress.h>
8
9#include <getopt.h>
10#include <cctype>
11#include <cstdlib>
12#include <cstring>
13#include <string>
14
// Non-zero once "-q" is given on the command line; suppresses the per-document
// difference reports and the final "same content" message written to stdout.
static int quiet = 0;
16struct compare_context {
17    Db* self;
18    DbInfo self_info;
19    Db* other;
20    DbInfo other_info;
21    int diff;
22};
23
// Print the command line synopsis to stdout and terminate the process with
// EXIT_FAILURE. Never returns.
static void usage() {
    printf("USAGE: dbdiff [-q] file1 file2\n"
           "   -q\tquiet\n");
    exit(EXIT_FAILURE);
}
29
30static int is_printable_key(sized_buf key) {
31    size_t ii;
32    for (ii = 0; ii < key.size; ++ii) {
33        if (!isprint(key.buf[ii])) {
34            return 0;
35        }
36    }
37
38    return 1;
39}
40
41static void print_key(sized_buf key, FILE* out = stdout) {
42    if (is_printable_key(key)) {
43        fwrite(key.buf, 1, key.size, out);
44    } else {
45        size_t ii;
46        for (ii = 0; ii < key.size; ++ii) {
47            fprintf(out, "0x%02x", int(key.buf[ii]));
48        }
49    }
50}
51
52static void print_missing(sized_buf key, const char* fname) {
53    if (!quiet) {
54        printf("Document \"");
55        print_key(key);
56        printf("\" is missing from \"%s\"\n", fname);
57    }
58}
59
60// Encoding of Couchbase per-revision metadata
61struct CouchbaseRevMeta {
62    raw_64 cas;
63    raw_32 expiry;
64    raw_32 flags;
65};
66
67static void compare_docinfo(compare_context* ctx,
68                            const DocInfo* a,
69                            const DocInfo* b,
70                            bool& compressed) {
71    if (a->db_seq != b->db_seq) {
72        ctx->diff = 1;
73        if (!quiet) {
74            printf("Document db_seq differs for \"");
75            print_key(a->id);
76            printf("\": %" PRIu64 " - %" PRIu64 "\n", a->db_seq, b->db_seq);
77        }
78    }
79
80    if (a->rev_seq != b->rev_seq) {
81        ctx->diff = 1;
82        if (!quiet) {
83            printf("Document rev_seq differs for \"");
84            print_key(a->id);
85            printf("\": %" PRIu64 " - %" PRIu64 "\n", a->rev_seq, b->rev_seq);
86        }
87    }
88
89    // Assume datatype is raw; unless we decode otherwise later.
90    uint8_t a_datatype = 0;
91    uint8_t b_datatype = 0;
92
93    if (a->rev_meta.size != b->rev_meta.size) {
94        ctx->diff = 1;
95        if (!quiet) {
96            printf("Document rev_meta size differs for \"");
97            print_key(a->id);
98            printf("\": %" PRIu64 " - %" PRIu64 "\n",
99                   (uint64_t)a->rev_meta.size,
100                   (uint64_t)b->rev_meta.size);
101            printf("\"\n");
102        }
103    } else if (a->rev_meta.size >= sizeof(CouchbaseRevMeta)) {
104        // Decode as CouchbaseRevMeta, compare each field.
105        const auto* a_meta = reinterpret_cast<const CouchbaseRevMeta *>(a->rev_meta.buf);
106        const uint64_t a_cas = decode_raw64(a_meta->cas);
107        const uint32_t a_expiry = decode_raw32(a_meta->expiry);
108        const uint32_t a_flags = decode_raw32(a_meta->flags);
109
110        const auto* b_meta = reinterpret_cast<const CouchbaseRevMeta *>(b->rev_meta.buf);
111        const uint64_t b_cas = decode_raw64(b_meta->cas);
112        const uint32_t b_expiry = decode_raw32(b_meta->expiry);
113        const uint32_t b_flags = decode_raw32(b_meta->flags);
114
115        if (a_cas != b_cas) {
116            ctx->diff = 1;
117            if (!quiet) {
118                printf("Document CAS differs for \"");
119                print_key(a->id);
120                printf("\": %" PRIu64 " - %" PRIu64 "\n", a_cas, b_cas);
121            }
122        }
123
124        if (a_expiry != b_expiry) {
125            ctx->diff = 1;
126            if (!quiet) {
127                printf("Document expiry differs for \"");
128                print_key(a->id);
129                printf("\": %" PRIu32 " - %" PRIu32 "\n", a_expiry, b_expiry);
130            }
131        }
132
133        // Flags are not replicated for deleted documements; so ignore any
134        // differences if deleted.
135        if (a_flags != b_flags && !a->deleted) {
136            ctx->diff = 1;
137            if (!quiet) {
138                printf("Document flags differ for \"");
139                print_key(a->id);
140                printf("\": 0x%" PRIx32 " - 0x%" PRIx32 "\n", a_flags, b_flags);
141            }
142        }
143
144        if ((a->rev_meta.size > sizeof(CouchbaseRevMeta)) &&
145            (a->rev_meta.size <= sizeof(CouchbaseRevMeta) + 2)) {
146            // 18 bytes of rev_meta indicates CouchbaseRevMeta along with
147            // flex_meta_code (1B) and datatype (1B)
148            const uint8_t a_flex_code = *((uint8_t *) (a->rev_meta.buf +
149                                                       sizeof(CouchbaseRevMeta)));
150            a_datatype = *((uint8_t *) (a->rev_meta.buf +
151                                        sizeof(CouchbaseRevMeta) +
152                                        sizeof(uint8_t)));
153
154            const uint8_t b_flex_code = *((uint8_t *) (b->rev_meta.buf +
155                                                       sizeof(CouchbaseRevMeta)));
156            b_datatype = *((uint8_t *) (b->rev_meta.buf +
157                                        sizeof(CouchbaseRevMeta) +
158                                        sizeof(uint8_t)));
159
160            if (a_flex_code != b_flex_code) {
161                ctx->diff = 1;
162                if (!quiet) {
163                    printf("Document flex_code differ for \"");
164                    print_key(a->id);
165                    printf("\": %" PRIx8 " - %" PRIx8 "\n", a_flex_code, b_flex_code);
166                }
167            }
168
169            if (a_datatype != b_datatype) {
170                ctx->diff = 1;
171                if (!quiet) {
172                    printf("Document datatype differ for \"");
173                    print_key(a->id);
174                    printf("\": %" PRIx8 " - %" PRIx8 "\n", a_datatype, b_datatype);
175                }
176            }
177        }
178
179    } else if (memcmp(a->rev_meta.buf, b->rev_meta.buf, a->rev_meta.size) != 0) {
180        ctx->diff = 1;
181        if (!quiet) {
182            printf("Document rev_meta differs for \"");
183            print_key(a->id);
184            printf("\"\n");
185        }
186    }
187
188    if (a->deleted != b->deleted) {
189        ctx->diff = 1;
190        if (!quiet) {
191            printf("Document deleted status differs for \"");
192            print_key(a->id);
193            printf("\": %u - %u\n", a->deleted, b->deleted);
194        }
195    }
196
197    if (a->content_meta != b->content_meta) {
198        ctx->diff = 1;
199        if (!quiet) {
200            printf("Document content_meta differs for \"");
201            print_key(a->id);
202            printf("\": %02x - %02x\n", a->content_meta, b->content_meta);
203        }
204    }
205
206    // If the documents are compressed; then comparing the raw size is
207    // misleading as any difference could be due to how it was compressed.
208    // Instead compare uncompressed length later, in compare_documents (when we
209    // have the document value to decompress).
210    compressed = (a_datatype & 0x2) == 0;
211    if (a->size != b->size && !compressed) {
212        ctx->diff = 1;
213        if (!quiet) {
214            printf("Document size differs for \"");
215            print_key(a->id);
216            printf("\": %" PRIu64 " - %" PRIu64 "\n",
217                   (uint64_t)a->size,
218                   (uint64_t)b->size);
219        }
220    }
221}
222
223static void compare_documents(compare_context* ctx,
224                              DocInfo* this_doc_info,
225                              DocInfo* other_doc_info,
226                              bool compressed) {
227    couchstore_error_t e1, e2;
228    Doc *d1, *d2;
229
230    if (this_doc_info->deleted) {
231        return;
232    }
233
234    e1 = couchstore_open_document(
235            ctx->self, this_doc_info->id.buf, this_doc_info->id.size, &d1, 0);
236    e2 = couchstore_open_document(ctx->other,
237                                  other_doc_info->id.buf,
238                                  other_doc_info->id.size,
239                                  &d2,
240                                  0);
241
242    if (e1 == COUCHSTORE_SUCCESS && e2 == COUCHSTORE_SUCCESS) {
243        cb::compression::Buffer d1_uncompressed;
244        cb::compression::Buffer d2_uncompressed;
245        sized_buf d1_val = d1->data;
246        sized_buf d2_val = d2->data;
247
248        // If the documents are compressed; compare uncompressed data / size.
249        if (compressed) {
250            if (!cb::compression::inflate(cb::compression::Algorithm::Snappy,
251                                          {d1->data.buf, d1->data.size},
252                                          d1_uncompressed)) {
253                fprintf(stderr,
254                        "Failed to uncompress Snappy-compressed document \"");
255                print_key(d1->id, stderr);
256                fprintf(stderr, "\"\n");
257                exit(EXIT_FAILURE);
258            }
259            d1_val = {d1_uncompressed.data(), d1_uncompressed.size()};
260
261            if (!cb::compression::inflate(cb::compression::Algorithm::Snappy,
262                                          {d2->data.buf, d2->data.size},
263                                          d2_uncompressed)) {
264                fprintf(stderr,
265                        "Failed to uncompress Snappy-compressed document \"");
266                print_key(d1->id, stderr);
267                fprintf(stderr, "\"\n");
268                exit(EXIT_FAILURE);
269            }
270            d2_val = {d2_uncompressed.data(), d2_uncompressed.size()};
271        }
272
273        if (d1_val.size != d2_val.size) {
274            ctx->diff = 1;
275            if (!quiet) {
276                printf("Document \"");
277                print_key(this_doc_info->id);
278                printf("\" differs in size!\n");
279            }
280        } else if (memcmp(d1_val.buf, d2_val.buf, d1_val.size) != 0) {
281            ctx->diff = 1;
282            if (!quiet) {
283                printf("Document \"");
284                print_key(this_doc_info->id);
285                printf("\" content differs!\n");
286            }
287        }
288
289        couchstore_free_document(d1);
290        couchstore_free_document(d2);
291    } else {
292        fprintf(stderr,
293                "Failed to open document from this\n this: %s\n other: %s\n",
294                couchstore_strerror(e1),
295                couchstore_strerror(e2));
296        exit(EXIT_FAILURE);
297    }
298}
299
300static int deep_compare(Db* db, DocInfo* docinfo, void* c) {
301    auto* ctx = reinterpret_cast<compare_context*>(c);
302    DocInfo* other_doc_info;
303    couchstore_error_t err;
304
305    err = couchstore_docinfo_by_id(
306            ctx->other, docinfo->id.buf, docinfo->id.size, &other_doc_info);
307
308    if (err == COUCHSTORE_SUCCESS) {
309        /* verify that the docinfos are the same.. */
310        bool compressed;
311        compare_docinfo(ctx, docinfo, other_doc_info, compressed);
312        compare_documents(ctx, docinfo, other_doc_info, compressed);
313        couchstore_free_docinfo(other_doc_info);
314    } else {
315        ctx->diff = 1;
316        print_missing(docinfo->id, ctx->other_info.filename);
317    }
318
319    return 0;
320}
321
322static int check_existing(Db* db, DocInfo* docinfo, void* c) {
323    auto* ctx = reinterpret_cast<compare_context*>(c);
324    couchstore_error_t err;
325    DocInfo* other_info;
326
327    // This function will be called for all docs, including those which are
328    // deleted (tombstones). As such, we need to first lookup the docinfo in
329    // the 'other' file, only reporting as missing if their delete flags differ.
330    err = couchstore_docinfo_by_id(
331            ctx->other, docinfo->id.buf, docinfo->id.size, &other_info);
332
333    if (err == COUCHSTORE_SUCCESS) {
334        if (other_info->deleted != docinfo->deleted) {
335            ctx->diff = 1;
336            print_missing(docinfo->id, ctx->other_info.filename);
337        }
338        couchstore_free_docinfo(other_info);
339    } else if (err == COUCHSTORE_ERROR_DOC_NOT_FOUND) {
340        ctx->diff = 1;
341        print_missing(docinfo->id, ctx->other_info.filename);
342    } else {
343        fprintf(stderr, "Error trying to read \"");
344        print_key(docinfo->id, stderr);
345        fprintf(stderr,
346                "\" from \"%s\": %s\n",
347                ctx->other_info.filename,
348                couchstore_strerror(err));
349        exit(EXIT_FAILURE);
350    }
351
352    return 0;
353}
354
355static int diff(Db** dbs) {
356    couchstore_error_t err;
357    compare_context ctx;
358    DbInfo info;
359
360    ctx.diff = 0;
361    ctx.self = dbs[0];
362    ctx.other = dbs[1];
363
364    if (couchstore_db_info(ctx.self, &ctx.self_info) != COUCHSTORE_SUCCESS ||
365        couchstore_db_info(ctx.other, &ctx.other_info) != COUCHSTORE_SUCCESS) {
366        fprintf(stderr, "Failed to get database info..\n");
367        exit(EXIT_FAILURE);
368    }
369
370    err = couchstore_all_docs(ctx.self, nullptr, 0, deep_compare, &ctx);
371    if (err != COUCHSTORE_SUCCESS) {
372        fprintf(stderr, "An error occured: %s\n", couchstore_strerror(err));
373        return -1;
374    }
375
376    ctx.self = dbs[1];
377    ctx.other = dbs[0];
378    info = ctx.self_info;
379    ctx.self_info = ctx.other_info;
380    ctx.other_info = info;
381
382    err = couchstore_all_docs(ctx.self, nullptr, 0, check_existing, &ctx);
383    if (err != COUCHSTORE_SUCCESS) {
384        fprintf(stderr, "An error occured: %s\n", couchstore_strerror(err));
385        return -1;
386    }
387
388    return ctx.diff;
389}
390
391int main(int argc, char** argv) {
392    int cmd;
393    int ii;
394    Db* dbs[2];
395    int difference;
396
397    while ((cmd = getopt(argc, argv, "q")) != -1) {
398        switch (cmd) {
399        case 'q':
400            quiet = 1;
401            break;
402
403        default:
404            usage();
405            /* NOT REACHED */
406        }
407    }
408
409    if ((optind + 2) != argc) {
410        fprintf(stderr, "Exactly two filenames should be specified\n");
411        usage();
412        /* NOT REACHED */
413    }
414
415    for (ii = 0; ii < 2; ++ii) {
416        couchstore_error_t err;
417        err = couchstore_open_db(
418                argv[optind + ii], COUCHSTORE_OPEN_FLAG_RDONLY, &dbs[ii]);
419        if (err != COUCHSTORE_SUCCESS) {
420            fprintf(stderr,
421                    "Failed to open \"%s\": %s\n",
422                    argv[optind + ii],
423                    couchstore_strerror(err));
424            if (ii == 1) {
425                couchstore_close_file(dbs[0]);
426                couchstore_free_db(dbs[0]);
427            }
428            exit(EXIT_FAILURE);
429        }
430    }
431
432    difference = diff(dbs);
433    for (ii = 0; ii < 2; ++ii) {
434        couchstore_close_file(dbs[ii]);
435        couchstore_free_db(dbs[ii]);
436    }
437
438    if (difference == 0) {
439        if (!quiet) {
440            fprintf(stdout, "The content of the databases is the same\n");
441        }
442        return EXIT_SUCCESS;
443    }
444
445    return EXIT_FAILURE;
446}
447