xref: /5.5.2/couchstore/src/dbck.cc (revision 1f5879fb)
1/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/*
3 *     Copyright 2017 Couchbase, Inc
4 *
5 *   Licensed under the Apache License, Version 2.0 (the "License");
6 *   you may not use this file except in compliance with the License.
7 *   You may obtain a copy of the License at
8 *
9 *       http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *   Unless required by applicable law or agreed to in writing, software
12 *   distributed under the License is distributed on an "AS IS" BASIS,
13 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *   See the License for the specific language governing permissions and
15 *   limitations under the License.
16 */
17
18#include "config.h"
19#include "couch_btree.h"
20#include "internal.h"
21#include "util.h"
22
23#include <libcouchstore/couch_db.h>
24#include <platform/cb_malloc.h>
25
26#include <getopt.h>
27#include <inttypes.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <unistd.h>
32
33#include <iomanip>
34#include <string>
35#include <sstream>
36
/**
 * Write the command-line help for couch_dbck to stdout and terminate.
 *
 * This function never returns: it always ends the process with
 * EXIT_FAILURE, so callers use it both for "bad arguments" and as a
 * hard stop.
 */
static void usage(void) {
    // The whole help screen is kept as one constant so that it is
    // emitted with a single write to stdout.
    static const char help_text[] =
            "USAGE: couch_dbck [options] "
            "source_filename [destination_filename]\n"
            "\nOptions:\n"
            "    -s, --stale       "
            "Recover from stale commits if corruption detected.\n"
            "    -v, --verbose     "
            "Display detailed messages.\n"
            "    -j, --json        "
            "Display corrupt document info as JSON objects "
            "(one per line).\n";

    fputs(help_text, stdout);
    exit(EXIT_FAILURE);
}
50
/**
 * Settings for a single recovery run, filled in by main() from the
 * command line and shared (by pointer) with the compaction and rewind
 * hooks.
 */
struct recovery_options {
    /// Path of the input file that is to be checked / recovered.
    std::string src_filename;

    /// Path of the output file that receives the recovered data.
    std::string dst_filename;

    /// Whether document bodies should be checked for corruption.
    /// Defaults to on; no code visible in this file reads it.
    bool detect_corrupt_docbody{true};

    /// Whether older data from stale commits may be used for recovery.
    bool enable_rewind{false};

    /// Whether detailed per-document messages are printed.
    bool verbose_msg{false};

    /// Whether the detailed messages are formatted as JSON objects.
    bool json{false};
};
65
66struct recover_file_hook_param {
67    // Number of documents referred to by index.
68    uint64_t num_visited_docs = 0;
69    // Number of corrupted documents.
70    uint64_t num_corrupted_docs = 0;
71    // Recovery options.
72    recovery_options* options = nullptr;
73    // DB handle for source file.
74    Db* db_src = nullptr;
75};
76
77std::string get_printable_string(const sized_buf& buf) {
78    std::string ret;
79    for (size_t i=0; i<buf.size; ++i) {
80        if (0x20 <= buf.buf[i] && buf.buf[i] <= 0x7d) {
81            // Printable character.
82            ret += buf.buf[i];
83        } else {
84            // Otherwise: dump hex.
85            std::stringstream ss;
86            ss << "(0x" << std::setfill('0') << std::setw(2)
87               << std::hex << static_cast<size_t>(buf.buf[i]) << ") ";
88            ret += ss.str();
89        }
90    }
91    return ret;
92}
93
94static int recover_file_hook(Db* target,
95                             DocInfo *docinfo,
96                             sized_buf item,
97                             void *ctx) {
98    (void)item;
99    recover_file_hook_param* param =
100            reinterpret_cast<recover_file_hook_param*>(ctx);
101    if (!docinfo) {
102        // End of compaction.
103        return 0;
104    }
105
106    param->num_visited_docs++;
107
108    couchstore_error_t errcode;
109    Doc* cur_doc;
110
111    if (docinfo->deleted) {
112        // Deleted doc.
113        return 0;
114    }
115
116    // Open doc body.
117    errcode = couchstore_open_doc_with_docinfo(
118            param->db_src, docinfo, &cur_doc, 0x0);
119    if (errcode != COUCHSTORE_SUCCESS) {
120        // Document is corrupted.
121        if (param->options->verbose_msg) {
122            std::string fmt;
123            if (param->options->json) {
124                fmt = "{"
125                      R"("type":"corrupted document",)"
126                      R"("error code":%d,)"
127                      R"("error message":"%s",)"
128                      R"("id":"%s",)"
129                      R"("bp")" ":%" PRIu64 ","
130                      R"("size":%zu,)"
131                      R"("seq")" ":%" PRIu64
132                      "}\n";
133            } else {
134                fmt = "Corrupted document "
135                      "(error code %d, %s): "
136                      "id '%s', "
137                      "bp %" PRIu64 ", "
138                      "size %zu, "
139                      "seq %" PRIu64
140                      "\n";
141            }
142
143            fprintf(stdout, fmt.c_str(),
144                    errcode, couchstore_strerror(errcode),
145                    get_printable_string(docinfo->id).c_str(),
146                    docinfo->bp,
147                    docinfo->size,
148                    docinfo->db_seq);
149        }
150        param->num_corrupted_docs++;
151    } else {
152        couchstore_free_document(cur_doc);
153    }
154
155    return 0;
156}
157
158struct rewind_request {
159    // Recovery options.
160    recovery_options* options = nullptr;
161    // DB handle for source file.
162    Db *db_src = nullptr;
163    // DB handle for recovered file.
164    Db *db_recovered = nullptr;
165    // Total number of old documents recovered from all stale commits.
166    uint64_t total_num_docs_recovered = 0;
167};
168
169struct rewind_hook_param {
170    // Recovery options.
171    recovery_options* options = nullptr;
172    // DB handle for source file.
173    Db *db_src = nullptr;
174    // DB handle for recovered file.
175    Db *db_dst = nullptr;
176    // Number of documents recovered from this specific commit.
177    uint64_t num_docs_recovered = 0;
178};
179
180static int rewind_hook(Db *db,
181                       int depth,
182                       const DocInfo* doc_info,
183                       uint64_t subtree_size,
184                       const sized_buf* reduce_value,
185                       void *ctx) {
186    rewind_hook_param* param =
187            reinterpret_cast<rewind_hook_param*>(ctx);
188    if (!doc_info) {
189        return 0;
190    }
191
192    DocInfo* doc_info_dst;
193    couchstore_error_t errcode;
194    errcode = couchstore_docinfo_by_id(param->db_dst,
195                                       doc_info->id.buf,
196                                       doc_info->id.size,
197                                       &doc_info_dst);
198    if (errcode != COUCHSTORE_SUCCESS) {
199        // The doc exists in stale commit (of corrupted file) only.
200        // Copy it into the destination file.
201        Doc* cur_doc;
202        errcode = couchstore_open_doc_with_docinfo(
203                param->db_src, (DocInfo*)doc_info, &cur_doc, 0x0);
204        if (errcode != COUCHSTORE_SUCCESS) {
205            return 0;
206        }
207
208        if (param->options->verbose_msg) {
209            std::string fmt;
210            if (param->options->json) {
211                fmt = "{"
212                      R"("type":"recovered document",)"
213                      R"("id":"%s",)"
214                      R"("bp")" ":%" PRIu64 ","
215                      R"("size")" ":%zu,"
216                      R"("seq")" ":%" PRIu64
217                      "}\n";
218            } else {
219                fmt = "Recovered document '%s', "
220                      "prev bp %" PRIu64 ", "
221                      "prev size %zu, "
222                      "prev seq num %" PRIu64
223                      "\n";
224            }
225
226            fprintf(stdout, fmt.c_str(),
227                    get_printable_string(doc_info->id).c_str(),
228                    doc_info->bp,
229                    doc_info->size,
230                    doc_info->db_seq);
231        }
232
233        couchstore_save_document(param->db_dst,
234                                 cur_doc,
235                                 (DocInfo*)doc_info,
236                                 COUCHSTORE_SEQUENCE_AS_IS);
237        param->num_docs_recovered++;
238        couchstore_free_document(cur_doc);
239    } else {
240        couchstore_free_docinfo(doc_info_dst);
241    }
242    return 0;
243}
244
245static void rewind_and_get_stale_data(rewind_request& rq) {
246    couchstore_error_t errcode;
247    size_t num_rewind = 0;
248    Db *db = nullptr;
249
250    errcode = couchstore_open_db_ex(rq.options->src_filename.c_str(),
251                                    COUCHSTORE_OPEN_FLAG_RDONLY,
252                                    couchstore_get_default_file_ops(),
253                                    &db);
254
255    while (errcode == COUCHSTORE_SUCCESS) {
256        errcode = couchstore_rewind_db_header(db);
257        if (errcode != COUCHSTORE_SUCCESS) {
258            db = nullptr;
259            break;
260        }
261        num_rewind++;
262
263        rewind_hook_param rewind_param;
264        rewind_param.options = rq.options;
265        rewind_param.db_dst = rq.db_recovered;
266        rewind_param.db_src = rq.db_src;
267
268        // Walk ID tree and find any documents
269        // that exist in stale commit only.
270        couchstore_walk_id_tree(db,
271                                nullptr,
272                                COUCHSTORE_TOLERATE_CORRUPTION,
273                                rewind_hook,
274                                &rewind_param);
275        if (rewind_param.num_docs_recovered) {
276            fprintf(stderr, "%" PRIu64 " documents recovered "
277                    "from stale header #%zu.\n",
278                    rewind_param.num_docs_recovered,
279                    num_rewind);
280            rq.total_num_docs_recovered += rewind_param.num_docs_recovered;
281        }
282    };
283
284    if (!num_rewind) {
285        fprintf(stderr, "No stale header to read.\n");
286    }
287
288    if (db) {
289        couchstore_close_file(db);
290        couchstore_free_db(db);
291    }
292}
293
294static int recover_file(recovery_options& options) {
295    fprintf(stderr, "Recover from file %s to file %s\n",
296            options.src_filename.c_str(),
297            options.dst_filename.c_str());
298
299    if (options.src_filename == options.dst_filename) {
300        // Both filenames shouldn't be the same.
301        usage();
302    }
303
304    // Source (may be corrupted) DB.
305    Db *db_src = nullptr;
306    // DB after recovery.
307    Db *db_recovered = nullptr;
308    // Another handle for source DB.
309    Db *db_src_alt = nullptr;
310
311    couchstore_error_t errcode = COUCHSTORE_SUCCESS;
312    couchstore_error_t errcode_compaction = COUCHSTORE_SUCCESS;
313    bool error_detected = false;
314
315    recover_file_hook_param param;
316    param.options = &options;
317
318    // Open source file.
319    errcode = couchstore_open_db_ex(options.src_filename.c_str(),
320                                    COUCHSTORE_OPEN_FLAG_RDONLY,
321                                    couchstore_get_default_file_ops(),
322                                    &db_src);
323    error_pass(errcode);
324
325    // Open source file for rewind.
326    errcode = couchstore_open_db_ex(options.src_filename.c_str(),
327                                    COUCHSTORE_OPEN_FLAG_RDONLY,
328                                    couchstore_get_default_file_ops(),
329                                    &db_src_alt);
330    error_pass(errcode);
331    param.db_src = db_src_alt;
332
333    // Compact with recovery mode.
334    errcode_compaction = couchstore_compact_db_ex(
335            db_src,
336            options.dst_filename.c_str(),
337            COUCHSTORE_COMPACT_RECOVERY_MODE,
338            recover_file_hook,
339            nullptr,
340            &param,
341            couchstore_get_default_file_ops());
342
343    // Open recovered file.
344    errcode = couchstore_open_db_ex(options.dst_filename.c_str(),
345                                    0x0,
346                                    couchstore_get_default_file_ops(),
347                                    &db_recovered);
348    DbInfo dbinfo;
349    couchstore_db_info(db_recovered, &dbinfo);
350
351    if (errcode_compaction == COUCHSTORE_SUCCESS) {
352        fprintf(stderr, "No corruption detected in index.\n");
353    } else {
354        fprintf(stderr,
355                "Corrupted index node detected "
356                "(error code %d, %s).\n",
357                errcode_compaction,
358                couchstore_strerror(errcode_compaction));
359        error_detected = true;
360    }
361    fprintf(stderr, "Total %" PRIu64 " documents are referred to by index.\n",
362            param.num_visited_docs);
363
364    if (param.num_corrupted_docs) {
365        fprintf(stderr, "Total %" PRIu64 " documents corrupted.\n",
366                param.num_corrupted_docs);
367        error_detected = true;
368    } else {
369        fprintf(stderr, "No corruption detected in documents.\n");
370    }
371
372    fprintf(stderr, "Total %" PRIu64 " documents recovered.\n",
373            dbinfo.doc_count);
374
375    // If error detected, and flag is set, traverse stale commits.
376    if (error_detected && options.enable_rewind) {
377        rewind_request rwrq;
378        rwrq.options = &options;
379        rwrq.db_recovered = db_recovered;
380        rwrq.db_src = db_src_alt;
381
382        fprintf(stderr,
383                "We are going to recover missing documents "
384                "from stale commits..\n");
385        rewind_and_get_stale_data(rwrq);
386
387        if (rwrq.total_num_docs_recovered) {
388            fprintf(stderr,
389                    "Total %" PRIu64 " documents recovered "
390                    "from stale headers.\n",
391                    rwrq.total_num_docs_recovered);
392            error_pass(couchstore_commit(db_recovered));
393        }
394    }
395
396cleanup:
397    if (db_src) {
398        couchstore_close_file(db_src);
399        couchstore_free_db(db_src);
400    }
401    if (db_recovered) {
402        couchstore_close_file(db_recovered);
403        couchstore_free_db(db_recovered);
404    }
405    if (db_src_alt) {
406        couchstore_close_file(db_src_alt);
407        couchstore_free_db(db_src_alt);
408    }
409
410    return errcode;
411}
412
413int main(int argc, char **argv)
414{
415    struct option long_options[] =
416    {
417        {"stale",   no_argument, 0, 's'},
418        {"verbose", no_argument, 0, 'v'},
419        {"json",    no_argument, 0, 'j'},
420        {nullptr,   0,           0, 0}
421    };
422
423    recovery_options options;
424    int opt;
425
426    while ( (opt = getopt_long(argc, argv, "svj",
427                               long_options, nullptr)) != -1 )  {
428        switch (opt) {
429        case 's': // stale
430            options.enable_rewind = true;
431            break;
432        case 'v': // verbose
433            options.verbose_msg = true;
434            break;
435        case 'j': // json
436            options.json = true;
437            break;
438
439        default:
440            usage();
441        }
442    }
443
444    if (argc - optind < 1) {
445        // After option fields, one or two more fields
446        // (for src/dst files) should exist.
447        usage();
448    }
449
450    options.src_filename = argv[optind];
451    if (argc - optind == 1) {
452        // Destination file name is not given, automatically set it.
453        options.dst_filename = options.src_filename + ".recovered";
454    } else {
455        options.dst_filename = argv[optind+1];
456    }
457
458    int errcode = recover_file(options);
459
460    if (errcode == 0) {
461        exit(EXIT_SUCCESS);
462    } else {
463        exit(EXIT_FAILURE);
464    }
465}
466
467