1#!/usr/bin/env python3
2# -*- python -*-
3#
4# @author Couchbase <info@couchbase.com>
5# @copyright 2011-2019 Couchbase, Inc.
6#
7# Licensed under the Apache License, Version 2.0 (the "License");
8# you may not use this file except in compliance with the License.
9# You may obtain a copy of the License at
10#
11#      http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS,
15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18import os
19import sys
20import tempfile
21import time
22import subprocess
23import re
24import platform
25import glob
26import socket
27import threading
28import optparse
29import atexit
30import signal
31import urllib.parse
32import shutil
33import errno
34import hashlib
35import uuid
36import codecs
37from datetime import datetime, timedelta, tzinfo
38from io import BytesIO, StringIO
39
40
41# This is a facility that provides a functionality similar to atexit from the
42# standard library. We don't use the latter for the following reasons.
43#
44# When cbcollect_info is started with --watch-stdin flag, we start a thread
45# monitoring stdin that terminates the process when stdin gets closed. The
46# issue is many-fold:
47#
48#  - sys.exit() can only be called from the main thread.
49#
50#  - os._exit() doesn't invoke any of the cleanup functions registered by
51#    atexit.
52#
53#  - It's possible for the stdin watcher thread to interrupt the main thread
54#    by calling _thread.interrupt_main(). This plays nicely with atexit. But
55#    the issue is that the thread can't always be interrupted. So it can take
56#    a noticeable amount of time for the main thread to terminate.
57#
58# So AltExitC is a solution to these issues. It terminates the process as soon
59# as possible by calling os._exit(). The price is that the cleanup actions
60# need to be registered with AltExitC and synchronization is a concern.
class AltExitC(object):
    """Replacement for atexit that allows the process to be terminated
    from any thread via os._exit() while still running registered
    cleanup actions.

    Cleanup functions run in reverse registration order (like atexit).
    The handler is also registered with atexit itself, so cleanups run
    on a normal interpreter exit as well.
    """

    def __init__(self):
        self.list = []
        self.lock = threading.Lock()
        atexit.register(self.at_exit_handler)

    def register(self, f):
        """Register cleanup function f to be run at exit."""
        with self.lock:
            self.list.append(f)

    def register_and_unlock(self, f):
        """Register f; the caller must already hold self.lock.

        The lock is released in all cases. Kept for API compatibility
        with callers that take the lock themselves.
        """
        try:
            self.list.append(f)
        finally:
            self.lock.release()

    def at_exit_handler(self):
        """Run all registered cleanups, newest first, swallowing errors.

        BUG FIX: previously the lock was acquired here and never
        released, so any register() issued after (or concurrently with)
        exit handling would deadlock, and a second invocation of the
        handler would block forever. The callback list is now atomically
        detached under the lock, which also makes this handler
        idempotent.
        """
        with self.lock:
            callbacks, self.list = self.list, []

        for f in reversed(callbacks):
            try:
                f()
            except BaseException:
                # Continue exit handling in spite of any exceptions
                pass

    def exit(self, status):
        """Run cleanups, then terminate immediately via os._exit(status).

        Unlike sys.exit(), this is safe to call from any thread.
        """
        self.at_exit_handler()
        os._exit(status)
90
91
# Global instance used throughout this script for cleanup-on-exit handling.
AltExit = AltExitC()

# Currently we decode bytes in this file via LATIN1. The reason for this is that
# UTF8 (which is the default in python) is a decoding which can fail - i.e. not
# all sequences of bytes are valid UTF8 and we cannot currently guarantee that
# all bytes that will be run through cbcollect will be valid UTF8. (We need
# protections elsewhere to make this guarantee that currently don't exist.) By
# contrast, all byte sequences are valid LATIN1, almost all our content is ASCII
# and thus LATIN1, and python2 essentially decoded strings as LATIN1, thus we
# are backwards compatible with pre-6.5 behavior. See MB-33809.
# For cases in which one knows for certain UTF8 is being used, feel free
# to use it.
LATIN1 = 'latin1'

# optparse usage template; optparse substitutes the program name for %prog.
USAGE = """usage: %prog [options] output_file.zip

- Linux/Windows/OSX:
    %prog output_file.zip
    %prog -v output_file.zip"""
111
112# adapted from pytz
113
114
class FixedOffsetTZ(tzinfo):
    """Minimal tzinfo with a constant UTC offset and no DST handling
    (adapted from pytz). Used to render local timestamps in log lines."""

    def __init__(self, minutes):
        if not -1440 < minutes < 1440:
            raise ValueError("absolute offset is too large", minutes)
        self._minutes = minutes
        self._offset = timedelta(minutes=minutes)

    def utcoffset(self, dt):
        """Constant offset, independent of the datetime supplied."""
        return self._offset

    def dst(self, dt):
        """No daylight-saving adjustment is ever applied."""
        return timedelta(0)

    def tzname(self, dt):
        """This zone has no name."""
        return None
130
131
# Fixed-offset timezone used to stamp log lines with local time.
# NOTE(review): time.timezone is seconds *west* of UTC and ignores DST, so
# the resulting offset sign looks inverted for zones east of UTC - confirm
# whether this is intentional before changing.
local_tz = FixedOffsetTZ(minutes=time.timezone / 60)
# In-memory copy of everything log() writes; included in the archive later.
log_stream = StringIO()
# Loopback addresses; populated by set_local_addr() once the IP family is known.
local_addr = None
local_url_addr = None
137
def set_local_addr(ipv6):
    """Select the loopback address strings for the node's address family.

    local_addr is the bare address; local_url_addr is the form suitable
    for embedding in URLs (IPv6 needs brackets).
    """
    global local_addr
    global local_url_addr

    if ipv6:
        local_addr = "::1"
        local_url_addr = "[::1]"
    else:
        local_addr = "127.0.0.1"
        local_url_addr = "127.0.0.1"
144
145
# Partially built log line buffered by buffer_log_line() until a newline arrives.
log_line = None
147
148
def buffer_log_line(message, new_line):
    """Accumulate message into the pending log line.

    A fresh line is prefixed with an ISO-8601 local timestamp. When
    new_line is true the completed line is returned and the module-level
    buffer is cleared; otherwise the text is held in log_line and None
    is returned.
    """
    global log_line

    pending = log_line
    if pending is None:
        pending = '[%s] ' % datetime.now(tz=local_tz).isoformat()

    pending += message
    if not new_line:
        log_line = pending
        return None

    log_line = None
    return pending
164
165
def log(message, new_line=True):
    """Echo message to stderr immediately, and also buffer it (with a
    timestamp prefix) into the in-memory log_stream so it can be added
    to the collection archive later."""
    if new_line:
        message += '\n'

    completed = buffer_log_line(message, new_line)
    if completed is not None:
        log_stream.write(completed)

    sys.stderr.write(message)
    sys.stderr.flush()
178
179
def generate_hash(val):
    """Return a SHA-1 hash object computed over the UTF-8 encoding of val."""
    encoded = val.encode()
    return hashlib.sha1(encoded)
182
183
class AccessLogProcessor:
    """Redacts sensitive fields (usernames, document ids) from HTTP
    access log lines.

    A line is split into columns by column_parser; the user column is
    always hashed, and the URL column is redacted according to
    urls_to_redact.
    """

    def __init__(self, salt):
        self.salt = salt
        # Common-log-format columns:
        # (host ident )(user)( [timestamp] "METHOD )(url)( rest of line)
        self.column_parser = re.compile(
            r'(^\S* \S* )(\S*)( \[.*\] \"\S* )(\S*)( .*$)')
        # Each entry: [url prefix, regex over the URL tail, redaction
        # function, named group to replace].
        # BUG FIX: the second fragment of the docs regex was a non-raw
        # string containing \/ and \s - invalid escape sequences that only
        # worked by accident (and warn on modern Python); it is now raw.
        self.urls_to_redact = [['/settings/rbac/users',
                                re.compile(r'\/(?P<user>[^\/\s#&]+)([#&]|$)'),
                                self._process_user, "user"],
                               ['/_cbauth/checkPermission',
                                re.compile(r'user=(?P<user>[^\s&#]+)'),
                                self._process_user, "user"],
                               ['/pools/default/buckets',
                                re.compile(r'\/(?:[^\/\s#&]+)\/docs\/'
                                           r'(?P<docid>[^\/\s#&]+)$'),
                                self._process_docid, "docid"]]

    def _process_url(self, surl):
        """Redact the URL if it starts with one of the known prefixes."""
        for conf in self.urls_to_redact:
            prefix = conf[0]
            if surl[:len(prefix)] == prefix:
                return prefix + self._process_url_tail(conf[1], conf[2],
                                                       conf[3],
                                                       surl[len(prefix):])
        return surl

    def _process_url_tail(self, rex, fn, key, s):
        """Apply fn to the named group `key` of rex within s, if it matches."""
        m = rex.search(s)
        if m is not None:
            return s[:m.start(key)] + fn(m.group(key)) + s[m.end(key):]
        else:
            return s

    def _process_user(self, user):
        """Hash a username; '-' (absent) and internal '@' users are kept,
        and a '/UI' suffix is preserved around the hash."""
        if user == '-' or user[0] == '@':
            return user
        elif user[-3:] == "/UI":
            return self._hash(user[:-3]) + "/UI"
        else:
            return self._hash(user)

    def _process_docid(self, docid):
        """Hash a document id."""
        return self._hash(docid)

    def _hash(self, token):
        """Salted SHA-1 hex digest of token."""
        return generate_hash(self.salt + token).hexdigest()

    def _repl_columns(self, matchobj):
        # Reassemble the line with the user (group 2) and URL (group 4)
        # columns redacted.
        return matchobj.group(1) + \
            self._process_user(matchobj.group(2)) + \
            matchobj.group(3) + \
            self._process_url(matchobj.group(4)) + \
            matchobj.group(5)

    def do(self, line):
        """Return line with sensitive columns redacted (unchanged if the
        line doesn't look like an access log entry)."""
        return self.column_parser.sub(self._repl_columns, line)
239
240
class RegularLogProcessor:
    """Redacts <ud>...</ud> (user data) spans in ordinary log files."""

    rexes = [re.compile('(<ud>)(.+?)(</ud>)'),
             # Redact the rest of the line in the case we encounter
             # log-redaction-salt. Needed to redact pre-6.5 debug logs
             # as well as occurrence in couchbase.log
             re.compile('(log-redaction-salt)(.+)')]

    def __init__(self, salt):
        self.salt = salt

    def _hash(self, match):
        """Substitution callback: hash a tagged span, or blank out
        everything after a salt marker."""
        head = match.group(1)
        if match.lastindex == 3:
            digest = generate_hash(self.salt + match.group(2)).hexdigest()
            return head + digest + match.group(3)
        if match.lastindex == 2:
            return head + " <redacted>"
        return head

    def _process_line(self, line):
        """Apply every redaction pattern to the line."""
        for pattern in self.rexes:
            line = pattern.sub(self._hash, line)
        return line

    def do(self, line):
        """Return the redacted form of line."""
        return self._process_line(line)
267
268
class CouchbaseLogProcessor(RegularLogProcessor):
    """Like RegularLogProcessor, but rewrites the RedactLevel marker line
    so the archive records that partial redaction was applied."""

    def do(self, line):
        if "RedactLevel" not in line:
            return self._process_line(line)
        # salt + salt to maintain consistency with other
        # occurrences of hashed salt in the logs.
        return 'RedactLevel:partial,HashOfSalt:%s\n' \
            % generate_hash(self.salt + self.salt).hexdigest()
278
279
class LogRedactor:
    """Routes collected files to the appropriate redaction processor and
    writes the redacted copies into <tmpdir>/redacted."""

    def __init__(self, salt, tmpdir, default_name):
        self.default_name = default_name
        self.target_dir = os.path.join(tmpdir, "redacted")
        os.makedirs(self.target_dir)

        self.access_log = AccessLogProcessor(salt)
        self.couchbase_log = CouchbaseLogProcessor(salt)
        self.regular_log = RegularLogProcessor(salt)

    def _process_file(self, ifile, ofile, processor):
        # Don't try to catch any errors here as we want failures (e.g.
        # due to disk full) to abort the collection.  This will allow
        # the cause of the failure to be fixed and collection retried.
        with codecs.open(ifile, 'r', LATIN1) as inp:
            with open(ofile, 'w+') as out:
                for raw_line in inp:
                    out.write(processor.do(raw_line))

    def redact_file(self, name, ifile):
        """Redact ifile into the target dir under `name`; returns the
        path of the redacted copy."""
        ofile = os.path.join(self.target_dir, name)

        processor = self.regular_log
        if "http_access" in name:
            processor = self.access_log
        elif name == self.default_name:
            processor = self.couchbase_log

        self._process_file(ifile, ofile, processor)
        return ofile
308
309
class Task(object):
    """A single diagnostic-collection step: a command (shell string or
    argv list) plus options controlling how it is executed.

    Class attributes act as defaults and may be overridden per instance
    via **kwargs:
      privileged  - only run when we are root
      no_header   - don't write a banner before the output
      num_samples - how many times to run the command
      interval    - seconds to sleep between samples
    Other recognized kwargs: timeout, log_file, addenv, change_dir,
    to_stdin, artifacts, platforms.
    """
    privileged = False
    no_header = False
    num_samples = 1
    interval = 0

    def __init__(self, description, command, timeout=None, **kwargs):
        self.description = description
        self.command = command
        self.timeout = timeout
        self.__dict__.update(kwargs)
        self._is_posix = (os.name == 'posix')

    def _platform_popen_flags(self):
        # On POSIX run the child in its own process group so that _kill()
        # can terminate the whole group (shell plus its descendants).
        flags = {}
        if self._is_posix:
            flags['preexec_fn'] = os.setpgrp

        return flags

    def _can_kill(self, p):
        if self._is_posix:
            return True

        return hasattr(p, 'kill')

    def _kill(self, p):
        if self._is_posix:
            group_pid = os.getpgid(p.pid)
            os.killpg(group_pid, signal.SIGKILL)
        else:
            p.kill()

    def _env_flags(self):
        # Optional 'addenv' attribute: extra environment entries for the
        # child process (merged over a copy of our environment).
        flags = {}
        if hasattr(self, 'addenv'):
            env = os.environ.copy()
            env.update(self.addenv)
            flags['env'] = env

        return flags

    def _cwd_flags(self):
        # Optional 'change_dir' attribute: True means run in the task
        # runner's tmpdir; a string names an explicit directory.
        flags = {}
        if getattr(self, 'change_dir', False):
            cwd = self._task_runner.tmpdir
            if isinstance(self.change_dir, str):
                cwd = self.change_dir

            flags['cwd'] = cwd

        return flags

    def _extra_flags(self):
        flags = self._env_flags()
        flags.update(self._platform_popen_flags())
        flags.update(self._cwd_flags())

        return flags

    def set_task_runner(self, runner):
        self._task_runner = runner

    def execute(self, fp):
        """Run the task, streaming combined stdout/stderr into fp (a
        binary-mode file object). Returns the child's exit status, or
        127 if the command could not be started."""
        use_shell = not isinstance(self.command, list)
        extra_flags = self._extra_flags()
        try:
            p = subprocess.Popen(self.command, bufsize=-1,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=use_shell,
                                 **extra_flags)
        except OSError as e:
            # if use_shell is False then Popen may raise exception
            # if binary is missing. In this case we mimic what
            # shell does. Namely, complaining to stderr and
            # setting non-zero status code. It's might also
            # automatically handle things like "failed to fork due
            # to some system limit".
            fp.write(f"Failed to execute {self.command}: {e}\n".encode())
            return 127

        # BUG FIX: in Python 3, IOError is an alias of OSError, so with the
        # original single try block the "except OSError" arm swallowed
        # broken-pipe errors from the stdin write and incorrectly reported
        # "Failed to execute"/127 for a process that had already started.
        # Feeding stdin is therefore handled in its own try block.
        try:
            if hasattr(self, 'to_stdin'):
                p.stdin.write(self.to_stdin.encode())

            p.stdin.close()
        except OSError as e:
            if e.errno == errno.EPIPE:
                fp.write(f"Ignoring broken pipe on stdin for {self.command}\n".encode())
            else:
                raise

        from threading import Timer, Event

        timer = None
        timer_fired = Event()

        if self.timeout is not None and self._can_kill(p):
            def on_timeout():
                try:
                    self._kill(p)
                except BaseException:
                    # the process might have died already
                    pass

                timer_fired.set()

            timer = Timer(self.timeout, on_timeout)
            timer.start()

        try:
            while True:
                data = p.stdout.read(64 * 1024)
                if not data:
                    break

                fp.write(data)
        finally:
            if timer is not None:
                timer.cancel()
                timer.join()

                # there's a tiny chance that command succeeds just before
                # timer is fired; that would result in a spurious timeout
                # message
                # (is_set: the camelCase isSet alias is deprecated and has
                # been removed from threading in newer Python versions)
                if timer_fired.is_set():
                    fp.write(f"`{self.command}` timed out after {self.timeout} seconds\n".encode())
                    log("[Command timed out after %s seconds] - " %
                        (self.timeout), new_line=False)

        return p.wait()

    def will_run(self):
        """Determine if this task will run on this platform."""
        return sys.platform in self.platforms
448
449
class TaskRunner(object):
    """Executes Tasks, collecting their output into per-task log files in
    a temporary directory, and finally packs everything into a zip
    archive (optionally redacted)."""

    default_name = "couchbase.log"

    def __init__(self, verbosity=0, task_regexp='', tmp_dir=None,
                 salt_value=""):
        self.files = {}
        self.verbosity = verbosity
        self.start_time = time.strftime("%Y%m%d-%H%M%S", time.gmtime())
        self.salt_value = salt_value

        # Depending on platform, mkdtemp() may act unpredictably if passed an
        # empty string.
        if not tmp_dir:
            tmp_dir = None
        else:
            tmp_dir = os.path.abspath(os.path.expanduser(tmp_dir))

        try:
            self.tmpdir = tempfile.mkdtemp(dir=tmp_dir)
        except OSError as e:
            print("Could not use temporary dir {0}: {1}".format(tmp_dir, e))
            sys.exit(1)

        # If a dir wasn't passed by --tmp-dir, check if the env var was set and
        # if we were able to use it
        if not tmp_dir and os.getenv("TMPDIR") and os.path.split(
                self.tmpdir)[0] != os.getenv("TMPDIR"):
            log("Could not use TMPDIR {0}".format(os.getenv("TMPDIR")))
        log("Using temporary dir {0}".format(os.path.split(self.tmpdir)[0]))

        self.task_regexp = re.compile(task_regexp)

        AltExit.register(self.finalize)

    def finalize(self):
        """Close all collected files and remove the temporary directory.

        Registered with AltExit, so it must never raise.

        BUG FIX: this used to iterate self.files.items(), which yields
        (name, fp) tuples; calling .close() on a tuple raised an
        AttributeError that was silently swallowed, so no file was ever
        closed here. Iterate the values and close each one in its own
        try so one failure doesn't stop the rest.
        """
        for fp in self.files.values():
            try:
                fp.close()
            except BaseException:
                # Continue exit handling in spite of any exceptions
                pass

        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def collect_file(self, filename):
        """Add a file to the list of files collected. Used to capture the exact
        file (including timestamps) from the Couchbase instance.

        filename - Absolute path to file to collect.
        """
        if filename not in self.files:
            try:
                self.files[filename] = open(filename, 'r')
            except IOError as e:
                log("Failed to collect file '%s': %s" % (filename, str(e)))
        else:
            log("Unable to collect file '%s' - already collected." % filename)

    def get_file(self, filename):
        """Return (opening if necessary) the binary log file `filename`
        in our temporary directory."""
        if filename in self.files:
            fp = self.files[filename]
        else:
            fp = open(os.path.join(self.tmpdir, filename), 'wb+')
            self.files[filename] = fp

        return fp

    def header(self, fp, title, subtitle):
        """Write a banner separating one task's output from the next."""
        separator = '=' * 78
        if isinstance(subtitle, list):
            subtitle = " ".join(subtitle)
        message = f"{separator}\n{title}\n{subtitle}\n{separator}\n"
        fp.write(message.encode())
        fp.flush()

    def log_result(self, result):
        """Log a task's exit status."""
        if result == 0:
            log("OK")
        else:
            log("Exit code %d" % result)

    def run_tasks(self, tasks):
        """Run each task in order."""
        for task in tasks:
            self.run(task)

    def run(self, task):
        """Run a task unless it is filtered out by --task-regexp."""
        if self.task_regexp.match(task.description) is None:
            log("Skipping task %s because "
                "it doesn't match '%s'" % (task.description,
                                           self.task_regexp.pattern))
        else:
            self._run(task)

    def _run(self, task):
        """Run a task with a file descriptor corresponding to its log file"""
        if task.will_run():
            log("%s (%s) - " % (task.description, task.command), new_line=False)
            if task.privileged and os.getuid() != 0:
                log("skipped (needs root privs)")
                return

            task.set_task_runner(self)

            filename = getattr(task, 'log_file', self.default_name)
            fp = self.get_file(filename)
            if not task.no_header:
                self.header(fp, task.description, task.command)

            for i in range(task.num_samples):
                if i > 0:
                    log("Taking sample %d after %f seconds - " %
                        (i + 1, task.interval), new_line=False)
                    time.sleep(task.interval)
                result = task.execute(fp)
                self.log_result(result)

            for artifact in getattr(task, 'artifacts', []):
                path = artifact
                if not os.path.isabs(path):
                    # we assume that "relative" artifacts are produced in the
                    # self.tmpdir
                    path = os.path.join(self.tmpdir, path)

                self.collect_file(path)

            fp.flush()

        elif self.verbosity >= 2:
            log('Skipping "%s" (%s): not for platform %s' %
                (task.description, task.command, sys.platform))

    def literal(self, description, value, **kwargs):
        """Convenience wrapper: run a LiteralTask."""
        self.run(LiteralTask(description, value, **kwargs))

    def redact_and_zip(self, filename, node):
        """Redact every collected file and zip the redacted copies."""
        files = []
        redactor = LogRedactor(self.salt_value, self.tmpdir, self.default_name)

        for name, fp in self.files.items():
            # users.dets contains raw credentials data; never include it
            # in a redacted archive.
            if "users.dets" in name:
                continue
            files.append(redactor.redact_file(name, fp.name))

        prefix = "cbcollect_info_%s_%s" % (node, self.start_time)
        self._zip_helper(prefix, filename, files)

    def close_all_files(self):
        """Flush-and-close every collected file."""
        for name, fp in self.files.items():
            fp.close()

    def zip(self, filename, node):
        """Zip all collected files (unredacted)."""
        prefix = "cbcollect_info_%s_%s" % (node, self.start_time)

        files = []
        for name, fp in self.files.items():
            files.append(fp.name)
        self._zip_helper(prefix, filename, files)

    def _zip_helper(self, prefix, filename, files):
        """Write all our logs to a zipfile"""
        exe = exec_name("gozip")

        fallback = False

        try:
            p = subprocess.Popen([exe,
                                  "-strip-path",
                                  "-prefix",
                                  prefix,
                                  filename] + files,
                                 stderr=subprocess.STDOUT,
                                 stdin=subprocess.PIPE)
            p.stdin.close()
            status = p.wait()

            if status != 0:
                log("gozip terminated with non-zero exit code (%d)" % status)
        except OSError as e:
            log("Exception during compression: %s" % e)
            fallback = True

        if fallback:
            log("IMPORTANT:")
            log("  Compression using gozip failed.")
            log("  Falling back to python implementation.")
            log("  Please let us know about this and provide console output.")

            self._zip_fallback(filename, prefix, files)

    def _zip_fallback(self, filename, prefix, files):
        """Pure-python zip fallback used when gozip is unavailable."""
        from zipfile import ZipFile, ZIP_DEFLATED
        zf = ZipFile(filename, mode='w', compression=ZIP_DEFLATED)
        try:
            for name in files:
                zf.write(name,
                         "%s/%s" % (prefix, os.path.basename(name)))
        finally:
            zf.close()
648
649
# Platform-marker Task subclasses. Each only narrows `platforms`, which
# Task.will_run() compares against sys.platform to decide whether the
# task executes on this machine.
class SolarisTask(Task):
    platforms = ['sunos5', 'solaris']


class LinuxTask(Task):
    platforms = ['linux']


class WindowsTask(Task):
    platforms = ['win32', 'cygwin']


class MacOSXTask(Task):
    platforms = ['darwin']


# Any POSIX-like platform we support.
class UnixTask(SolarisTask, LinuxTask, MacOSXTask):
    platforms = SolarisTask.platforms + LinuxTask.platforms + MacOSXTask.platforms


# Every platform we support.
class AllOsTask(UnixTask, WindowsTask):
    platforms = UnixTask.platforms + WindowsTask.platforms
672
673
class LiteralTask(AllOsTask):
    """Task that emits a fixed string instead of running a command."""

    def __init__(self, description, literal, **kwargs):
        self.description = description
        self.command = ''
        self.literal = literal
        self.__dict__.update(kwargs)

    def execute(self, fp):
        """Write the literal value (plus a newline) and report success."""
        payload = self.literal.encode() + b'\n'
        fp.write(payload)
        return 0
684
685
class CollectFile(AllOsTask):
    """Task that schedules a file for verbatim collection by the runner."""

    def __init__(self, description, file_path, **kwargs):
        self.description = description
        self.command = ''
        self.file_path = file_path
        self.__dict__.update(kwargs)

    def execute(self, fp):
        """Register the file with the task runner and note it in the log."""
        runner = self._task_runner
        runner.collect_file(self.file_path)
        fp.write(f"Collected file {self.file_path}\n".encode())
        return 0
697
698
def make_curl_task(name, user, password, url,
                   timeout=60, log_file="couchbase.log", base_task=AllOsTask,
                   **kwargs):
    """Build a task that fetches url via curl. Credentials are fed to
    curl through stdin (-K-) so they never appear in the process list."""
    credentials = "--user %s:%s" % (user, password)
    command = ["curl", "-sS", "--proxy", "", "-K-", url]
    return base_task(name, command,
                     timeout=timeout,
                     log_file=log_file,
                     to_stdin=credentials,
                     **kwargs)
707
708
def make_cbstats_task(kind, memcached_pass, guts):
    """Build a task that gathers memcached statistics of the given kind.
    The password is passed via the environment (CB_PASSWORD), not argv."""
    port = read_guts(guts, "memcached_port")
    user = read_guts(guts, "memcached_admin")
    description = "memcached stats %s" % kind
    command = flatten(["cbstats", "-a", "%s:%s" % (local_url_addr, port),
                       kind, "-u", user])
    return AllOsTask(description,
                     command,
                     log_file="stats.log",
                     timeout=60,
                     addenv=[("CB_PASSWORD", memcached_pass)])
718
719
def get_local_token(guts, port):
    """Read the node's @localtoken secret from disk.

    Returns the token with its trailing newline stripped, or "" if the
    file cannot be read (the failure is logged).
    """
    path = read_guts(guts, "localtoken_path")
    try:
        with open(path, 'r') as f:
            return f.read().rstrip('\n')
    except IOError as e:
        log("I/O error(%s): %s" % (e.errno, e.strerror))
        return ""
729
730
def get_diag_password(guts):
    """Fetch the @localtoken-protected diag password from the local REST
    API. Returns "" (after logging curl's output) on failure."""
    port = read_guts(guts, "rest_port")
    pwd = get_local_token(guts, port)
    url = "http://%s:%s/diag/password" % (local_url_addr, port)
    command = ["curl", "-sS", "--proxy", "", "-u", "@localtoken:%s" % pwd, url]

    task = AllOsTask("get diag password", command, timeout=60)
    captured = BytesIO()
    status = task.execute(captured)
    output = captured.getvalue().decode(LATIN1)
    if status != 0:
        log(output)
        return ""
    return output
745
746
def make_query_task(statement, user, password, port):
    """Build a curl task that runs a N1QL statement against the local
    query service."""
    quoted = urllib.parse.quote(statement)
    url = "http://%s:%s/query/service?statement=%s" % (
        local_url_addr, port, quoted)
    name = "Result of query statement '%s'" % statement
    return make_curl_task(name=name,
                          user=user, password=password, url=url)
753
754
def make_index_task(name, api, passwd, index_port, logfile="couchbase.log", **kwargs):
    """Build a curl task against the local indexer's REST API endpoint."""
    url = 'http://%s:%s/%s' % (local_url_addr, index_port, api)
    return make_curl_task(name, "@", passwd, url, log_file=logfile, **kwargs)
759
760
def make_redaction_task():
    """Marker line recording that no redaction was applied to this archive."""
    return LiteralTask(description="Log Redaction", literal="RedactLevel:none")
763
764
def basedir():
    """Return the install root: this file lives in
    $INSTALL_DIR/lib/python, so go up three directory levels."""
    here = os.path.abspath(__file__)
    return os.path.normpath(os.path.join(here, '..', '..', '..'))
774
775
def make_event_log_task():
    """Build a Windows task dumping recent application/system event-log
    errors and warnings via wmic."""
    from datetime import datetime, timedelta

    # wmic ntevent can be extremely slow, so limit the output to
    # approximately the last month.
    cutoff = datetime.today() - timedelta(days=31)
    limit = cutoff.strftime('%Y%m%d000000.000000-000')

    query = ("wmic ntevent where "
             "\""
             "(LogFile='application' or LogFile='system') and "
             "EventType<3 and TimeGenerated>'%s'"
             "\" "
             "get TimeGenerated,LogFile,SourceName,EventType,Message "
             "/FORMAT:list" % limit)
    return WindowsTask("Event log", query)
794
795
796def make_os_tasks():
797    programs = " ".join(["moxi", "memcached", "beam.smp",
798                         "couch_compact", "godu", "sigar_port",
799                         "cbq-engine", "indexer", "projector", "goxdcr",
800                         "cbft", "eventing-producer", "eventing-consumer"])
801
802    _tasks = [
803        UnixTask("uname", "uname -a"),
804        UnixTask("time and TZ", "date; date -u"),
805        UnixTask("ntp time",
806                 "ntpdate -q pool.ntp.org || "
807                 "nc time.nist.gov 13 || "
808                 "netcat time.nist.gov 13", timeout=60),
809        UnixTask("ntp peers", "ntpq -p"),
810        UnixTask("raw /etc/sysconfig/clock", "cat /etc/sysconfig/clock"),
811        UnixTask("raw /etc/timezone", "cat /etc/timezone"),
812        WindowsTask("System information", "systeminfo"),
813        WindowsTask("Computer system", "wmic computersystem"),
814        WindowsTask("Computer OS", "wmic os"),
815        LinuxTask("System Hardware", "lshw -json || lshw"),
816        SolarisTask("Process list snapshot",
817                    "prstat -a -c -n 100 -t -v -L 1 10"),
818        SolarisTask("Process list", "ps -ef"),
819        SolarisTask("Service configuration", "svcs -a"),
820        SolarisTask("Swap configuration", "swap -l"),
821        SolarisTask("Disk activity", "zpool iostat 1 10"),
822        SolarisTask("Disk activity", "iostat -E 1 10"),
823        LinuxTask("Process list snapshot",
824                  "export TERM=''; top -Hb -n1 || top -H n1"),
825        LinuxTask(
826            "Process list",
827            "ps -AwwL -o user,pid,lwp,ppid,nlwp,pcpu,maj_flt,min_flt,pri,nice,vsize,rss,tty,stat,wchan:12,start,"
828            "bsdtime,comm,command"),
829        LinuxTask("Raw /proc/buddyinfo", "cat /proc/buddyinfo"),
830        LinuxTask("Raw /proc/meminfo", "cat /proc/meminfo"),
831        LinuxTask("Raw /proc/pagetypeinfo", "cat /proc/pagetypeinfo"),
832        LinuxTask("Raw /proc/zoneinfo", "cat /proc/zoneinfo"),
833        LinuxTask("Raw /proc/vmstat", "cat /proc/vmstat"),
834        LinuxTask("Raw /proc/mounts", "cat /proc/mounts"),
835        LinuxTask("Raw /proc/partitions", "cat /proc/partitions"),
836        LinuxTask("Raw /proc/diskstats",
837                  "cat /proc/diskstats; echo ''", num_samples=10, interval=1),
838        LinuxTask("Raw /proc/interrupts", "cat /proc/interrupts"),
839        LinuxTask("Swap configuration", "free -t"),
840        LinuxTask("Swap configuration", "swapon -s"),
841        LinuxTask("Kernel modules", "lsmod"),
842        LinuxTask("Distro version", "cat /etc/redhat-release"),
843        LinuxTask("Distro version", "cat /etc/oracle-release"),
844        LinuxTask("Distro version", "lsb_release -a"),
845        LinuxTask("Distro version", "cat /etc/SuSE-release"),
846        LinuxTask("Distro version", "cat /etc/issue"),
847        LinuxTask("Distro version", "cat /etc/os-release"),
848        LinuxTask("Distro version", "cat /etc/system-release"),
849        LinuxTask("Installed software", "rpm -qa"),
850        LinuxTask("Ksplice updates", "uptrack-show"),
851        LinuxTask("Hot fix list", "rpm -V couchbase-server"),
852        # NOTE: AFAIK columns _was_ necessary, but it doesn't appear to be
853        # required anymore. I.e. dpkg -l correctly detects stdout as not a
854        # tty and stops playing smart on formatting. Lets keep it for few
855        # years and then drop, however.
856        LinuxTask("Installed software", "COLUMNS=300 dpkg -l"),
857        # NOTE: -V is supported only from dpkg v1.17.2 onwards.
858        LinuxTask("Hot fix list", "COLUMNS=300 dpkg -V couchbase-server"),
859        LinuxTask(
860            "Extended iostat",
861            "iostat -x -p ALL 1 10 || iostat -x 1 10"),
862        LinuxTask("Core dump settings",
863                  "find /proc/sys/kernel -type f -name '*core*' -print -exec cat '{}' ';'"),
864        UnixTask("sysctl settings", "sysctl -a"),
865        LinuxTask("Relevant lsof output",
866                  "echo %(programs)s | xargs -n1 pgrep | xargs -n1 -r -- lsof -n -p" % locals()),
867        LinuxTask("LVM info", "lvdisplay"),
868        LinuxTask("LVM info", "vgdisplay"),
869        LinuxTask("LVM info", "pvdisplay"),
870        LinuxTask("Block device queue settings",
871                  "find /sys/block/*/queue -type f | xargs grep -vH xxxx | sort"),
872        MacOSXTask("Process list snapshot", "top -l 1"),
873        MacOSXTask("Disk activity", "iostat 1 10"),
874        MacOSXTask("Process list",
875                   "ps -Aww -o user,pid,lwp,ppid,nlwp,pcpu,pri,nice,vsize,rss,tty,"
876                   "stat,wchan:12,start,bsdtime,command"),
877        WindowsTask("Installed software", "wmic product get name, version"),
878        WindowsTask(
879            "Service list", "wmic service where state=\"running\" GET caption, name, state"),
880        WindowsTask("Process list", "wmic process"),
881        WindowsTask("Process usage", "tasklist /V /fo list"),
882        WindowsTask("Swap settings", "wmic pagefile"),
883        WindowsTask("Disk partition", "wmic partition"),
884        WindowsTask("Disk volumes", "wmic volume"),
885        UnixTask("Network configuration", "ifconfig -a", interval=10,
886                 num_samples=2),
887        LinuxTask("Network configuration",
888                  "echo link addr neigh rule route netns | xargs -n1 -- sh -x -c 'ip $1 list' --"),
889        WindowsTask("Network configuration", "ipconfig /all", interval=10,
890                    num_samples=2),
891        LinuxTask("Raw /proc/net/dev", "cat /proc/net/dev"),
892        LinuxTask("Network link statistics", "ip -s link"),
893        UnixTask("Network status", "netstat -anp || netstat -an"),
894        WindowsTask("Network status", "netstat -anotb"),
895        AllOsTask("Network routing table", "netstat -rn"),
896        LinuxTask("Network socket statistics", "ss -an"),
897        LinuxTask("Extended socket statistics",
898                  "ss -an --info --processes --memory --options",
899                  timeout=300),
900        UnixTask("Arp cache", "arp -na"),
901        LinuxTask("Iptables dump", "iptables-save"),
902        UnixTask("Raw /etc/hosts", "cat /etc/hosts"),
903        UnixTask("Raw /etc/resolv.conf", "cat /etc/resolv.conf"),
904        UnixTask("Raw /etc/nsswitch.conf", "cat /etc/nsswitch.conf"),
905        WindowsTask("Arp cache", "arp -a"),
906        WindowsTask("Network Interface Controller", "wmic nic"),
907        WindowsTask("Network Adapter", "wmic nicconfig"),
908        WindowsTask("Active network connection", "wmic netuse"),
909        WindowsTask("Protocols", "wmic netprotocol"),
910        WindowsTask(
911            "Hosts file", "type %SystemRoot%\system32\drivers\etc\hosts"),
912        WindowsTask("Cache memory", "wmic memcache"),
913        WindowsTask("Physical memory", "wmic memphysical"),
914        WindowsTask("Physical memory chip info", "wmic memorychip"),
915        WindowsTask("Local storage devices", "wmic logicaldisk"),
916        UnixTask("Filesystem", "df -ha"),
917        UnixTask("System activity reporter", "sar 1 10"),
918        UnixTask("System paging activity", "vmstat 1 10"),
919        UnixTask("System uptime", "uptime"),
920        UnixTask("Last logins of users and ttys", "last -x || last"),
921        UnixTask("couchbase user definition", "getent passwd couchbase"),
922        UnixTask("couchbase user limits", "su couchbase -s /bin/sh -c \"ulimit -a\"",
923                 privileged=True),
924        UnixTask("Interrupt status", "intrstat 1 10"),
925        UnixTask("Processor status", "mpstat 1 10"),
926        UnixTask("System log", "cat /var/adm/messages"),
927        LinuxTask("Raw /proc/uptime", "cat /proc/uptime"),
928        LinuxTask("Systemd journal",
929                  "journalctl | gzip -c > systemd_journal.gz",
930                  change_dir=True, artifacts=['systemd_journal.gz']),
931        LinuxTask("All logs",
932                  "tar cz /var/log/syslog* /var/log/dmesg /var/log/messages* /var/log/daemon* /var/log/debug* "
933                  "/var/log/kern.log* 2>/dev/null",
934                  log_file="syslog.tar.gz", no_header=True),
935        LinuxTask("Relevant proc data", "echo %(programs)s | "
936                  "xargs -n1 pgrep | xargs -n1 -- sh -c 'echo $1; cat /proc/$1/status; cat /proc/$1/limits; "
937                  "cat /proc/$1/smaps; cat /proc/$1/numa_maps; cat /proc/$1/task/*/sched; echo' --" % locals()),
938        LinuxTask("Processes' environment", "echo %(programs)s | "
939                  r"xargs -n1 pgrep | xargs -n1 -- sh -c 'echo $1; ( cat /proc/$1/environ | tr \\0 \\n | "
940                  "egrep -v ^CB_MASTER_PASSWORD=\|^CBAUTH_REVRPC_URL=); echo' --" % locals()),
941        LinuxTask("Processes' stack",
942                  "for program in %(programs)s; do for thread in $(pgrep --lightweight $program); "
943                  "do echo $program/$thread:; cat /proc/$thread/stack; echo; done; done" % locals()),
944        LinuxTask("NUMA data", "numactl --hardware"),
945        LinuxTask("NUMA data", "numactl --show"),
946        LinuxTask("NUMA data", "cat /sys/devices/system/node/node*/numastat"),
947        UnixTask("Kernel log buffer", "dmesg -T || dmesg -H || dmesg"),
948        LinuxTask("Transparent Huge Pages data",
949                  "cat /sys/kernel/mm/transparent_hugepage/enabled"),
950        LinuxTask("Transparent Huge Pages data",
951                  "cat /sys/kernel/mm/transparent_hugepage/defrag"),
952        LinuxTask("Transparent Huge Pages data",
953                  "cat /sys/kernel/mm/redhat_transparent_hugepage/enabled"),
954        LinuxTask("Transparent Huge Pages data",
955                  "cat /sys/kernel/mm/redhat_transparent_hugepage/defrag"),
956        LinuxTask("Network statistics", "netstat -s"),
957        LinuxTask("Full raw netstat", "cat /proc/net/netstat"),
958        LinuxTask("CPU throttling info",
959                  "echo /sys/devices/system/cpu/cpu*/thermal_throttle/* | xargs -n1 -- sh -c 'echo $1; cat $1' --"),
960        LinuxTask("Raw PID 1 scheduler /proc/1/sched",
961                  "cat /proc/1/sched | head -n 1"),
962        LinuxTask("Raw PID 1 control groups /proc/1/cgroup",
963                  "cat /proc/1/cgroup"),
964        make_event_log_task(),
965    ]
966
967    return _tasks
968
969# stolen from
970# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
971
972
def iter_flatten(iterable):
    """Recursively yield the leaf elements of arbitrarily nested
    lists/tuples, left to right. Non-list/tuple elements are yielded
    as-is."""
    for element in iterable:
        if isinstance(element, (list, tuple)):
            yield from iter_flatten(element)
        else:
            yield element


def flatten(iterable):
    """Return the fully flattened contents of *iterable* as a list."""
    return list(iter_flatten(iterable))
985
986
def read_guts(guts, key):
    """Look up *key* in the guts dict, returning "" when it is absent."""
    try:
        return guts[key]
    except KeyError:
        return ""
989
990
def winquote_path(s):
    """Quote a path for cmd.exe: collapse doubled backslashes into single
    ones, turn forward slashes into backslashes, and wrap the result in
    double quotes."""
    normalized = s.replace("\\\\", "\\")
    return '"%s"' % normalized.replace('/', "\\")
993
994# python's split splits empty string to [''] which doesn't make any
995# sense. So this function works around that.
996
997
def correct_split(string, splitchar):
    """Like str.split, except that splitting an empty string yields []
    instead of ['']."""
    parts = string.split(splitchar)
    return [] if parts == [''] else parts
1003
1004
def make_stats_archives_task(guts, initargs_path):
    """Build the task that dumps ns_server stats archives to JSON.

    Returns [] when the dump-stats/escript-wrapper scripts or the stats
    directory cannot be located; otherwise a single AllOsTask that writes
    stats_archives.json as an artifact.
    """
    escript_bin = exec_name("escript")
    wrapper_script = find_script("escript-wrapper")
    dumper_script = find_script("dump-stats")
    archives_dir = read_guts(guts, "stats_dir")

    # Without all three pieces there is nothing to collect.
    if dumper_script is None or wrapper_script is None or not archives_dir:
        return []

    output = "stats_archives.json"
    command = [escript_bin,
               wrapper_script,
               "--initargs-path", initargs_path, "--",
               dumper_script, archives_dir, output]
    return AllOsTask("stats archives",
                     command,
                     change_dir=True,
                     artifacts=[output])
1022
1023
def make_product_task(guts, initargs_path, memcached_pass, options):
    """Build the Couchbase-product-specific collection tasks.

    Uses the values extracted by dump-guts (*guts*) to locate data/log
    directories and per-service REST ports; a service whose port is
    missing from *guts* simply contributes no tasks. *memcached_pass* is
    passed to every task that authenticates against a local service.
    The *options* argument is accepted for interface compatibility but
    is not used here. Returns a flat list of Task objects.
    """
    root = os.path.abspath(os.path.join(initargs_path, "..", "..", "..", ".."))
    dbdir = os.path.realpath(read_guts(guts, "db_dir"))
    viewdir = os.path.realpath(read_guts(guts, "idx_dir"))
    rebdir = os.path.realpath(os.path.join(
        read_guts(guts, "log_path"), "rebalance"))
    nodes = correct_split(read_guts(guts, "nodes"), ",")

    diag_url = "http://%s:%s/diag" % (
        local_url_addr, read_guts(guts, "rest_port"))

    # Pick the first available DNS lookup utility. NOTE: this used
    # distutils.spawn.find_executable, but distutils was deprecated by
    # PEP 632 and removed in Python 3.12; shutil.which is the stdlib
    # replacement (shutil is already imported at the top of this file).
    lookup_cmd = None
    for cmd in ["dig", "nslookup", "host"]:
        if shutil.which(cmd) is not None:
            lookup_cmd = cmd
            break

    lookup_tasks = []
    if lookup_cmd is not None:
        lookup_tasks = [UnixTask(f"DNS lookup information for {node}",
                                 f"{lookup_cmd} '{node}'")
                        for node in nodes]

    getent_tasks = [LinuxTask("Name Service Switch "
                              "hosts database info for %s" % node,
                              ["getent", "ahosts", node])
                    for node in nodes]

    # N1QL system catalog dumps (only when the query service is present).
    query_tasks = []
    query_port = read_guts(guts, "query_port")
    if query_port:
        def make(statement):
            return make_query_task(statement, user="@",
                                   password=memcached_pass,
                                   port=query_port)

        query_tasks = [make("SELECT * FROM system:datastores"),
                       make("SELECT * FROM system:namespaces"),
                       make("SELECT * FROM system:keyspaces"),
                       make("SELECT * FROM system:indexes")]

    # Indexer REST endpoints: definitions, settings, stats and profiles.
    index_tasks = []
    index_port = read_guts(guts, "indexer_http_port")
    if index_port:
        index_tasks = [
            make_index_task(
                "Index definitions are: ",
                "getIndexStatus",
                memcached_pass,
                index_port),
            make_index_task(
                "Indexer settings are: ",
                "settings",
                memcached_pass,
                index_port),
            make_index_task(
                "Indexer stats are: ",
                "stats?partition=true",
                memcached_pass,
                index_port),
            make_index_task(
                "Index storage stats are: ",
                "stats/storage",
                memcached_pass,
                index_port),
            make_index_task(
                "MOI allocator stats are: ",
                "stats/storage/mm",
                memcached_pass,
                index_port),
            make_index_task(
                "Indexer Go routine dump: ",
                "debug/pprof/goroutine?debug=1",
                memcached_pass,
                index_port,
                logfile="indexer_pprof.log"),
            make_index_task(
                "Indexer Rebalance Tokens: ",
                "listRebalanceTokens",
                memcached_pass,
                index_port),
            make_index_task(
                "Indexer Metadata Tokens: ",
                "listMetadataTokens",
                memcached_pass,
                index_port),
            make_index_task(
                "Indexer CPU Profile: ",
                "debug/pprof/profile",
                memcached_pass,
                index_port,
                logfile="indexer_cprof.log",
                no_header=True),
            make_index_task(
                "Indexer Memory Profile: ",
                "debug/pprof/heap?debug=1",
                memcached_pass,
                index_port,
                logfile="indexer_mprof.log",
                no_header=True),
        ]

    projector_tasks = []
    proj_port = read_guts(guts, "projector_port")
    if proj_port:
        proj_url = 'http://%s:%s/debug/pprof/goroutine?debug=1' % (
            local_url_addr, proj_port)
        projector_tasks = [
            make_curl_task(
                name="Projector Go routine dump ",
                user="@",
                password=memcached_pass,
                url=proj_url,
                log_file="projector_pprof.log")]

    goxdcr_tasks = []
    goxdcr_port = read_guts(guts, "xdcr_rest_port")
    if goxdcr_port:
        goxdcr_url = 'http://%s:%s/debug/pprof/goroutine?debug=1' % (
            local_url_addr, goxdcr_port)
        goxdcr_tasks = [
            make_curl_task(
                name="GoXDCR Go routine dump ",
                user="@",
                password=memcached_pass,
                url=goxdcr_url,
                log_file="goxdcr_pprof.log")]

    fts_tasks = []
    fts_port = read_guts(guts, "fts_http_port")
    if fts_port:
        url = 'http://%s:%s/api/diag' % (local_url_addr, fts_port)
        fts_tasks = [make_curl_task(name="FTS /api/diag: ",
                                    user="@", password=memcached_pass,
                                    url=url,
                                    log_file="fts_diag.json", no_header=True)]

    # Analytics: node diagnostics, goroutine dump and Metadata catalogs.
    cbas_tasks = []
    cbas_port = read_guts(guts, "cbas_admin_port")
    if cbas_port:
        cbas_diag_url = 'http://%s:%s/analytics/node/diagnostics' % (
            local_url_addr, cbas_port)
        cbas_parent_port = read_guts(guts, "cbas_parent_port")
        cbas_pprof_url = 'http://%s:%s/debug/pprof/goroutine?debug=1' % (
            local_url_addr, cbas_parent_port)

        def make_cbas(statement):
            return make_query_task(statement, user="@",
                                   password=memcached_pass,
                                   port=cbas_port)

        cbas_tasks = [
            make_curl_task(
                name="Analytics /analytics/node/diagnostics: ",
                user="@",
                password=memcached_pass,
                url=cbas_diag_url,
                log_file="analytics_diag.json",
                no_header=True),
            make_curl_task(
                name="Analytics Go routine dump: ",
                user="@",
                password=memcached_pass,
                url=cbas_pprof_url,
                log_file="analytics_pprof.log",
                no_header=True),
            make_cbas("select * from `Metadata`.`Dataverse`"),
            make_cbas("select * from `Metadata`.`Dataset`"),
            make_cbas("select * from `Metadata`.`Index`"),
            make_cbas("select * from `Metadata`.`Bucket`"),
            make_cbas("select * from `Metadata`.`Link`")]

    eventing_tasks = []
    eventing_port = read_guts(guts, "eventing_http_port")
    if eventing_port:
        stats_url = 'http://%s:%s/api/v1/stats?type=full' % (
            local_url_addr, eventing_port)
        eventing_pprof_url = 'http://%s:%s/debug/pprof/goroutine?debug=1' % (
            local_url_addr, eventing_port)
        eventing_insight_url = 'http://%s:%s/getInsight?udmark=true&aggregate=false' % (
            local_url_addr, eventing_port)
        eventing_tasks = [
            make_curl_task(
                name="Eventing /api/v1/stats: ",
                user="@",
                password=memcached_pass,
                url=stats_url,
                log_file="eventing_stats.json",
                no_header=True),
            make_curl_task(
                name="Eventing Go routine dump: ",
                user="@",
                password=memcached_pass,
                url=eventing_pprof_url,
                log_file="eventing_pprof.log",
                no_header=True),
            make_curl_task(
                name="Eventing code insights: ",
                user="@",
                password=memcached_pass,
                url=eventing_insight_url,
                log_file="eventing_insights.log",
                no_header=True)]

    # Tasks that apply regardless of which services run on this node.
    _tasks = [
        UnixTask("Directory structure",
                 ["ls", "-lRai", root]),
        UnixTask("Database directory structure",
                 ["ls", "-lRai", dbdir]),
        UnixTask("Index directory structure",
                 ["ls", "-lRai", viewdir]),
        UnixTask("couch_dbinfo",
                 ["find", dbdir, "-type", "f",
                  "-name", "*.couch.*",
                  "-exec", "couch_dbinfo", "{}", "+"]),
        LinuxTask("Database directory filefrag info",
                  ["find", dbdir, "-type", "f", "-exec", "filefrag", "-v", "{}", "+"]),
        LinuxTask("Index directory filefrag info",
                  ["find", viewdir, "-type", "f", "-exec", "filefrag", "-v", "{}", "+"]),
        WindowsTask("Database directory structure",
                    "dir /s " + winquote_path(dbdir)),
        WindowsTask("Index directory structure",
                    "dir /s " + winquote_path(viewdir)),
        WindowsTask("Version file",
                    "type " + winquote_path(basedir()) + "\\VERSION.txt"),
        WindowsTask("Manifest file",
                    "type " + winquote_path(basedir()) + "\\manifest.txt"),
        WindowsTask("Manifest file",
                    "type " + winquote_path(basedir()) + "\\manifest.xml"),
        LinuxTask("Version file", "cat '%s/VERSION.txt'" % root),
        LinuxTask("Variant file", "cat '%s/VARIANT.txt'" % root),
        LinuxTask("Manifest file", "cat '%s/manifest.txt'" % root),
        LinuxTask("Manifest file", "cat '%s/manifest.xml'" % root),
        LiteralTask("Couchbase config", read_guts(guts, "ns_config")),
        LiteralTask("Couchbase static config",
                    read_guts(guts, "static_config")),
        LiteralTask("Raw ns_log", read_guts(guts, "ns_log")),
        # TODO: just gather those in python
        WindowsTask("Memcached logs",
                    "cd " + winquote_path(read_guts(guts, "memcached_logs_path")) + " && " +
                    "for /f %a IN ('dir memcached.log.* /od /tw /b') do type %a",
                    log_file="memcached.log"),
        UnixTask("Memcached logs",
                 ["sh", "-c", 'cd "$1"; for file in $(ls -tr memcached.log.*); do cat \"$file\"; done',
                  "--", read_guts(guts, "memcached_logs_path")],
                 log_file="memcached.log"),
        [WindowsTask("Ini files (%s)" % p,
                     "type " + winquote_path(p),
                     log_file="ini.log")
         for p in read_guts(guts, "couch_inis").split(";")],
        UnixTask("Ini files",
                 ["sh", "-c", 'for i in "$@"; do echo "file: $i"; cat "$i"; done',
                     "--"] + read_guts(guts, "couch_inis").split(";"),
                 log_file="ini.log"),

        make_curl_task(name="couchbase diags",
                       user="@",
                       password=memcached_pass,
                       timeout=600,
                       url=diag_url,
                       log_file="diag.log"),

        make_curl_task(name="master events",
                       user="@",
                       password=memcached_pass,
                       timeout=300,
                       url='http://%s:%s/diag/masterEvents?o=1' % (
                           local_url_addr, read_guts(guts, "rest_port")),
                       log_file="master_events.log",
                       no_header=True),

        make_curl_task(name="ale configuration",
                       user="@",
                       password=memcached_pass,
                       url='http://%s:%s/diag/ale' % (
                           local_url_addr, read_guts(guts, "rest_port")),
                       log_file="couchbase.log"),

        [AllOsTask("couchbase logs (%s)" % name, "cbbrowse_logs %s" % name,
                   addenv=[("REPORT_DIR", read_guts(guts, "log_path"))],
                   log_file="ns_server.%s" % name)
         for name in ["debug.log", "info.log", "error.log", "couchdb.log",
                      "xdcr_target.log",
                      "views.log", "mapreduce_errors.log",
                      "stats.log", "babysitter.log",
                      "reports.log", "http_access.log",
                      "http_access_internal.log", "ns_couchdb.log",
                      "goxdcr.log", "query.log", "projector.log", "indexer.log",
                      "fts.log", "metakv.log", "json_rpc.log", "eventing.log",
                      "analytics_info.log", "analytics_debug.log", "analytics_shutdown.log",
                      "analytics_error.log", "analytics_warn.log", "analytics_dcpdebug.log",
                      "analytics_trace.json", "analytics_access.log", "analytics_cbas_debug.log"]],

        [make_cbstats_task(kind, memcached_pass, guts)
         for kind in ["all", "allocator", "checkpoint", "collections", "config",
                      "dcp", "dcpagg",
                      ["diskinfo", "detail"], ["dispatcher", "logs"],
                      "eviction", "failovers", ["hash", "detail"],
                      "kvstore", "kvtimings", "memory",
                      "prev-vbucket", ["responses", "all"],
                      "runtimes", "scheduler", "scopes",
                      "tasks",
                      "timings", "uuid",
                      "vbucket-details", "vbucket-seqno",
                      "warmup", "workload"]],

        [AllOsTask("memcached mcstat %s" % kind,
                   flatten(["mcstat", "-h", "%s:%s" % (local_url_addr, read_guts(guts, "memcached_port")),
                            "-u", read_guts(guts, "memcached_admin"), kind]),
                   log_file="stats.log",
                   timeout=60,
                   addenv=[("CB_PASSWORD", memcached_pass)])
         for kind in ["connections", "tracing"]],

        [AllOsTask("fts mossScope (%s)" % path,
                   ["mossScope", "stats", "diag", path],
                   log_file="fts_store_stats.log")
         for path in glob.glob(os.path.join(viewdir, "@fts", "*.pindex", "store"))
            if any(".moss" in entry for entry in os.listdir(path))],

        [AllOsTask("fts scorch zap (%s)" % path,
                   ["cbft-bleve", "zap", "footer", path],
                   log_file="fts_store_stats.log")
         for path in glob.glob(os.path.join(viewdir, "@fts", "*.pindex", "store", "*.zap"))],

        [AllOsTask("ddocs for %s (%s)" % (bucket, path),
                   ["couch_dbdump", path],
                   log_file="ddocs.log")
         for bucket in set(correct_split(read_guts(guts, "buckets"), ",")) - \
            set(correct_split(read_guts(guts, "memcached_buckets"), ","))
         for path in glob.glob(os.path.join(dbdir, bucket, "master.couch*"))],

        [AllOsTask("Couchstore local documents (%s, %s)" % (bucket, os.path.basename(path)),
                   ["couch_dbdump", "--local", path],
                   log_file="couchstore_local.log")
         for bucket in set(correct_split(read_guts(guts, "buckets"), ",")) - \
            set(correct_split(read_guts(guts, "memcached_buckets"), ","))
         for path in glob.glob(os.path.join(dbdir, bucket, "*.couch.*"))],

        # RocksDB has logs per DB (i.e. vBucket). 'LOG' is the most
        # recent file, with old files named LOG.old.<timestamp>.
        # Sort so we go from oldest -> newest as per other log files.
        [AllOsTask("RocksDB Log file (%s, %s)" % (bucket, os.path.basename(path)),
                   "cat '%s'" % (log_file),
                   log_file="kv_rocks.log")
         for bucket in (set(correct_split(read_guts(guts, "buckets"), ",")) -
                        set(correct_split(read_guts(guts, "memcached_buckets"), ",")))
         for path in glob.glob(os.path.join(dbdir, bucket, "rocksdb.*"))
         for log_file in sorted(glob.glob(os.path.join(path, "LOG.old.*"))) + [os.path.join(path, "LOG")]],

        [UnixTask("moxi stats (port %s)" % port,
                  "echo stats proxy | nc %s %s" % (local_addr, port),
                  log_file="stats.log",
                  timeout=60)
         for port in correct_split(read_guts(guts, "moxi_ports"), ",")],

        [AllOsTask("mctimings %s" % stat,
                   ["mctimings",
                    "-u", read_guts(guts, "memcached_admin"),
                    "-h", "%s:%s" % (local_url_addr,
                                     read_guts(guts, "memcached_port")),
                    "-v"] + stat,
                   log_file="stats.log",
                   timeout=60,
                   addenv=[("CB_PASSWORD", memcached_pass)])
         for stat in ([], ["subdoc_execute"])],

        CollectFile("Users storage", read_guts(guts, "users_storage_path")),

        [CollectFile("Rebalance Report: %s" % path, path)
         for path in glob.glob(os.path.join(rebdir, "rebalance_report*"))],

        make_stats_archives_task(guts, initargs_path),
        AllOsTask("Phosphor Trace",
                  ["kv_trace_dump",
                   "-H", "%s:%s" % (local_url_addr,
                                    read_guts(guts, "memcached_port")),
                   "-u", read_guts(guts, "memcached_admin"),
                   "-P", memcached_pass,
                   "kv_trace.json"],
                  timeout=120,
                  log_file="stats.log",
                  change_dir=True,
                  artifacts=["kv_trace.json"]),
    ]

    _tasks = flatten([getent_tasks,
                      lookup_tasks,
                      query_tasks,
                      index_tasks,
                      projector_tasks,
                      fts_tasks,
                      cbas_tasks,
                      eventing_tasks,
                      goxdcr_tasks,
                      _tasks])

    return _tasks
1424
1425
def find_script(name):
    """Return the absolute path of bin/<name> under the install root,
    logging the hit, or None when the script does not exist."""
    candidate = os.path.join(basedir(), "bin", name)
    if not os.path.exists(candidate):
        return None

    log("Found %s: %s" % (name, candidate))
    return os.path.abspath(candidate)
1433
1434
def get_server_guts(initargs_path):
    """Run the dump-guts escript against *initargs_path* and return its
    output as a dict.

    dump-guts emits NUL-separated key/value pairs on stdout. Returns {}
    when the script or the initargs file is missing; exits the process
    when the initargs file exists but is not readable (collection would
    be useless without it).
    """
    dump_guts_path = find_script("dump-guts")

    if dump_guts_path is None:
        log("Couldn't find dump-guts script. Some information will be missing")
        return {}

    # Check if initargs exists and is read accessible.
    if not os.path.exists(initargs_path):
        log("initargs file '{}' does not exist".format(initargs_path))
        return {}

    if not os.access(initargs_path, os.R_OK):
        log("Read access to '{}' is required in order to proceed".format(
            initargs_path))
        sys.exit(1)

    escript = exec_name("escript")
    extra_args = os.getenv("EXTRA_DUMP_GUTS_ARGS")
    args = [escript, dump_guts_path, "--initargs-path", initargs_path]
    if extra_args:
        args = args + extra_args.split(";")
    print("Checking for server guts in %s..." % initargs_path)
    p = subprocess.Popen(args, stdout=subprocess.PIPE)
    # communicate() reads stdout to EOF and reaps the child in one call,
    # avoiding any chance of deadlocking on a full pipe the way a manual
    # read()/wait() pair could.
    raw_output, _ = p.communicate()
    output = raw_output.decode(LATIN1)
    tokens = output.rstrip("\0").split("\0")
    d = {}
    if len(tokens) > 1:
        # Pair up key/value tokens. Stop at len - 1 so a malformed,
        # odd-length token stream can't raise IndexError on tokens[i + 1].
        for i in range(0, len(tokens) - 1, 2):
            d[tokens[i]] = tokens[i + 1]
    return d
1469
1470
def guess_utility(command):
    """Guess which external utility *command* invokes.

    Returns None for an empty command, the whole command string when it
    contains shell constructs too hairy to parse (pipes, separators,
    sh/su/find/for), and otherwise the first word of the command.
    """
    if isinstance(command, list):
        command = ' '.join(command)

    if not command:
        return None

    hairy = r'[|;&]|\bsh\b|\bsu\b|\bfind\b|\bfor\b'
    if re.search(hairy, command):
        # something hard to easily understand; let the human decide
        return command

    return command.split()[0]
1483
1484
def dump_utilities(*args, **kwargs):
    """Print a best-effort list of external utilities used by the
    collection tasks, grouped by platform, then exit the process with
    status 0. Used as an optparse callback, hence the ignored *args and
    **kwargs."""
    specific_platforms = {SolarisTask: 'Solaris',
                          LinuxTask: 'Linux',
                          WindowsTask: 'Windows',
                          MacOSXTask: 'Mac OS X'}
    platform_utils = dict((name, set())
                          for name in specific_platforms.values())

    class FakeOptions(object):
        # Stand-in options object: every attribute reads as None, which
        # is enough for make_product_task to build its task list.
        def __getattr__(self, name):
            return None

    tasks = make_os_tasks() + make_product_task({}, "", "", FakeOptions())

    for task in tasks:
        utility = guess_utility(task.command)
        if utility is None:
            continue

        # NOTE: loop variable renamed from 'platform' so it no longer
        # shadows the imported platform module.
        for (task_class, name) in specific_platforms.items():
            if isinstance(task, task_class):
                platform_utils[name].add(utility)

    # "utilites" typo in the original message fixed.
    print('This is an autogenerated, possibly incomplete and flawed list '
          'of utilities used by cbcollect_info')

    for (name, utilities) in sorted(
            platform_utils.items(), key=lambda x: x[0]):
        print("\n%s:" % name)

        for utility in sorted(utilities):
            print("        - %s" % utility)

    sys.exit(0)
1519
1520
def stdin_watcher():
    """Block until stdin reaches EOF, i.e. until whoever spawned us
    closes their end of the pipe."""
    fd = sys.stdin.fileno()

    # os.read returns b'' exactly once stdin is closed.
    while os.read(fd, 1024):
        pass
1529
1530
def setup_stdin_watcher():
    """Start a daemon thread that forces the process to exit (status 2)
    as soon as stdin is closed; see the AltExit commentary at the top of
    the file for why this goes through AltExit."""
    def watch():
        try:
            stdin_watcher()
        finally:
            AltExit.exit(2)

    threading.Thread(target=watch, daemon=True).start()
1539
1540
class CurlKiller:
    """Exit-time hook that SIGKILLs a running curl subprocess.

    Registered with AltExit while an upload is in flight; call disarm()
    once curl has finished so the hook becomes a no-op.
    """

    def __init__(self, p):
        # p: the subprocess.Popen handle to kill, or None once disarmed
        self.p = p

    def cleanup(self):
        if self.p is None:
            return
        print("Killing curl...")
        os.kill(self.p.pid, signal.SIGKILL)
        print("done")

    def disarm(self):
        # forget the process so a later cleanup() does nothing
        self.p = None
1553
1554
def do_upload_and_exit(path, url, proxy):
    """Upload *path* to *url* with curl (through *proxy*) and exit.

    Never returns: exits 0 only when the server answered HTTP 200,
    exits 1 on curl failure or any other status code.
    """
    # Temp file receives curl's --output (the response body); the HTTP
    # status code itself comes back on stdout via --write-out.
    output_fd, output_file = tempfile.mkstemp()
    os.close(output_fd)

    AltExit.register(lambda: os.unlink(output_file))

    args = ["curl", "-sS",
            "--output", output_file,
            "--proxy", proxy,
            "--write-out", "%{http_code}", "--upload-file", path, url]
    # Hold the AltExit lock across Popen so an exit triggered between
    # spawning curl and registering the killer can't leak the process;
    # register_and_unlock releases the lock once the hook is in place.
    AltExit.lock.acquire()
    try:
        p = subprocess.Popen(args, stdout=subprocess.PIPE)
        k = CurlKiller(p)
        AltExit.register_and_unlock(k.cleanup)
    except Exception as e:
        AltExit.lock.release()
        raise e

    stdout, _ = p.communicate()
    stdout = stdout.decode(LATIN1)
    # curl finished on its own - stop the exit hook from killing it.
    k.disarm()

    if p.returncode != 0:
        sys.exit(1)
    else:
        if stdout.strip() == '200':
            log('Upload path is: %s' % url)
            log('Done uploading')
            sys.exit(0)
        else:
            log('HTTP status code: %s' % stdout)
            sys.exit(1)
1588
1589
def parse_host(host):
    """Split an upload host into (scheme, netloc, path), defaulting the
    scheme to https when none was given.

    The original check re-split only when url.scheme was empty. That
    misparses bare "host:port" input on Python versions where urlsplit
    treats "host" as the scheme and "port" as the path. Checking netloc
    instead is robust: any input that parsed without a network location
    is retried with an explicit https:// prefix so the host always lands
    in netloc.
    """
    url = urllib.parse.urlsplit(host)
    if not url.netloc:
        url = urllib.parse.urlsplit('https://' + host)

    return url.scheme, url.netloc, url.path
1596
1597
def generate_upload_url(parser, options, zip_filename):
    """Assemble the URL the collected .zip will be uploaded to, or None
    when no --upload-host was given. Aborts via parser.error when
    --upload-host is present without --customer."""
    if not options.upload_host:
        return None

    if not options.upload_customer:
        parser.error("Need --customer when --upload-host is given")

    scheme, netloc, path = parse_host(options.upload_host)

    customer = urllib.parse.quote(options.upload_customer)
    fname = urllib.parse.quote(os.path.basename(zip_filename))

    # Path layout: <base>/<customer>[/<ticket>]/<zip name>
    segments = [path, customer]
    if options.upload_ticket:
        segments.append('%d' % options.upload_ticket)
    segments.append(fname)
    full_path = '/'.join(segments)

    upload_url = urllib.parse.urlunsplit(
        (scheme, netloc, full_path, '', ''))
    log("Will upload collected .zip file into %s" % upload_url)
    return upload_url
1620
1621
def check_ticket(option, opt, value):
    """optparse type-checker for the custom 'ticket' option type.

    A valid ticket is 1-7 decimal digits; returns it as an int, raising
    optparse.OptionValueError otherwise.
    """
    if not re.match(r'^\d{1,7}$', value):
        raise optparse.OptionValueError(
            "option %s: invalid ticket number: %r" % (opt, value))
    return int(value)
1628
1629
class CbcollectInfoOptions(optparse.Option):
    # optparse Option subclass registering the custom "ticket" type
    # (validated by check_ticket) used by the --ticket flag.
    from copy import copy

    TYPES = optparse.Option.TYPES + ("ticket",)
    # Copy the base mapping so optparse.Option's shared TYPE_CHECKER is
    # not mutated for other Option users.
    TYPE_CHECKER = copy(optparse.Option.TYPE_CHECKER)
    TYPE_CHECKER["ticket"] = check_ticket
1636
1637
def main():
    """Entry point: collect diagnostics into a .zip and optionally upload.

    Parses command-line options, prepares the environment, runs the OS and
    product collection tasks via TaskRunner, zips the results (plus a
    redacted copy when requested) and uploads them when an upload URL was
    configured.  Exits the process on fatal errors and after uploads.
    """
    # ask all tools to use C locale (MB-12050)
    os.environ['LANG'] = 'C'
    os.environ['LC_ALL'] = 'C'
    if 'HOME' not in os.environ:
        os.environ['HOME'] = basedir()

    rootdir = basedir()
    # (MB-8239)erl script fails in OSX as it is unable to find COUCHBASE_TOP -ravi
    if platform.system() == 'Darwin':
        os.environ["COUCHBASE_TOP"] = rootdir

    # --- command line definition ---------------------------------------
    parser = optparse.OptionParser(
        usage=USAGE, option_class=CbcollectInfoOptions)
    parser.add_option("-r", dest="root",
                      help="root directory - defaults to %s" % (rootdir),
                      default=rootdir)
    parser.add_option("-v", dest="verbosity", help="increase verbosity level",
                      action="count", default=0)
    parser.add_option(
        "-p",
        dest="product_only",
        help="gather only product related information",
        action="store_true",
        default=False)
    parser.add_option("-d", action="callback", callback=dump_utilities,
                      help="dump a list of commands that cbcollect_info needs")
    parser.add_option("--watch-stdin", dest="watch_stdin",
                      action="store_true", default=False,
                      help=optparse.SUPPRESS_HELP)
    parser.add_option("--initargs", dest="initargs",
                      help="server 'initargs' path")
    parser.add_option(
        "--log-redaction-level",
        dest="redact_level",
        default="none",
        help="redaction level for the logs collected, none and partial supported (default is none)")
    parser.add_option("--log-redaction-salt", dest="salt_value",
                      default=str(uuid.uuid4()),
                      help="Is used to salt the hashing of tagged data, \
                            defaults to random uuid. If input by user it should \
                            be provided along with --log-redaction-level option")
    parser.add_option("--just-upload-into", dest="just_upload_into",
                      help=optparse.SUPPRESS_HELP)
    parser.add_option(
        "--upload-host",
        dest="upload_host",
        help="gather diagnostics and upload it for couchbase support. Gives upload host")
    parser.add_option("--customer", dest="upload_customer",
                      help="specifies customer name for upload")
    parser.add_option("--ticket", dest="upload_ticket", type='ticket',
                      help="specifies support ticket number for upload")
    parser.add_option("--bypass-sensitive-data", dest="bypass_sensitive_data",
                      action="store_true", default=False,
                      help="do not collect sensitive data")
    parser.add_option(
        "--task-regexp",
        dest="task_regexp",
        default="",
        help="Run only tasks matching regexp. For debugging purposes only.")
    parser.add_option(
        "--tmp-dir",
        dest="tmp_dir",
        default=None,
        help="set the temp dir used while processing collected data. Overrides the TMPDIR env variable if set")
    parser.add_option("--upload-proxy", dest="upload_proxy", default="",
                      help="specifies proxy for upload")
    options, args = parser.parse_args()

    # --- argument validation -------------------------------------------
    if len(args) != 1:
        parser.error(
            "incorrect number of arguments. Expecting filename to collect diagnostics into")

    if options.watch_stdin:
        setup_stdin_watcher()

    # Ensure the output filename ends in .zip.
    zip_filename = args[0]
    if zip_filename[-4:] != '.zip':
        zip_filename = zip_filename + '.zip'

    zip_dir = os.path.dirname(os.path.abspath(zip_filename))

    # Fail early if we cannot create files in the destination directory.
    if not os.access(zip_dir, os.W_OK | os.X_OK):
        print("do not have write access to the directory %s" % (zip_dir))
        sys.exit(1)

    if options.redact_level != "none" and options.redact_level != "partial":
        parser.error(
            "Invalid redaction level. Only 'none' and 'partial' are supported.")

    # When redacting, the redacted archive is what gets uploaded.
    redact_zip_file = zip_filename[:-4] + "-redacted" + zip_filename[-4:]
    upload_url = ""
    if options.redact_level != "none":
        upload_url = generate_upload_url(parser, options, redact_zip_file)
    else:
        upload_url = generate_upload_url(parser, options, zip_filename)

    # --- environment setup so collection tools can be found ------------
    bindir = os.path.join(options.root, 'bin')
    if os.name == 'posix':
        path = [bindir,
                '/opt/couchbase/bin',
                os.environ['PATH'],
                '/bin',
                '/sbin',
                '/usr/bin',
                '/usr/sbin']
        os.environ['PATH'] = ':'.join(path)

        library_path = [os.path.join(options.root, 'lib')]

        current_library_path = os.environ.get('LD_LIBRARY_PATH')
        if current_library_path is not None:
            library_path.append(current_library_path)

        os.environ['LD_LIBRARY_PATH'] = ':'.join(library_path)
    elif os.name == 'nt':
        path = [bindir, os.environ['PATH']]
        os.environ['PATH'] = ';'.join(path)

    # Hidden mode: upload a previously collected archive and exit.
    if options.just_upload_into is not None:
        do_upload_and_exit(args[0], options.just_upload_into,
                           options.upload_proxy)

    runner = TaskRunner(verbosity=options.verbosity,
                        task_regexp=options.task_regexp,
                        tmp_dir=options.tmp_dir,
                        salt_value=options.salt_value)
    # We want this at the top of couchbase.log
    runner.run(make_redaction_task())

    if not options.product_only:
        runner.run_tasks(make_os_tasks())

    # --- locate the server's initargs file ------------------------------
    # Candidate locations: under -r root, the default Linux install path,
    # and the macOS application-support path.
    initargs_variants = [
        os.path.abspath(
            os.path.join(
                options.root,
                "var",
                "lib",
                "couchbase",
                "initargs")),
        "/opt/couchbase/var/lib/couchbase/initargs",
        os.path.expanduser("~/Library/Application Support/Couchbase/var/lib/couchbase/initargs")]

    if options.initargs is not None:
        initargs_variants = [options.initargs]

    guts = None
    guts_initargs_path = None

    # Use the first initargs file that yields any server configuration.
    for initargs_path in initargs_variants:
        d = get_server_guts(initargs_path)
        # print("for initargs: %s got:\n%s" % (initargs_path, d))
        if len(d) > 0:
            guts = d
            guts_initargs_path = os.path.abspath(initargs_path)
            break

    if guts is None:
        log("Couldn't read server guts. Using some default values.")

        # Fall back to platform-specific default install paths.
        prefix = None
        if platform.system() == 'Windows':
            prefix = 'c:/Program Files/Couchbase/Server'
        elif platform.system() == 'Darwin':
            prefix = '~/Library/Application Support/Couchbase'
        else:
            prefix = '/opt/couchbase'

        guts = {
            "db_dir": os.path.join(
                prefix,
                "var/lib/couchbase/data"),
            "idx_dir": os.path.join(
                prefix,
                "var/lib/couchbase/data"),
            "ns_log_path": os.path.join(
                prefix,
                "var/lib/couchbase/ns_log"),
            "log_path": os.path.join(
                prefix,
                "var/lib/couchbase/logs"),
            "memcached_logs_path": os.path.join(
                prefix,
                "var/lib/couchbase/logs")}

        guts_initargs_path = os.path.abspath(prefix)

    ipv6 = read_guts(guts, "ipv6") == "true"
    set_local_addr(ipv6)

    memcached_password = get_diag_password(guts)

    # --- run the product-level collection tasks -------------------------
    zip_node = read_guts(guts, "node")
    runner.literal(
        "product diag header", "Found server initargs at %s (%d)" %
        (guts_initargs_path, len(guts)))

    runner.run_tasks(make_product_task(guts, guts_initargs_path,
                                       memcached_password, options))

    # Collect breakpad crash dumps.
    if options.bypass_sensitive_data:
        log("Bypassing Sensitive Data: Breakpad crash dumps")
    else:
        memcached_breakpad_minidump_dir = read_guts(
            guts, "memcached_breakpad_minidump_dir")
        for dump in glob.glob(os.path.join(
                memcached_breakpad_minidump_dir, "*.dmp")):
            runner.collect_file(dump)

        # Collect indexer breakpad minidumps
        index_port = read_guts(guts, "indexer_http_port")
        if index_port:
            indexer_breakpad_minidump_dir = read_guts(
                guts, "indexer_breakpad_minidump_dir")
            # Skip when it's the same directory already collected above.
            if memcached_breakpad_minidump_dir != indexer_breakpad_minidump_dir:
                for dump in glob.glob(os.path.join(
                        indexer_breakpad_minidump_dir, "*.dmp")):
                    runner.collect_file(dump)

    # Collect full-text-search diagnostic dumps when FTS is configured.
    fts_port = read_guts(guts, "fts_http_port")
    if fts_port:
        idx_dir = read_guts(guts, "idx_dir")
        for dump in glob.glob(os.path.join(
                idx_dir, "@fts", "dumps", "*.dump.txt")):
            runner.collect_file(dump)

    # Replace a loopback address in the node name with the primary
    # outbound address so the archive identifies the real host.
    addr = zip_node.split("@")[-1]
    if addr == "127.0.0.1" or addr == "::1":
        zip_node = '@'.join(zip_node.split(
            "@")[:-1] + [find_primary_addr(ipv6, addr)])

    if options.verbosity:
        log("Python version: %s" % sys.version)

    # Include cbcollect_info's own log in the archive.
    runner.literal("cbcollect_info log", log_stream.getvalue(),
                   log_file="cbcollect_info.log", no_header=True)

    runner.close_all_files()

    # --- package and (optionally) upload --------------------------------
    if options.redact_level != "none":
        log("Redacting log files to level: %s" % options.redact_level)
        runner.redact_and_zip(redact_zip_file, zip_node)

    runner.zip(zip_filename, zip_node)

    # do_upload_and_exit() never returns.
    if upload_url and options.redact_level != "none":
        do_upload_and_exit(redact_zip_file, upload_url, options.upload_proxy)
    elif upload_url:
        do_upload_and_exit(zip_filename, upload_url, options.upload_proxy)
1889
1890
def find_primary_addr(ipv6, default=None):
    """Return this host's primary outbound IP address, or *default*.

    Connects a UDP socket towards a public Google DNS address and reads
    back the local address the OS chose for the route.  UDP connect()
    sends no packets, so this works without generating traffic; on any
    socket error *default* is returned instead.
    """
    if ipv6:
        family, target = socket.AF_INET6, ("2001:4860:4860::8844", 56)
    else:
        family, target = socket.AF_INET, ("8.8.8.8", 56)

    sock = socket.socket(family, socket.SOCK_DGRAM)
    try:
        sock.connect(target)
        # getsockname() yields (addr, port) for IPv4 and a 4-tuple for
        # IPv6; the address is the first element in both cases.
        return sock.getsockname()[0]
    except socket.error:
        return default
    finally:
        sock.close()
1907
1908
def exec_name(name):
    """Return *name* with the '.exe' suffix appended on Windows."""
    return name + ".exe" if sys.platform == 'win32' else name
1913
1914
# Script entry point: run the collector when invoked directly.
if __name__ == '__main__':
    main()
1917