1#!/usr/bin/env python
2# -*- python -*-
3#
4# @author Couchbase <info@couchbase.com>
5# @copyright 2011-2018 Couchbase, Inc.
6#
7# Licensed under the Apache License, Version 2.0 (the "License");
8# you may not use this file except in compliance with the License.
9# You may obtain a copy of the License at
10#
11#      http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS,
15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18import os
19import sys
20import tempfile
21import time
22import subprocess
23import string
24import re
25import platform
26import glob
27import socket
28import threading
29import optparse
30import atexit
31import signal
32import urllib
33import shutil
34import urlparse
35import errno
36import hashlib
37import uuid
38from datetime import datetime, timedelta, tzinfo
39from StringIO import StringIO
40
class AltExitC(object):
    """Registry of cleanup callbacks, runnable either through the normal
    atexit machinery or explicitly via exit(), which then terminates the
    process with os._exit() (bypassing normal interpreter shutdown)."""

    def __init__(self):
        self.list = []                 # cleanup callbacks; invoked in reverse registration order
        self.lock = threading.Lock()   # guards self.list against concurrent register() calls
        atexit.register(self.at_exit_handler)

    def register(self, f):
        """Register callback f to be run at exit."""
        self.lock.acquire()
        self.register_and_unlock(f)

    def register_and_unlock(self, f):
        # Expects self.lock to already be held by the caller; always
        # releases it, even if the append fails.
        try:
            self.list.append(f)
        finally:
            self.lock.release()

    def at_exit_handler(self):
        # The lock is acquired and deliberately never released here —
        # presumably so no further register() can complete once shutdown
        # has started.
        self.lock.acquire()
        self.list.reverse()
        for f in self.list:
            try:
                f()
            except:
                # Best effort: one failing handler must not stop the rest.
                pass

    def exit(self, status):
        """Run all handlers immediately, then hard-exit with status."""
        self.at_exit_handler()
        os._exit(status)
69
# Module-level singleton used throughout this script for cleanup-on-exit.
AltExit = AltExitC()

# optparse usage string; "%prog" is substituted with the script name.
USAGE = """usage: %prog [options] output_file.zip

- Linux/Windows/OSX:
    %prog output_file.zip
    %prog -v output_file.zip"""
77
78# adapted from pytz
class FixedOffsetTZ(tzinfo):
    """A tzinfo with a constant UTC offset and no daylight-saving shift."""

    def __init__(self, minutes):
        # An offset of a whole day or more is nonsensical for a timezone.
        if not -1440 < minutes < 1440:
            raise ValueError("absolute offset is too large", minutes)
        self._minutes = minutes
        self._offset = timedelta(minutes=minutes)

    def utcoffset(self, dt):
        """Fixed offset from UTC, independent of dt."""
        return self._offset

    def dst(self, dt):
        """No DST: always a zero delta."""
        return timedelta(0)

    def tzname(self, dt):
        """This zone is unnamed."""
        return None
94
# Fixed-offset zone used to timestamp log lines.
# NOTE(review): time.timezone is seconds WEST of UTC, so the local UTC
# offset is arguably -time.timezone / 60; as written, zones east of
# Greenwich get an inverted sign — confirm whether this is intentional.
local_tz = FixedOffsetTZ(minutes=time.timezone / 60)
# In-memory copy of everything log() emits; later written into the zip.
log_stream = StringIO()
local_addr = None      # loopback address; set by set_local_addr()
local_url_addr = None  # loopback address in URL form ("[::1]" when IPv6)
99
def set_local_addr(ipv6):
    """Record the loopback address (plain and URL form) for later tasks."""
    global local_addr
    global local_url_addr

    if ipv6:
        local_addr = "::1"
        local_url_addr = "[::1]"
    else:
        local_addr = "127.0.0.1"
        local_url_addr = "127.0.0.1"
106
# Partially-accumulated log line (no newline seen yet), or None.
log_line = None

def buffer_log_line(message, new_line):
    """Accumulate message into the pending log line.

    Returns the completed, timestamp-prefixed line when new_line is true;
    otherwise stores the partial text and returns None.
    """
    global log_line

    pending = log_line
    if pending is None:
        # Start a fresh line with an ISO-8601 local-time prefix.
        pending = '[%s] ' % datetime.now(tz=local_tz).isoformat()
    pending += message

    if not new_line:
        log_line = pending
        return None

    log_line = None
    return pending
123
def log(message, new_line=True):
    """Echo message to stderr immediately and buffer it into log_stream.

    Completed (newline-terminated) lines are copied, with a timestamp
    prefix, into the in-memory log_stream for later inclusion in the zip.
    """
    global log_stream

    text = message + '\n' if new_line else message

    completed = buffer_log_line(text, new_line)
    if completed is not None:
        log_stream.write(completed)

    sys.stderr.write(text)
    sys.stderr.flush()
136
class AccessLogProcessor:
    """Redacts usernames and document ids in http access-log lines.

    Finds the user column and the request-URL column of common-log-format
    lines and replaces sensitive tokens with a salted SHA1 hash.
    """
    def __init__(self, salt):
        self.salt = salt
        # Groups: (1) host + ident, (2) user, (3) timestamp + HTTP method,
        # (4) request URL, (5) protocol/status/remainder.
        self.column_parser = re.compile(
            r'(^\S* \S* )(\S*)( \[.*\] \"\S* )(\S*)( .*$)')
        # Each entry: [URL prefix, regex locating the sensitive token in the
        # URL tail, redaction function, named group to redact].
        self.urls_to_redact = [['/settings/rbac/users',
                                re.compile(r'\/(?P<user>[^\/\s#&]+)([#&]|$)'),
                                self._process_user, "user"],
                               ['/_cbauth/checkPermission',
                                re.compile(r'user=(?P<user>[^\s&#]+)'),
                                self._process_user, "user"],
                               ['/pools/default/buckets',
                                re.compile(r'\/(?:[^\/\s#&]+)\/docs\/'
                                            '(?P<docid>[^\/\s#&]+)$'),
                                self._process_docid, "docid"]]

    def _process_url(self, surl):
        """Redact the sensitive part of surl if it matches a known prefix."""
        for conf in self.urls_to_redact:
            prefix = conf[0]
            if surl[:len(prefix)] == prefix:
                return prefix + self._process_url_tail(conf[1], conf[2],
                                                       conf[3],
                                                       surl[len(prefix):])
        return surl

    def _process_url_tail(self, rex, fn, key, s):
        """Apply fn to the named group `key` of rex within s, if it matches."""
        m = rex.search(s)
        if m is not None:
            return s[:m.start(key)] + fn(m.group(key)) + s[m.end(key):]
        else:
            return s

    def _process_user(self, user):
        """Hash a username; '-', empty and internal '@' users pass through.

        The empty-string guard matters: the capture patterns use \\S* and
        can produce '', and user[0] would raise IndexError on it.
        """
        if not user or user == '-' or user[0] == '@':
            return user
        elif user[-3:] == "/UI":
            # Keep the UI-session suffix so the log stays interpretable.
            return self._hash(user[:-3]) + "/UI"
        else:
            return self._hash(user)

    def _process_docid(self, docid):
        """Document ids are always sensitive; hash unconditionally."""
        return self._hash(docid)

    def _hash(self, token):
        """Salted SHA1 of token, as a hex string."""
        return hashlib.sha1(self.salt + token).hexdigest()

    def _repl_columns(self, matchobj):
        # Rebuild the line with user (group 2) and URL (group 4) redacted.
        return matchobj.group(1) + \
            self._process_user(matchobj.group(2)) + \
            matchobj.group(3) + \
            self._process_url(matchobj.group(4)) + \
            matchobj.group(5)

    def do(self, line):
        """Return line with its user and URL columns redacted."""
        return self.column_parser.sub(self._repl_columns, line)
192
class RegularLogProcessor:
    """Redacts <ud>...</ud> spans and embedded redaction salts in log lines."""

    rexes = [re.compile('(<ud>)(.+?)(</ud>)'),
             re.compile('("--log-redaction-salt=)(.+?)(")')]

    def __init__(self, salt):
        self.salt = salt

    def _hash(self, match):
        """re.sub callback: salted-SHA1 the middle group, keep the delimiters."""
        digest = hashlib.sha1(self.salt + match.group(2)).hexdigest()
        return match.group(1) + digest + match.group(3)

    def _process_line(self, line):
        """Run every redaction pattern over line, in order."""
        for pattern in self.rexes:
            line = pattern.sub(self._hash, line)
        return line

    def do(self, line):
        """Return the redacted form of line."""
        return self._process_line(line)
211
class CouchbaseLogProcessor(RegularLogProcessor):
    """Redaction for couchbase.log, which also embeds the salt itself."""

    rexes = [re.compile('(--log-redaction-salt=)(.+?)( )')]

    def do(self, line):
        """Redact line; the RedactLevel marker line is replaced wholesale."""
        if "RedactLevel" not in line:
            return self._process_line(line)
        # salt + salt to maintain consistency with other
        # occurrences of hashed salt in the logs.
        return 'RedactLevel:partial,HashOfSalt:%s\n' \
            % hashlib.sha1(self.salt + self.salt).hexdigest()
223
class LogRedactor:
    """Routes collected files through the appropriate redaction processor,
    writing redacted copies into <tmpdir>/redacted."""

    def __init__(self, salt, tmpdir, default_name):
        self.default_name = default_name
        self.target_dir = os.path.join(tmpdir, "redacted")
        os.makedirs(self.target_dir)

        self.access_log = AccessLogProcessor(salt)
        self.couchbase_log = CouchbaseLogProcessor(salt)
        self.regular_log = RegularLogProcessor(salt)

    def _process_file(self, ifile, ofile, processor):
        """Copy ifile to ofile line by line, redacting via processor."""
        try:
            with open(ifile, 'r') as inp:
                with open(ofile, 'w+') as out:
                    for line in inp:
                        out.write(processor.do(line))
        except IOError as e:
            log("I/O error(%s): %s" % (e.errno, e.strerror))

    def redact_file(self, name, ifile):
        """Redact ifile into the target dir under name; return the new path."""
        ofile = os.path.join(self.target_dir, name)
        processor = self.regular_log
        if "http_access" in name:
            processor = self.access_log
        elif name == self.default_name:
            processor = self.couchbase_log
        self._process_file(ifile, ofile, processor)
        return ofile
252
class Task(object):
    """One diagnostic command whose combined stdout/stderr is captured
    into a log file.

    Subclasses restrict where the task runs via a `platforms` class
    attribute (see will_run). Arbitrary keyword arguments passed to
    __init__ become instance attributes; recognized ones include
    log_file, addenv, to_stdin, change_dir, artifacts, num_samples,
    interval, privileged and no_header.
    """
    # Class-level defaults; overridable per instance through **kwargs.
    privileged = False   # if True, skipped unless running as root
    no_header = False    # if True, no banner is written before the output
    num_samples = 1      # how many times to run the command
    interval = 0         # seconds to sleep between samples
    def __init__(self, description, command, timeout=None, **kwargs):
        self.description = description
        self.command = command
        self.timeout = timeout
        self.__dict__.update(kwargs)
        self._is_posix = (os.name == 'posix')

    def _platform_popen_flags(self):
        """Extra Popen flags: on POSIX start a new process group so the
        whole group can be killed on timeout."""
        flags = {}
        if self._is_posix:
            flags['preexec_fn'] = os.setpgrp

        return flags

    def _can_kill(self, p):
        """Whether we know how to kill process p on this platform."""
        if self._is_posix:
            return True

        return hasattr(p, 'kill')

    def _kill(self, p):
        """Kill p; on POSIX, kill its entire process group with SIGKILL."""
        if self._is_posix:
            group_pid = os.getpgid(p.pid)
            os.killpg(group_pid, signal.SIGKILL)
        else:
            p.kill()

    def _env_flags(self):
        """Popen env= flag: self.addenv merged over the current environment."""
        flags = {}
        if hasattr(self, 'addenv'):
            env = os.environ.copy()
            env.update(self.addenv)
            flags['env'] = env

        return flags

    def _cwd_flags(self):
        """Popen cwd= flag: the runner's tmpdir, or self.change_dir itself
        when it is a string path."""
        flags = {}
        if getattr(self, 'change_dir', False):
            cwd = self._task_runner.tmpdir
            if isinstance(self.change_dir, str):
                cwd = self.change_dir

            flags['cwd'] = cwd

        return flags

    def _extra_flags(self):
        """All extra Popen keyword arguments for this task."""
        flags = self._env_flags()
        flags.update(self._platform_popen_flags())
        flags.update(self._cwd_flags())

        return flags

    def set_task_runner(self, runner):
        """Attach the TaskRunner (needed for tmpdir resolution)."""
        self._task_runner = runner

    def execute(self, fp):
        """Run self.command, streaming combined stdout/stderr into fp.

        Returns the command's exit status (127 when it could not be
        spawned). If self.timeout is set and the platform supports it,
        the process (group) is killed after that many seconds.
        """
        # String commands go through the shell; list commands do not.
        use_shell = not isinstance(self.command, list)
        extra_flags = self._extra_flags()
        try:
            p = subprocess.Popen(self.command, bufsize=-1,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=use_shell,
                                 **extra_flags)
            if hasattr(self, 'to_stdin'):
                p.stdin.write(self.to_stdin)

            p.stdin.close()

        except OSError, e:
            # if use_shell is False then Popen may raise exception
            # if binary is missing. In this case we mimic what
            # shell does. Namely, complaining to stderr and
            # setting non-zero status code. It's might also
            # automatically handle things like "failed to fork due
            # to some system limit".
            print >> fp, "Failed to execute %s: %s" % (self.command, e)
            return 127

        except IOError, e:
            if e.errno == errno.EPIPE:
                print >> fp, "Ignoring broken pipe on stdin for %s" % self.command
            else:
                raise

        from threading import Timer, Event

        timer = None
        timer_fired = Event()

        if self.timeout is not None and self._can_kill(p):
            def on_timeout():
                try:
                    self._kill(p)
                except:
                    # the process might have died already
                    pass

                timer_fired.set()

            timer = Timer(self.timeout, on_timeout)
            timer.start()

        try:
            # Drain the child's output in chunks until EOF.
            while True:
                data = p.stdout.read(64 * 1024)
                if not data:
                    break

                fp.write(data)
        finally:
            if timer is not None:
                timer.cancel()
                timer.join()

                # there's a tiny chance that command succeeds just before
                # timer is fired; that would result in a spurious timeout
                # message
                if timer_fired.isSet():
                    print >> fp, "`%s` timed out after %s seconds" % (self.command, self.timeout)
                    log("[Command timed out after %s seconds] - " % (self.timeout), new_line=False)

        return p.wait()

    def will_run(self):
        """Determine if this task will run on this platform."""
        return sys.platform in self.platforms
389
390
391class TaskRunner(object):
392    default_name = "couchbase.log"
393
394    def __init__(self, verbosity=0, task_regexp='', tmp_dir=None,
395                 salt_value=""):
396        self.files = {}
397        self.verbosity = verbosity
398        self.start_time = time.strftime("%Y%m%d-%H%M%S", time.gmtime())
399        self.salt_value = salt_value
400
401        # Depending on platform, mkdtemp() may act unpredictably if passed an empty string.
402        if not tmp_dir:
403            tmp_dir = None
404        else:
405            tmp_dir = os.path.abspath(os.path.expanduser(tmp_dir))
406
407        try:
408            self.tmpdir = tempfile.mkdtemp(dir=tmp_dir)
409        except OSError as e:
410           print "Could not use temporary dir {0}: {1}".format(tmp_dir, e)
411           sys.exit(1)
412
413        # If a dir wasn't passed by --tmp-dir, check if the env var was set and if we were able to use it
414        if not tmp_dir and os.getenv("TMPDIR") and os.path.split(self.tmpdir)[0] != os.getenv("TMPDIR"):
415                log("Could not use TMPDIR {0}".format(os.getenv("TMPDIR")))
416        log("Using temporary dir {0}".format(os.path.split(self.tmpdir)[0]))
417
418        self.task_regexp = re.compile(task_regexp)
419
420        AltExit.register(self.finalize)
421
422    def finalize(self):
423        try:
424            for fp in self.files.iteritems():
425                fp.close()
426        except:
427            pass
428
429        shutil.rmtree(self.tmpdir, ignore_errors=True)
430
431    def collect_file(self, filename):
432        """Add a file to the list of files collected. Used to capture the exact
433        file (including timestamps) from the Couchbase instance.
434
435        filename - Absolute path to file to collect.
436        """
437        if not filename in self.files:
438            try:
439                self.files[filename] = open(filename, 'r')
440            except IOError, e:
441                log("Failed to collect file '%s': %s" % (filename, str(e)))
442        else:
443            log("Unable to collect file '%s' - already collected." % filename)
444
445    def get_file(self, filename):
446        if filename in self.files:
447            fp = self.files[filename]
448        else:
449            fp = open(os.path.join(self.tmpdir, filename), 'w+')
450            self.files[filename] = fp
451
452        return fp
453
454    def header(self, fp, title, subtitle):
455        separator = '=' * 78
456        print >> fp, separator
457        print >> fp, title
458        print >> fp, subtitle
459        print >> fp, separator
460        fp.flush()
461
462    def log_result(self, result):
463        if result == 0:
464            log("OK")
465        else:
466            log("Exit code %d" % result)
467
468    def run_tasks(self, tasks):
469        for task in tasks:
470            self.run(task)
471
472    def run(self, task):
473        if self.task_regexp.match(task.description) is None:
474                log("Skipping task %s because "
475                    "it doesn't match '%s'" % (task.description,
476                                               self.task_regexp.pattern))
477        else:
478            self._run(task)
479
480    def _run(self, task):
481        """Run a task with a file descriptor corresponding to its log file"""
482        if task.will_run():
483            log("%s (%s) - " % (task.description, task.command), new_line=False)
484            if task.privileged and os.getuid() != 0:
485                log("skipped (needs root privs)")
486                return
487
488            task.set_task_runner(self)
489
490            filename = getattr(task, 'log_file', self.default_name)
491            fp = self.get_file(filename)
492            if not task.no_header:
493                self.header(fp, task.description, task.command)
494
495            for i in xrange(task.num_samples):
496                if i > 0:
497                    log("Taking sample %d after %f seconds - " % (i+1, task.interval), new_line=False)
498                    time.sleep(task.interval)
499                result = task.execute(fp)
500                self.log_result(result)
501
502            for artifact in getattr(task, 'artifacts', []):
503                path = artifact
504                if not os.path.isabs(path):
505                    # we assume that "relative" artifacts are produced in the
506                    # self.tmpdir
507                    path = os.path.join(self.tmpdir, path)
508
509                self.collect_file(path)
510
511            fp.flush()
512
513        elif self.verbosity >= 2:
514            log('Skipping "%s" (%s): not for platform %s' % (task.description, task.command, sys.platform))
515
516    def literal(self, description, value, **kwargs):
517        self.run(LiteralTask(description, value, **kwargs))
518
519
520    def redact_and_zip(self, filename, node):
521        files = []
522        redactor = LogRedactor(self.salt_value, self.tmpdir, self.default_name)
523
524        for name, fp in self.files.iteritems():
525            if "users.dets" in name:
526                continue
527            files.append(redactor.redact_file(name, fp.name))
528
529        prefix = "cbcollect_info_%s_%s" % (node, self.start_time)
530        self._zip_helper(prefix, filename, files)
531
532    def close_all_files(self):
533        for name, fp in self.files.iteritems():
534            fp.close()
535
536    def zip(self, filename, node):
537        prefix = "cbcollect_info_%s_%s" % (node, self.start_time)
538
539        files = []
540        for name, fp in self.files.iteritems():
541            files.append(fp.name)
542        self._zip_helper(prefix, filename, files)
543
544    def _zip_helper(self, prefix, filename, files):
545        """Write all our logs to a zipfile"""
546        exe = exec_name("gozip")
547
548        fallback = False
549
550        try:
551            p = subprocess.Popen([exe, "-strip-path", "-prefix", prefix, filename] + files,
552                                 stderr=subprocess.STDOUT,
553                                 stdin=subprocess.PIPE)
554            p.stdin.close()
555            status = p.wait()
556
557            if status != 0:
558                log("gozip terminated with non-zero exit code (%d)" % status)
559        except OSError, e:
560            log("Exception during compression: %s" % e)
561            fallback = True
562
563        if fallback:
564            log("IMPORTANT:")
565            log("  Compression using gozip failed.")
566            log("  Falling back to python implementation.")
567            log("  Please let us know about this and provide console output.")
568
569            self._zip_fallback(filename, prefix, files)
570
571    def _zip_fallback(self, filename, prefix, files):
572        from zipfile import ZipFile, ZIP_DEFLATED
573        zf = ZipFile(filename, mode='w', compression=ZIP_DEFLATED)
574        try:
575            for name in files:
576                zf.write(name,
577                         "%s/%s" % (prefix, os.path.basename(name)))
578        finally:
579            zf.close()
580
class SolarisTask(Task):
    # Task.will_run() checks sys.platform against this list.
    platforms = ['sunos5', 'solaris']


class LinuxTask(Task):
    # NOTE(review): Python 2 reports 'linux2'; Python 3 reports 'linux' —
    # confirm if this script is ever run under Python 3.
    platforms = ['linux2']


class WindowsTask(Task):
    platforms = ['win32', 'cygwin']


class MacOSXTask(Task):
    platforms = ['darwin']


class UnixTask(SolarisTask, LinuxTask, MacOSXTask):
    # Runs on any of the Unix-like platforms above.
    platforms = SolarisTask.platforms + LinuxTask.platforms + MacOSXTask.platforms


class AllOsTask(UnixTask, WindowsTask):
    # Runs on every supported platform.
    platforms = UnixTask.platforms + WindowsTask.platforms
603
class LiteralTask(AllOsTask):
    """A pseudo-task that writes a fixed string into its log file instead
    of running a command."""
    def __init__(self, description, literal, **kwargs):
        self.description = description
        self.command = ''
        self.literal = literal
        self.__dict__.update(kwargs)

    def execute(self, fp):
        """Write the literal text to fp; always reports success (0)."""
        print >> fp, self.literal
        return 0
614
class CollectFile(AllOsTask):
    """A pseudo-task that registers an existing file for collection via
    the TaskRunner instead of running a command."""
    def __init__(self, description, file_path, **kwargs):
        self.description = description
        self.command = ''
        self.file_path = file_path
        self.__dict__.update(kwargs)

    def execute(self, fp):
        """Register self.file_path with the runner; always succeeds (0)."""
        self._task_runner.collect_file(self.file_path)
        print >> fp, "Collected file %s" % self.file_path
        return 0
626
def make_curl_task(name, user, password, url,
                   timeout=60, log_file="couchbase.log", base_task=AllOsTask,
                   **kwargs):
    """Build a task that fetches url with curl.

    Credentials are passed to curl on stdin via "-K-" so they never show
    up in the process list.
    """
    command = ["curl", "-sS", "--proxy", "", "-K-", url]
    credentials = "--user %s:%s" % (user, password)
    return base_task(name, command,
                     timeout=timeout,
                     log_file=log_file,
                     to_stdin=credentials,
                     **kwargs)
635
def make_cbstats_task(kind, memcached_pass, guts):
    """Build a task that gathers memcached stats of the given kind."""
    port = read_guts(guts, "memcached_port")
    user = read_guts(guts, "memcached_admin")
    command = flatten(["cbstats", "-a",
                       "%s:%s" % (local_url_addr, port), kind, "-u", user])
    # The password travels through the environment, not the command line.
    return AllOsTask("memcached stats %s" % kind,
                     command,
                     log_file="stats.log",
                     timeout=60,
                     addenv=[("CB_PASSWORD", memcached_pass)])
644
def get_local_token(guts, port):
    """Read this node's localtoken file; return '' on I/O failure.

    NOTE(review): the port argument is unused — confirm with callers
    before removing it.
    """
    token_path = read_guts(guts, "localtoken_path")
    token = ""
    try:
        with open(token_path, 'r') as handle:
            token = handle.read().rstrip('\n')
    except IOError as e:
        log("I/O error(%s): %s" % (e.errno, e.strerror))
    return token
654
def get_diag_password(guts):
    """Fetch the @localtoken-protected diag password from the REST port.

    Returns '' (after logging the curl output) when the request fails.
    """
    port = read_guts(guts, "rest_port")
    token = get_local_token(guts, port)
    url = "http://%s:%s/diag/password" % (local_url_addr, port)
    command = ["curl", "-sS", "--proxy", "", "-u", "@localtoken:%s" % token, url]

    task = AllOsTask("get diag password", command, timeout=60)
    buf = StringIO()
    if task.execute(buf) != 0:
        log(buf.getvalue())
        return ""
    return buf.getvalue()
668
def make_query_task(statement, user, password, port):
    """Build a curl task that runs a query statement via the query service."""
    quoted = urllib.quote(statement)
    url = "http://%s:%s/query/service?statement=%s" % (local_url_addr, port,
                                                       quoted)
    return make_curl_task(name="Result of query statement \'%s\'" % statement,
                          user=user, password=password, url=url)
674
def make_index_task(name, api, passwd, index_port, logfile="couchbase.log"):
    """Build a curl task that queries the given indexer API endpoint."""
    endpoint = 'http://%s:%s/%s' % (local_url_addr, index_port, api)
    return make_curl_task(name, "@", passwd, endpoint, log_file=logfile)
679
def make_redaction_task():
    """Literal marker recording that this collection was not redacted."""
    return LiteralTask("Log Redaction", "RedactLevel:none")
682
def basedir():
    """Directory containing this script; '.' when invoked without a path."""
    return os.path.dirname(sys.argv[0]) or "."
688
def make_event_log_task():
    """Build a Windows task dumping roughly the last month of event logs."""
    from datetime import datetime, timedelta

    # wmic ntevent can be extremely slow, so limit the output to
    # approximately the last month.
    cutoff = datetime.today() - timedelta(days=31)
    cutoff = cutoff.strftime('%Y%m%d000000.000000-000')

    query = ("wmic ntevent where "
             "\""
             "(LogFile='application' or LogFile='system') and "
             "EventType<3 and TimeGenerated>'%s'"
             "\" "
             "get TimeGenerated,LogFile,SourceName,EventType,Message "
             "/FORMAT:list" % cutoff)
    return WindowsTask("Event log", query)
705
706
def make_os_tasks():
    """Return the list of OS-level diagnostic tasks for every platform.

    The list mixes platform-specific Task subclasses; the TaskRunner uses
    task.will_run() to skip entries that do not apply to the current
    sys.platform.
    """
    # Couchbase-related process names; interpolated into several commands
    # below via % locals().
    programs = " ".join(["moxi", "memcached", "beam.smp",
                         "couch_compact", "godu", "sigar_port",
                         "cbq-engine", "indexer", "projector", "goxdcr",
                         "cbft", "eventing-producer", "eventing-consumer"])

    _tasks = [
        UnixTask("uname", "uname -a"),
        UnixTask("time and TZ", "date; date -u"),
        UnixTask("ntp time",
                 "ntpdate -q pool.ntp.org || "
                 "nc time.nist.gov 13 || "
                 "netcat time.nist.gov 13", timeout=60),
        UnixTask("ntp peers", "ntpq -p"),
        UnixTask("raw /etc/sysconfig/clock", "cat /etc/sysconfig/clock"),
        UnixTask("raw /etc/timezone", "cat /etc/timezone"),
        WindowsTask("System information", "systeminfo"),
        WindowsTask("Computer system", "wmic computersystem"),
        WindowsTask("Computer OS", "wmic os"),
        LinuxTask("System Hardware", "lshw -json || lshw"),
        SolarisTask("Process list snapshot", "prstat -a -c -n 100 -t -v -L 1 10"),
        SolarisTask("Process list", "ps -ef"),
        SolarisTask("Service configuration", "svcs -a"),
        SolarisTask("Swap configuration", "swap -l"),
        SolarisTask("Disk activity", "zpool iostat 1 10"),
        SolarisTask("Disk activity", "iostat -E 1 10"),
        LinuxTask("Process list snapshot", "export TERM=''; top -Hb -n1 || top -H n1"),
        LinuxTask("Process list", "ps -AwwL -o user,pid,lwp,ppid,nlwp,pcpu,maj_flt,min_flt,pri,nice,vsize,rss,tty,stat,wchan:12,start,bsdtime,command"),
        LinuxTask("Raw /proc/buddyinfo", "cat /proc/buddyinfo"),
        LinuxTask("Raw /proc/meminfo", "cat /proc/meminfo"),
        LinuxTask("Raw /proc/pagetypeinfo", "cat /proc/pagetypeinfo"),
        LinuxTask("Raw /proc/zoneinfo", "cat /proc/zoneinfo"),
        LinuxTask("Raw /proc/vmstat", "cat /proc/vmstat"),
        LinuxTask("Raw /proc/mounts", "cat /proc/mounts"),
        LinuxTask("Raw /proc/partitions", "cat /proc/partitions"),
        LinuxTask("Raw /proc/diskstats", "cat /proc/diskstats; echo ''", num_samples=10, interval=1),
        LinuxTask("Raw /proc/interrupts", "cat /proc/interrupts"),
        LinuxTask("Swap configuration", "free -t"),
        LinuxTask("Swap configuration", "swapon -s"),
        LinuxTask("Kernel modules", "lsmod"),
        LinuxTask("Distro version", "cat /etc/redhat-release"),
        LinuxTask("Distro version", "lsb_release -a"),
        LinuxTask("Distro version", "cat /etc/SuSE-release"),
        LinuxTask("Distro version", "cat /etc/issue"),
        LinuxTask("Installed software", "rpm -qa"),
        LinuxTask("Hot fix list", "rpm -V couchbase-server"),
        # NOTE: AFAIK columns _was_ necessary, but it doesn't appear to be
        # required anymore. I.e. dpkg -l correctly detects stdout as not a
        # tty and stops playing smart on formatting. Lets keep it for few
        # years and then drop, however.
        LinuxTask("Installed software", "COLUMNS=300 dpkg -l"),
        # NOTE: -V is supported only from dpkg v1.17.2 onwards.
        LinuxTask("Hot fix list", "COLUMNS=300 dpkg -V couchbase-server"),
        LinuxTask("Extended iostat", "iostat -x -p ALL 1 10 || iostat -x 1 10"),
        LinuxTask("Core dump settings", "find /proc/sys/kernel -type f -name '*core*' -print -exec cat '{}' ';'"),
        UnixTask("sysctl settings", "sysctl -a"),
        LinuxTask("Relevant lsof output",
                  "echo %(programs)s | xargs -n1 pgrep | xargs -n1 -r -- lsof -n -p" % locals()),
        LinuxTask("LVM info", "lvdisplay"),
        LinuxTask("LVM info", "vgdisplay"),
        LinuxTask("LVM info", "pvdisplay"),
        LinuxTask("Block device queue settings",
                  "find /sys/block/*/queue -type f | xargs grep -vH xxxx | sort"),
        MacOSXTask("Process list snapshot", "top -l 1"),
        MacOSXTask("Disk activity", "iostat 1 10"),
        MacOSXTask("Process list",
                   "ps -Aww -o user,pid,lwp,ppid,nlwp,pcpu,pri,nice,vsize,rss,tty,"
                   "stat,wchan:12,start,bsdtime,command"),
        WindowsTask("Installed software", "wmic product get name, version"),
        WindowsTask("Service list", "wmic service where state=\"running\" GET caption, name, state"),
        WindowsTask("Process list", "wmic process"),
        WindowsTask("Process usage", "tasklist /V /fo list"),
        WindowsTask("Swap settings", "wmic pagefile"),
        WindowsTask("Disk partition", "wmic partition"),
        WindowsTask("Disk volumes", "wmic volume"),
        UnixTask("Network configuration", "ifconfig -a", interval=10,
                 num_samples=2),
        LinuxTask("Network configuration", "echo link addr neigh rule route netns | xargs -n1 -- sh -x -c 'ip $1 list' --"),
        WindowsTask("Network configuration", "ipconfig /all", interval=10,
                    num_samples=2),
        LinuxTask("Raw /proc/net/dev", "cat /proc/net/dev"),
        LinuxTask("Network link statistics", "ip -s link"),
        UnixTask("Network status", "netstat -anp || netstat -an"),
        WindowsTask("Network status", "netstat -anotb"),
        AllOsTask("Network routing table", "netstat -rn"),
        LinuxTask("Network socket statistics", "ss -an"),
        LinuxTask("Extended socket statistics", "ss -an --info --processes", timeout=300),
        UnixTask("Arp cache", "arp -na"),
        LinuxTask("Iptables dump", "iptables-save"),
        UnixTask("Raw /etc/hosts", "cat /etc/hosts"),
        UnixTask("Raw /etc/resolv.conf", "cat /etc/resolv.conf"),
        UnixTask("Raw /etc/nsswitch.conf", "cat /etc/nsswitch.conf"),
        WindowsTask("Arp cache", "arp -a"),
        WindowsTask("Network Interface Controller", "wmic nic"),
        WindowsTask("Network Adapter", "wmic nicconfig"),
        WindowsTask("Active network connection", "wmic netuse"),
        WindowsTask("Protocols", "wmic netprotocol"),
        WindowsTask("Hosts file", "type %SystemRoot%\system32\drivers\etc\hosts"),
        WindowsTask("Cache memory", "wmic memcache"),
        WindowsTask("Physical memory", "wmic memphysical"),
        WindowsTask("Physical memory chip info", "wmic memorychip"),
        WindowsTask("Local storage devices", "wmic logicaldisk"),
        UnixTask("Filesystem", "df -ha"),
        UnixTask("System activity reporter", "sar 1 10"),
        UnixTask("System paging activity", "vmstat 1 10"),
        UnixTask("System uptime", "uptime"),
        UnixTask("Last logins of users and ttys", "last -x || last"),
        UnixTask("couchbase user definition", "getent passwd couchbase"),
        UnixTask("couchbase user limits", "su couchbase -s /bin/sh -c \"ulimit -a\"",
                 privileged=True),
        UnixTask("Interrupt status", "intrstat 1 10"),
        UnixTask("Processor status", "mpstat 1 10"),
        UnixTask("System log", "cat /var/adm/messages"),
        LinuxTask("Raw /proc/uptime", "cat /proc/uptime"),
        LinuxTask("Systemd journal",
                  "journalctl | gzip -c > systemd_journal.gz",
                  change_dir=True, artifacts=['systemd_journal.gz']),
        LinuxTask("All logs", "tar cz /var/log/syslog* /var/log/dmesg /var/log/messages* /var/log/daemon* /var/log/debug* /var/log/kern.log* 2>/dev/null",
                  log_file="syslog.tar.gz", no_header=True),
        LinuxTask("Relevant proc data", "echo %(programs)s | "
                  "xargs -n1 pgrep | xargs -n1 -- sh -c 'echo $1; cat /proc/$1/status; cat /proc/$1/limits; cat /proc/$1/smaps; cat /proc/$1/numa_maps; cat /proc/$1/task/*/sched; echo' --" % locals()),
        LinuxTask("Processes' environment", "echo %(programs)s | "
                  r"xargs -n1 pgrep | xargs -n1 -- sh -c 'echo $1; ( cat /proc/$1/environ | tr \\0 \\n | egrep -v ^CB_MASTER_PASSWORD=\|^CBAUTH_REVRPC_URL=); echo' --" % locals()),
        LinuxTask("Processes' stack",
                  "for program in %(programs)s; do for thread in $(pgrep --lightweight $program); do echo $program/$thread:; cat /proc/$thread/stack; echo; done; done" % locals()),
        LinuxTask("NUMA data", "numactl --hardware"),
        LinuxTask("NUMA data", "numactl --show"),
        LinuxTask("NUMA data", "cat /sys/devices/system/node/node*/numastat"),
        UnixTask("Kernel log buffer", "dmesg -T || dmesg -H || dmesg"),
        LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/transparent_hugepage/enabled"),
        LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/transparent_hugepage/defrag"),
        LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/redhat_transparent_hugepage/enabled"),
        LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/redhat_transparent_hugepage/defrag"),
        LinuxTask("Network statistics", "netstat -s"),
        LinuxTask("Full raw netstat", "cat /proc/net/netstat"),
        LinuxTask("CPU throttling info", "echo /sys/devices/system/cpu/cpu*/thermal_throttle/* | xargs -n1 -- sh -c 'echo $1; cat $1' --"),
        LinuxTask("Raw PID 1 scheduler /proc/1/sched", "cat /proc/1/sched | head -n 1"),
        LinuxTask("Raw PID 1 control groups /proc/1/cgroup", "cat /proc/1/cgroup"),
        make_event_log_task(),
        ]

    return _tasks
849
850# stolen from http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
def iter_flatten(iterable):
    """Recursively yield the leaves of *iterable*.

    Elements that are lists or tuples are descended into; every other
    element (including strings) is yielded as-is.
    """
    for item in iterable:
        if isinstance(item, (list, tuple)):
            for leaf in iter_flatten(item):
                yield leaf
        else:
            yield item
859
def flatten(iterable):
    """Return the fully flattened contents of *iterable* as a list."""
    return list(iter_flatten(iterable))
862
def read_guts(guts, key):
    """Look up *key* in the server-guts dict, returning "" when absent."""
    try:
        return guts[key]
    except KeyError:
        return ""
865
def winquote_path(s):
    """Quote a path for cmd.exe: collapse doubled backslashes, convert
    forward slashes to backslashes, and wrap the result in double quotes."""
    normalized = s.replace("\\\\", "\\").replace('/', "\\")
    return '"%s"' % normalized
868
869# python's split splits empty string to [''] which doesn't make any
870# sense. So this function works around that.
def correct_split(string, splitchar):
    """str.split wrapper that maps the empty string to [] instead of [''].

    Plain split("") yields [''], which is useless when the input is a
    possibly-empty delimiter-separated list.
    """
    parts = string.split(splitchar)
    return [] if parts == [''] else parts
876
def make_stats_archives_task(guts, initargs_path):
    """Build the task that dumps ns_server stats archives to
    stats_archives.json, or return [] when any prerequisite (the helper
    scripts or a known stats directory) is missing."""
    escript = exec_name("escript")
    escript_wrapper = find_script("escript-wrapper")
    dump_stats = find_script("dump-stats")
    stats_dir = read_guts(guts, "stats_dir")

    # All three pieces are required; without them there is nothing to run.
    if dump_stats is None or escript_wrapper is None or not stats_dir:
        return []

    output_file = "stats_archives.json"
    cmd = [escript,
           escript_wrapper,
           "--initargs-path", initargs_path, "--",
           dump_stats, stats_dir, output_file]
    return AllOsTask("stats archives", cmd,
                     change_dir=True,
                     artifacts=[output_file])
894
def make_product_task(guts, initargs_path, memcached_pass, options):
    """Build the list of Couchbase-product-specific collection tasks.

    Arguments:
      guts           -- dict of server metadata from get_server_guts();
                        missing keys simply disable the related tasks
      memcached_pass -- admin password used for REST/memcached requests
      initargs_path  -- path to the server's initargs file; the install
                        root is derived from it
      options        -- parsed command-line options (multi_node_diag read)

    Returns a flat list of task objects for the task runner.
    """
    # initargs lives at <root>/var/lib/couchbase/initargs, hence 4 levels up.
    root = os.path.abspath(os.path.join(initargs_path, "..", "..", "..", ".."))
    dbdir = os.path.realpath(read_guts(guts, "db_dir"))
    viewdir = os.path.realpath(read_guts(guts, "idx_dir"))
    nodes = correct_split(read_guts(guts, "nodes"), ",")

    # NOTE(review): local_url_addr (and local_addr below) appear to be
    # module-level globals defined earlier in this file.
    diag_url = "http://%s:%s/diag?noLogs=1" % (local_url_addr, read_guts(guts, "rest_port"))
    if not options.multi_node_diag:
        diag_url += "&oneNode=1"


    from distutils.spawn import find_executable

    # Use the first DNS lookup utility found on PATH, in preference order.
    lookup_cmd = None
    for cmd in ["dig", "nslookup", "host"]:
        if find_executable(cmd) is not None:
            lookup_cmd = cmd
            break

    lookup_tasks = []
    if lookup_cmd is not None:
        lookup_tasks = [UnixTask("DNS lookup information for %s" % node,
                                 "%(lookup_cmd)s '%(node)s'" % locals())
                        for node in nodes]

    getent_tasks = [LinuxTask("Name Service Switch "
                              "hosts database info for %s" % node,
                              ["getent", "ahosts", node])
                    for node in nodes]

    # N1QL system-catalog dumps, only when a query service port is known.
    query_tasks = []
    query_port = read_guts(guts, "query_port")
    if query_port:
        def make(statement):
            return make_query_task(statement, user="@",
                                   password=memcached_pass,
                                   port=query_port)

        query_tasks = [make("SELECT * FROM system:datastores"),
                       make("SELECT * FROM system:namespaces"),
                       make("SELECT * FROM system:keyspaces"),
                       make("SELECT * FROM system:indexes")]

    # GSI indexer REST diagnostics.
    index_tasks = []
    index_port = read_guts(guts, "indexer_http_port")
    if index_port:
        index_tasks = [make_index_task("Index definitions are: ", "getIndexStatus",
                                       memcached_pass, index_port),
                       make_index_task("Indexer settings are: ", "settings",
                                       memcached_pass, index_port),
                       make_index_task("Index storage stats are: ", "stats/storage",
                                       memcached_pass, index_port),
                       make_index_task("MOI allocator stats are: ", "stats/storage/mm",
                                       memcached_pass, index_port),
                       make_index_task("Indexer Go routine dump: ", "debug/pprof/goroutine?debug=1",
                                       memcached_pass, index_port, logfile="indexer_pprof.log"),
                       make_index_task("Indexer Rebalance Tokens: ", "listRebalanceTokens",
                                       memcached_pass, index_port),
                       make_index_task("Indexer Metadata Tokens: ", "listMetadataTokens",
                                       memcached_pass, index_port),
                       make_index_task("Indexer Memory Profile: ", "debug/pprof/heap?debug=1",
                                       memcached_pass, index_port, logfile="indexer_mprof.log"),
        ]

    # Projector goroutine dump.
    projector_tasks = []
    proj_port = read_guts(guts, "projector_port")
    if proj_port:
        proj_url = 'http://%s:%s/debug/pprof/goroutine?debug=1' % (local_url_addr, proj_port)
        projector_tasks = [make_curl_task(name="Projector Go routine dump ",
                                          user="@", password=memcached_pass,
                                          url=proj_url, log_file="projector_pprof.log")]

    # Full-text search diagnostics.
    fts_tasks = []
    fts_port = read_guts(guts, "fts_http_port")
    if fts_port:
        url = 'http://%s:%s/api/diag' % (local_url_addr, fts_port)
        fts_tasks = [make_curl_task(name="FTS /api/diag: ",
                                    user="@", password=memcached_pass,
                                    url=url,
                                    log_file="fts_diag.json", no_header=True)]

    # Analytics (CBAS): diagnostics, goroutine dump (via the parent port),
    # and Metadata dataverse dumps through the query-task mechanism.
    cbas_tasks = []
    cbas_port = read_guts(guts, "cbas_admin_port")
    if cbas_port:
        cbas_diag_url = 'http://%s:%s/analytics/node/diagnostics' % (local_url_addr, cbas_port)
        cbas_parent_port = read_guts(guts, "cbas_parent_port")
        cbas_pprof_url = 'http://%s:%s/debug/pprof/goroutine?debug=1' % (local_url_addr, cbas_parent_port)
        def make_cbas(statement):
            return make_query_task(statement, user="@",
                                   password=memcached_pass,
                                   port=cbas_port)

        cbas_tasks = [make_curl_task(name="Analytics /analytics/node/diagnostics: ",
                                     user="@", password=memcached_pass,
                                     url=cbas_diag_url,
                                     log_file="analytics_diag.json", no_header=True),
                      make_curl_task(name="Analytics Go routine dump: ",
                                     user="@", password=memcached_pass,
                                     url=cbas_pprof_url,
                                     log_file="analytics_pprof.log", no_header=True),
                      make_cbas("select * from `Metadata`.`Dataverse`"),
                      make_cbas("select * from `Metadata`.`Dataset`"),
                      make_cbas("select * from `Metadata`.`Index`"),
                      make_cbas("select * from `Metadata`.`Bucket`"),
                      make_cbas("select * from `Metadata`.`Link`")]

    # Eventing service stats and goroutine dump.
    eventing_tasks = []
    eventing_port = read_guts(guts, "eventing_http_port")
    if eventing_port:
        stats_url = 'http://%s:%s/api/v1/stats?type=full' % (local_url_addr, eventing_port)
        eventing_pprof_url = 'http://%s:%s/debug/pprof/goroutine?debug=1' % (local_url_addr, eventing_port)
        eventing_tasks = [make_curl_task(name="Eventing /api/v1/stats: ",
                                         user="@", password=memcached_pass,
                                         url=stats_url,
                                         log_file="eventing_stats.json", no_header=True),
                          make_curl_task(name="Eventing Go routine dump: ",
                                         user="@", password=memcached_pass,
                                         url=eventing_pprof_url,
                                         log_file="eventing_pprof.log", no_header=True)]

    # Unconditional tasks. Nested list literals here are flattened at the
    # end; per-platform Task subclasses decide at run time whether to run.
    _tasks = [
        UnixTask("Directory structure",
                 ["ls", "-lRai", root]),
        UnixTask("Database directory structure",
                 ["ls", "-lRai", dbdir]),
        UnixTask("Index directory structure",
                 ["ls", "-lRai", viewdir]),
        UnixTask("couch_dbinfo",
                 ["find", dbdir, "-type", "f",
                  "-name", "*.couch.*",
                  "-exec", "couch_dbinfo", "{}", "+"]),
        LinuxTask("Database directory filefrag info",
                  ["find", dbdir, "-type", "f", "-exec", "filefrag", "-v", "{}", "+"]),
        LinuxTask("Index directory filefrag info",
                  ["find", viewdir, "-type", "f", "-exec", "filefrag", "-v", "{}", "+"]),
        WindowsTask("Database directory structure",
                    "dir /s " + winquote_path(dbdir)),
        WindowsTask("Index directory structure",
                    "dir /s " + winquote_path(viewdir)),
        WindowsTask("Version file",
                    "type " + winquote_path(basedir()) + "\\..\\VERSION.txt"),
        WindowsTask("Manifest file",
                    "type " + winquote_path(basedir()) + "\\..\\manifest.txt"),
        WindowsTask("Manifest file",
                    "type " + winquote_path(basedir()) + "\\..\\manifest.xml"),
        LinuxTask("Version file", "cat '%s/VERSION.txt'" % root),
        LinuxTask("Variant file", "cat '%s/VARIANT.txt'" % root),
        LinuxTask("Manifest file", "cat '%s/manifest.txt'" % root),
        LinuxTask("Manifest file", "cat '%s/manifest.xml'" % root),
        LiteralTask("Couchbase config", read_guts(guts, "ns_config")),
        LiteralTask("Couchbase static config", read_guts(guts, "static_config")),
        LiteralTask("Raw ns_log", read_guts(guts, "ns_log")),
        # TODO: just gather those in python
        WindowsTask("Memcached logs",
                    "cd " + winquote_path(read_guts(guts, "memcached_logs_path")) + " && " +
                    "for /f %a IN ('dir memcached.log.* /od /tw /b') do type %a",
                    log_file="memcached.log"),
        UnixTask("Memcached logs",
                 ["sh", "-c", 'cd "$1"; for file in $(ls -tr memcached.log.*); do cat \"$file\"; done', "--", read_guts(guts, "memcached_logs_path")],
                 log_file="memcached.log"),
        [WindowsTask("Ini files (%s)" % p,
                     "type " + winquote_path(p),
                     log_file="ini.log")
         for  p in read_guts(guts, "couch_inis").split(";")],
        UnixTask("Ini files",
                 ["sh", "-c", 'for i in "$@"; do echo "file: $i"; cat "$i"; done', "--"] + read_guts(guts, "couch_inis").split(";"),
                 log_file="ini.log"),

        make_curl_task(name="couchbase diags",
                       user="@",
                       password=memcached_pass,
                       timeout=600,
                       url=diag_url,
                       log_file="diag.log"),

        make_curl_task(name="master events",
                       user="@",
                       password=memcached_pass,
                       timeout=300,
                       url='http://%s:%s/diag/masterEvents?o=1' % (local_url_addr, read_guts(guts, "rest_port")),
                       log_file="master_events.log",
                       no_header=True),

        make_curl_task(name="ale configuration",
                       user="@",
                       password=memcached_pass,
                       url='http://%s:%s/diag/ale' % (local_url_addr, read_guts(guts, "rest_port")),
                       log_file="couchbase.log"),

        # One task per known ns_server/service log file.
        [AllOsTask("couchbase logs (%s)" % name, "cbbrowse_logs %s" % name,
                   addenv = [("REPORT_DIR", read_guts(guts, "log_path"))],
                   log_file="ns_server.%s" % name)
         for name in ["debug.log", "info.log", "error.log", "couchdb.log",
                      "xdcr_target.log",
                      "views.log", "mapreduce_errors.log",
                      "stats.log", "babysitter.log",
                      "reports.log", "http_access.log",
                      "http_access_internal.log", "ns_couchdb.log",
                      "goxdcr.log", "query.log", "projector.log", "indexer.log",
                      "fts.log", "metakv.log", "json_rpc.log", "eventing.log",
                      "analytics_info.log", "analytics_debug.log", "analytics_shutdown.log",
                      "analytics_error.log", "analytics_warn.log", "analytics_dcpdebug.log",
                      "analytics_trace.json", "analytics_access.log"]],

        # cbstats, one invocation per stat group (list entries carry extra args).
        [make_cbstats_task(kind, memcached_pass, guts)
         for kind in ["all", "allocator", "checkpoint", "config",
                      "dcp", "dcpagg",
                      ["diskinfo", "detail"], ["dispatcher", "logs"],
                      "eviction", "failovers",
                      "kvstore", "kvtimings", "memory",
                      "prev-vbucket",
                      "runtimes", "scheduler",
                      "tasks",
                      "timings", "uuid",
                      "vbucket", "vbucket-details", "vbucket-seqno",
                      "warmup", "workload"]],

        [AllOsTask("memcached mcstat %s" % kind,
                   flatten(["mcstat", "-h", "%s:%s" % (local_url_addr, read_guts(guts, "memcached_port")),
                            "-u", read_guts(guts, "memcached_admin"), kind]),
                   log_file="stats.log",
                   timeout=60,
                   addenv=[("CB_PASSWORD", memcached_pass)])
         for kind in ["connections", "tracing"]],

        # FTS moss store stats, for pindex stores containing .moss files.
        [AllOsTask("fts mossScope (%s)" % path,
                   ["mossScope", "stats", "diag", path],
                   log_file="fts_store_stats.log")
         for path in glob.glob(os.path.join(viewdir, "@fts", "*.pindex", "store"))
            if any(".moss" in entry for entry in os.listdir(path))],

        [AllOsTask("fts scorch zap (%s)" % path,
                   ["cbft-bleve", "zap", "footer", path],
                   log_file="fts_store_stats.log")
         for path in glob.glob(os.path.join(viewdir, "@fts", "*.pindex", "store", "*.zap"))],

        # Design docs for every non-memcached bucket.
        [AllOsTask("ddocs for %s (%s)" % (bucket, path),
                   ["couch_dbdump", path],
                   log_file = "ddocs.log")
         for bucket in set(correct_split(read_guts(guts, "buckets"), ",")) - set(correct_split(read_guts(guts, "memcached_buckets"), ","))
         for path in glob.glob(os.path.join(dbdir, bucket, "master.couch*"))],

        [AllOsTask("Couchstore local documents (%s, %s)" % (bucket, os.path.basename(path)),
                   ["couch_dbdump", "--local", path],
                   log_file = "couchstore_local.log")
        for bucket in set(correct_split(read_guts(guts, "buckets"), ",")) - set(correct_split(read_guts(guts, "memcached_buckets"), ","))
        for path in glob.glob(os.path.join(dbdir, bucket, "*.couch.*"))],

        # RocksDB has logs per DB (i.e. vBucket). 'LOG' is the most
        # recent file, with old files named LOG.old.<timestamp>.
        # Sort so we go from oldest -> newest as per other log files.
        [AllOsTask("RocksDB Log file (%s, %s)" % (bucket, os.path.basename(path)),
                   "cat '%s'" % (log_file),
                   log_file="kv_rocks.log")
        for bucket in (set(correct_split(read_guts(guts, "buckets"), ",")) -
                       set(correct_split(read_guts(guts, "memcached_buckets"), ",")))
        for path in glob.glob(os.path.join(dbdir, bucket, "rocksdb.*"))
        for log_file in sorted(glob.glob(os.path.join(path, "LOG.old.*"))) + [os.path.join(path, "LOG")]],

        [UnixTask("moxi stats (port %s)" % port,
                  "echo stats proxy | nc %s %s" % (local_addr, port),
                  log_file="stats.log",
                  timeout=60)
         for port in correct_split(read_guts(guts, "moxi_ports"), ",")],

        # mctimings: once with no extra stat, once for subdoc_execute.
        [AllOsTask("mctimings %s" % stat,
                   ["mctimings",
                    "-u", read_guts(guts, "memcached_admin"),
                    "-h", "%s:%s" % (local_url_addr, read_guts(guts, "memcached_port")),
                    "-v"] + stat,
                   log_file="stats.log",
                   timeout=60,
                   addenv=[("CB_PASSWORD", memcached_pass)])
         for stat in ([], ["subdoc_execute"])],

        CollectFile("Users storage", read_guts(guts, "users_storage_path")),

        make_stats_archives_task(guts, initargs_path),
        AllOsTask("Phosphor Trace",
                  ["kv_trace_dump",
                   "-H", "%s:%s" % (local_url_addr, read_guts(guts, "memcached_port")),
                   "-u", read_guts(guts, "memcached_admin"),
                   "-P", memcached_pass,
                   "kv_trace.json"],
                  timeout=120,
                  log_file="stats.log",
                  change_dir=True,
                  artifacts=["kv_trace.json"]),
        ]

    # Service-specific task lists come first; flatten removes the nesting
    # and the [] placeholders of absent services.
    _tasks = flatten([getent_tasks, lookup_tasks, query_tasks, index_tasks,
                      projector_tasks, fts_tasks, cbas_tasks, eventing_tasks, _tasks])

    return _tasks
1189
def find_script(name):
    """Locate helper script *name* under basedir() or basedir()/scripts.

    Returns the absolute path of the first match (logging it), or None
    when the script is not present in either directory.
    """
    for directory in (basedir(), os.path.join(basedir(), "scripts")):
        candidate = os.path.join(directory, name)
        if os.path.exists(candidate):
            log("Found %s: %s" % (name, candidate))
            return os.path.abspath(candidate)

    return None
1199
def get_server_guts(initargs_path):
    """Run the dump-guts escript for *initargs_path* and parse its output.

    dump-guts emits NUL-separated tokens alternating key, value; they are
    paired up into a dict. Returns {} when the script cannot be found or
    produces no usable output.
    """
    dump_guts_path = find_script("dump-guts")

    if dump_guts_path is None:
        log("Couldn't find dump-guts script. Some information will be missing")
        return {}

    escript = exec_name("escript")
    extra_args = os.getenv("EXTRA_DUMP_GUTS_ARGS")
    args = [escript, dump_guts_path, "--initargs-path", initargs_path]
    if extra_args:
        # extra args are passed as a single ;-separated environment string
        args = args + extra_args.split(";")
    print("Checking for server guts in %s..." % initargs_path)
    p = subprocess.Popen(args, stdout = subprocess.PIPE)
    output = p.stdout.read()
    p.wait()
    tokens = output.rstrip("\0").split("\0")
    d = {}
    if len(tokens) > 1:
        # Stop before a trailing unpaired token so a malformed
        # (odd-length) dump cannot raise IndexError on tokens[i+1].
        for i in xrange(0, len(tokens) - 1, 2):
            d[tokens[i]] = tokens[i+1]
    return d
1224
1225def guess_utility(command):
1226    if isinstance(command, list):
1227        command = ' '.join(command)
1228
1229    if not command:
1230        return None
1231
1232    if re.findall(r'[|;&]|\bsh\b|\bsu\b|\bfind\b|\bfor\b', command):
1233        # something hard to easily understand; let the human decide
1234        return command
1235    else:
1236        return command.split()[0]
1237
1238def dump_utilities(*args, **kwargs):
1239    specific_platforms = { SolarisTask : 'Solaris',
1240                           LinuxTask :  'Linux',
1241                           WindowsTask : 'Windows',
1242                           MacOSXTask : 'Mac OS X' }
1243    platform_utils = dict((name, set()) for name in specific_platforms.values())
1244
1245    class FakeOptions(object):
1246        def __getattr__(self, name):
1247            return None
1248
1249    tasks = make_os_tasks() + make_product_task({}, "", "", FakeOptions())
1250
1251    for task in tasks:
1252        utility = guess_utility(task.command)
1253        if utility is None:
1254            continue
1255
1256        for (platform, name) in specific_platforms.items():
1257            if isinstance(task, platform):
1258                platform_utils[name].add(utility)
1259
1260    print '''This is an autogenerated, possibly incomplete and flawed list
1261of utilites used by cbcollect_info'''
1262
1263    for (name, utilities) in sorted(platform_utils.items(), key=lambda x: x[0]):
1264        print "\n%s:" % name
1265
1266        for utility in sorted(utilities):
1267            print "        - %s" % utility
1268
1269    sys.exit(0)
1270
def stdin_watcher():
    """Block until stdin is closed or receives a newline, retrying reads
    interrupted by signals (EINTR)."""
    fd = sys.stdin.fileno()

    while True:
        try:
            data = os.read(fd, 1024)
        except OSError as e:
            if e.errno != errno.EINTR:
                raise
            continue

        # EOF (stdin closed) or an explicit newline both end the watch.
        if not data or '\n' in data:
            break
1286
def setup_stdin_watcher():
    """Start a daemon thread that forces process exit (status 2) once
    stdin closes or a newline arrives (used with --watch-stdin)."""
    def watch():
        try:
            stdin_watcher()
        finally:
            AltExit.exit(2)

    watcher = threading.Thread(target=watch)
    watcher.setDaemon(True)
    watcher.start()
1296
class CurlKiller:
    """Exit hook that SIGKILLs a still-running curl subprocess.

    Register cleanup() with AltExit after spawning curl and call disarm()
    once the process has finished normally.
    """

    def __init__(self, p):
        self.p = p

    def cleanup(self):
        if self.p is not None:
            print("Killing curl...")
            os.kill(self.p.pid, signal.SIGKILL)
            print("done")

    def disarm(self):
        # Forget the process so cleanup() becomes a no-op.
        self.p = None
1307
def do_upload_and_exit(path, url, proxy):
    """Upload *path* to *url* with curl (optionally via *proxy*) and exit:
    status 0 on HTTP 200, status 1 on any curl failure or other HTTP code.
    """
    # Temp file swallows the response body; we only care about the
    # --write-out HTTP status code that curl prints to stdout.
    output_fd, output_file = tempfile.mkstemp()
    os.close(output_fd)

    AltExit.register(lambda: os.unlink(output_file))

    args = ["curl", "-sS",
            "--output", output_file,
            "--proxy", proxy,
            "--write-out", "%{http_code}", "--upload-file", path, url]
    # Spawn curl and register its killer under AltExit's lock, so an exit
    # racing with Popen sees either no process or a registered cleanup.
    # register_and_unlock() releases the lock on success.
    AltExit.lock.acquire()
    try:
        p = subprocess.Popen(args, stdout=subprocess.PIPE)
        k = CurlKiller(p)
        AltExit.register_and_unlock(k.cleanup)
    except Exception, e:
        AltExit.lock.release()
        raise e

    stdout, _ = p.communicate()
    k.disarm()  # curl finished; don't SIGKILL it during exit

    if p.returncode != 0:
        sys.exit(1)
    else:
        if stdout.strip() == '200':
            log('Upload path is: %s' % url)
            log('Done uploading')
            sys.exit(0)
        else:
            log('HTTP status code: %s' % stdout)
            sys.exit(1)
1340
def parse_host(host):
    """Split *host* into (scheme, netloc, path), assuming https:// when
    no scheme was given."""
    parsed = urlparse.urlsplit(host)
    if not parsed.scheme:
        parsed = urlparse.urlsplit('https://' + host)

    return parsed.scheme, parsed.netloc, parsed.path
1347
def generate_upload_url(parser, options, zip_filename):
    """Return the URL the collected zip will be uploaded to, or None when
    --upload-host was not given.

    Calls parser.error() (which exits) if --upload-host is present
    without --customer.
    """
    if not options.upload_host:
        return None

    if not options.upload_customer:
        parser.error("Need --customer when --upload-host is given")

    scheme, netloc, path = parse_host(options.upload_host)

    # URL-encode the user-supplied path components.
    customer = urllib.quote(options.upload_customer)
    fname = urllib.quote(os.path.basename(zip_filename))
    if options.upload_ticket:
        full_path = '%s/%s/%d/%s' % (path, customer, options.upload_ticket, fname)
    else:
        full_path = '%s/%s/%s' % (path, customer, fname)

    upload_url = urlparse.urlunsplit((scheme, netloc, full_path, '', ''))
    log("Will upload collected .zip file into %s" % upload_url)
    return upload_url
1366
def check_ticket(option, opt, value):
    """optparse type checker for the custom 'ticket' option type.

    Accepts a 1-7 digit ticket number and returns it as an int; raises
    OptionValueError for anything else (empty, longer, non-numeric).
    """
    # raw string avoids the invalid-escape-sequence warning for \d
    if re.match(r'^\d{1,7}$', value):
        return int(value)
    else:
        raise optparse.OptionValueError(
            "option %s: invalid ticket number: %r" % (opt, value))
1373
class CbcollectInfoOptions(optparse.Option):
    """optparse Option subclass adding a custom 'ticket' option type,
    validated and converted to int by check_ticket()."""
    from copy import copy

    TYPES = optparse.Option.TYPES + ("ticket",)
    # Copy the stock checker map so the base class's dict isn't mutated.
    TYPE_CHECKER = copy(optparse.Option.TYPE_CHECKER)
    TYPE_CHECKER["ticket"] = check_ticket
1380
1381def main():
1382    # ask all tools to use C locale (MB-12050)
1383    os.environ['LANG'] = 'C'
1384    os.environ['LC_ALL'] = 'C'
1385
1386    mydir = os.path.dirname(sys.argv[0])
1387    #(MB-8239)erl script fails in OSX as it is unable to find COUCHBASE_TOP -ravi
1388    if platform.system() == 'Darwin':
1389        os.environ["COUCHBASE_TOP"] = os.path.abspath(os.path.join(mydir, ".."))
1390
1391    parser = optparse.OptionParser(usage=USAGE, option_class=CbcollectInfoOptions)
1392    parser.add_option("-r", dest="root",
1393                      help="root directory - defaults to %s" % (mydir + "/.."),
1394                      default=os.path.abspath(os.path.join(mydir, "..")))
1395    parser.add_option("-v", dest="verbosity", help="increase verbosity level",
1396                      action="count", default=0)
1397    parser.add_option("-p", dest="product_only", help="gather only product related information",
1398                      action="store_true", default=False)
1399    parser.add_option("-d", action="callback", callback=dump_utilities,
1400                      help="dump a list of commands that cbcollect_info needs")
1401    parser.add_option("--watch-stdin", dest="watch_stdin",
1402                      action="store_true", default=False,
1403                      help=optparse.SUPPRESS_HELP)
1404    parser.add_option("--initargs", dest="initargs", help="server 'initargs' path")
1405    parser.add_option("--multi-node-diag", dest="multi_node_diag",
1406                      action="store_true", default=False,
1407                      help="collect per-node diag  on all reachable nodes (default is just this node)")
1408    parser.add_option("--log-redaction-level", dest="redact_level",
1409                      default="none",
1410                      help="redaction level for the logs collected, none and partial supported (default is none)")
1411    parser.add_option("--log-redaction-salt", dest="salt_value",
1412                      default=str(uuid.uuid4()),
1413                      help="Is used to salt the hashing of tagged data, \
1414                            defaults to random uuid. If input by user it should \
1415                            be provided along with --log-redaction-level option")
1416    parser.add_option("--just-upload-into", dest="just_upload_into",
1417                      help=optparse.SUPPRESS_HELP)
1418    parser.add_option("--upload-host", dest="upload_host",
1419                      help="gather diagnostics and upload it for couchbase support. Gives upload host")
1420    parser.add_option("--customer", dest="upload_customer",
1421                      help="specifies customer name for upload")
1422    parser.add_option("--ticket", dest="upload_ticket", type='ticket',
1423                      help="specifies support ticket number for upload")
1424    parser.add_option("--bypass-sensitive-data", dest="bypass_sensitive_data",
1425                      action="store_true", default=False,
1426                      help="do not collect sensitive data")
1427    parser.add_option("--task-regexp", dest="task_regexp",
1428                      default="",
1429                      help="Run only tasks matching regexp. For debugging purposes only.")
1430    parser.add_option("--tmp-dir", dest="tmp_dir", default=None,
1431                      help="set the temp dir used while processing collected data. Overrides the TMPDIR env variable if set")
1432    parser.add_option("--upload-proxy", dest="upload_proxy", default="",
1433                      help="specifies proxy for upload")
1434    options, args = parser.parse_args()
1435
1436    if len(args) != 1:
1437        parser.error("incorrect number of arguments. Expecting filename to collect diagnostics into")
1438
1439    if options.watch_stdin:
1440        setup_stdin_watcher()
1441
1442    zip_filename = args[0]
1443    if zip_filename[-4:] != '.zip':
1444        zip_filename = zip_filename + '.zip'
1445
1446    zip_dir = os.path.dirname(os.path.abspath(zip_filename))
1447
1448    if not os.access(zip_dir, os.W_OK | os.X_OK):
1449        print("do not have write access to the directory %s" % (zip_dir))
1450        sys.exit(1)
1451
1452    if options.redact_level != "none" and options.redact_level != "partial":
1453        parser.error("Invalid redaction level. Only 'none' and 'partial' are supported.")
1454
1455    redact_zip_file = zip_filename[:-4] + "-redacted" + zip_filename[-4:]
1456    upload_url = ""
1457    if options.redact_level != "none":
1458        upload_url = generate_upload_url(parser, options, redact_zip_file)
1459    else:
1460        upload_url = generate_upload_url(parser, options, zip_filename)
1461
1462
1463    erldir = os.path.join(mydir, 'erlang', 'bin')
1464    if os.name == 'posix':
1465        path = [mydir,
1466                '/opt/couchbase/bin',
1467                erldir,
1468                os.environ['PATH'],
1469                '/bin',
1470                '/sbin',
1471                '/usr/bin',
1472                '/usr/sbin']
1473        os.environ['PATH'] = ':'.join(path)
1474
1475        library_path = [os.path.join(options.root, 'lib')]
1476
1477        current_library_path = os.environ.get('LD_LIBRARY_PATH')
1478        if current_library_path is not None:
1479            library_path.append(current_library_path)
1480
1481        os.environ['LD_LIBRARY_PATH'] = ':'.join(library_path)
1482    elif os.name == 'nt':
1483      path = [mydir, erldir, os.environ['PATH']]
1484      os.environ['PATH'] = ';'.join(path)
1485
1486    if options.just_upload_into != None:
1487        do_upload_and_exit(args[0], options.just_upload_into,
1488                options.upload_proxy)
1489
1490    runner = TaskRunner(verbosity=options.verbosity,
1491                        task_regexp=options.task_regexp,
1492                        tmp_dir=options.tmp_dir,
1493                        salt_value=options.salt_value)
1494    runner.run(make_redaction_task()) # We want this at the top of couchbase.log
1495
1496    if not options.product_only:
1497        runner.run_tasks(make_os_tasks())
1498
1499    initargs_variants = [os.path.abspath(os.path.join(options.root, "var", "lib", "couchbase", "initargs")),
1500                         "/opt/couchbase/var/lib/couchbase/initargs",
1501                         os.path.expanduser("~/Library/Application Support/Couchbase/var/lib/couchbase/initargs")]
1502
1503    if options.initargs != None:
1504        initargs_variants = [options.initargs]
1505
1506    guts = None
1507    guts_initargs_path = None
1508
1509    for initargs_path in initargs_variants:
1510        d = get_server_guts(initargs_path)
1511        # print("for initargs: %s got:\n%s" % (initargs_path, d))
1512        if len(d) > 0:
1513            guts = d
1514            guts_initargs_path = os.path.abspath(initargs_path)
1515            break
1516
1517    if guts is None:
1518        log("Couldn't read server guts. Using some default values.")
1519
1520        prefix = None
1521        if platform.system() == 'Windows':
1522            prefix = 'c:/Program Files/Couchbase/Server'
1523        elif platform.system() == 'Darwin':
1524            prefix = '~/Library/Application Support/Couchbase'
1525        else:
1526            prefix = '/opt/couchbase'
1527
1528        guts = {"db_dir" : os.path.join(prefix, "var/lib/couchbase/data"),
1529                "idx_dir" : os.path.join(prefix, "var/lib/couchbase/data"),
1530                "ns_log_path" : os.path.join(prefix, "var/lib/couchbase/ns_log"),
1531                "log_path" : os.path.join(prefix, "var/lib/couchbase/logs"),
1532                "memcached_logs_path" : os.path.join(prefix, "var/lib/couchbase/logs")}
1533
1534        guts_initargs_path = os.path.abspath(prefix)
1535
1536    ipv6 = read_guts(guts, "ipv6") == "true"
1537    set_local_addr(ipv6)
1538
1539    memcached_password =  get_diag_password(guts)
1540
1541    zip_node = read_guts(guts, "node")
1542    runner.literal("product diag header",
1543                   "Found server initargs at %s (%d)" % (guts_initargs_path, len(guts)))
1544
1545    runner.run_tasks(make_product_task(guts, guts_initargs_path,
1546                                       memcached_password, options))
1547
1548    # Collect breakpad crash dumps.
1549    if options.bypass_sensitive_data:
1550        log("Bypassing Sensitive Data: Breakpad crash dumps")
1551    else:
1552        memcached_breakpad_minidump_dir = read_guts(guts, "memcached_breakpad_minidump_dir")
1553        for dump in glob.glob(os.path.join(memcached_breakpad_minidump_dir, "*.dmp")):
1554            runner.collect_file(dump)
1555
1556        # Collect indexer breakpad minidumps
1557        index_port = read_guts(guts, "indexer_http_port")
1558        if index_port:
1559            indexer_breakpad_minidump_dir = read_guts(guts, "indexer_breakpad_minidump_dir")
1560            if memcached_breakpad_minidump_dir != indexer_breakpad_minidump_dir:
1561                for dump in glob.glob(os.path.join(indexer_breakpad_minidump_dir, "*.dmp")):
1562                    runner.collect_file(dump)
1563
1564    fts_port = read_guts(guts, "fts_http_port")
1565    if fts_port:
1566        idx_dir = read_guts(guts, "idx_dir")
1567        for dump in glob.glob(os.path.join(idx_dir, "@fts", "dumps", "*.dump.txt")):
1568            runner.collect_file(dump)
1569
1570    addr = zip_node.split("@")[-1]
1571    if addr == "127.0.0.1" or addr == "::1":
1572        zip_node = '@'.join(zip_node.split("@")[:-1] + [find_primary_addr(ipv6, addr)])
1573
1574    if options.verbosity:
1575        log("Python version: %s" % sys.version)
1576
1577    runner.literal("cbcollect_info log", log_stream.getvalue(),
1578                   log_file="cbcollect_info.log", no_header=True)
1579
1580    runner.close_all_files()
1581
1582    if options.redact_level != "none":
1583        log("Redacting log files to level: %s" % options.redact_level)
1584        runner.redact_and_zip(redact_zip_file, zip_node)
1585
1586    runner.zip(zip_filename, zip_node)
1587
1588    if upload_url and options.redact_level != "none":
1589        do_upload_and_exit(redact_zip_file, upload_url, options.upload_proxy)
1590    elif upload_url:
1591        do_upload_and_exit(zip_filename, upload_url, options.upload_proxy)
1592
def find_primary_addr(ipv6, default = None):
    """Return the local address of the interface that routes to the
    public internet, or *default* if it cannot be determined.

    Works by "connecting" a UDP socket to a well-known Google public
    DNS address; no packet is actually sent — connect() on a datagram
    socket merely selects the outgoing interface, whose address is then
    read back with getsockname().
    """
    family = socket.AF_INET6 if ipv6 else socket.AF_INET
    dns_addr = "2001:4860:4860::8844" if ipv6 else "8.8.8.8"
    sock = socket.socket(family, socket.SOCK_DGRAM)
    try:
        sock.connect((dns_addr, 56))
        # First element of the sockname tuple is the address for both
        # AF_INET (addr, port) and AF_INET6 (addr, port, flow, scope).
        return sock.getsockname()[0]
    except socket.error:
        # No route (e.g. offline host) — fall back to the caller's default.
        return default
    finally:
        sock.close()
1609
def exec_name(name):
    """Return *name* adjusted to the platform's executable naming:
    appends '.exe' on Windows, returns the name unchanged elsewhere."""
    return name + ".exe" if sys.platform == 'win32' else name
1614
# Script entry point: run collection only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()
1617