xref: /5.5.2/ns_server/cbcollect_info (revision 5ae02958)
1#!/usr/bin/env python
2# -*- python -*-
3#
4# @author Couchbase <info@couchbase.com>
5# @copyright 2011-2018 Couchbase, Inc.
6#
7# Licensed under the Apache License, Version 2.0 (the "License");
8# you may not use this file except in compliance with the License.
9# You may obtain a copy of the License at
10#
11#      http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS,
15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18import os
19import sys
20import tempfile
21import time
22import subprocess
23import string
24import re
25import platform
26import glob
27import socket
28import threading
29import optparse
30import atexit
31import signal
32import urllib
33import shutil
34import urlparse
35import errno
36import hashlib
37import uuid
38from datetime import datetime, timedelta, tzinfo
39from StringIO import StringIO
40
class AltExitC(object):
    """Collects cleanup callbacks and runs them in LIFO order on exit.

    Unlike plain atexit, exit() runs the handlers immediately and then
    terminates via os._exit(), bypassing normal interpreter shutdown.
    """
    def __init__(self):
        self.list = []                    # registered cleanup callbacks
        self.lock = threading.Lock()      # guards self.list
        atexit.register(self.at_exit_handler)

    def register(self, f):
        # Acquire here; register_and_unlock releases once f is recorded.
        self.lock.acquire()
        self.register_and_unlock(f)

    def register_and_unlock(self, f):
        try:
            self.list.append(f)
        finally:
            self.lock.release()

    def at_exit_handler(self):
        # NOTE: the lock is deliberately never released -- once shutdown
        # has begun, any further register() call must block forever.
        self.lock.acquire()
        self.list.reverse()
        for f in self.list:
            try:
                f()
            except:
                # Best-effort cleanup: a failing handler must not stop
                # the remaining handlers from running.
                pass

    def exit(self, status):
        # Run handlers, then terminate hard (skips atexit machinery).
        self.at_exit_handler()
        os._exit(status)
69
# Module-wide singleton used throughout the script for cleanup-on-exit.
AltExit = AltExitC()

# optparse usage template; %prog is replaced with the script name.
USAGE = """usage: %prog [options] output_file.zip

- Linux/Windows/OSX:
    %prog output_file.zip
    %prog -v output_file.zip"""
77
# adapted from pytz
class FixedOffsetTZ(tzinfo):
    """A tzinfo with a constant UTC offset and no daylight saving."""

    def __init__(self, minutes):
        # Reject offsets of a whole day or more in either direction.
        if not -1440 < minutes < 1440:
            raise ValueError("absolute offset is too large", minutes)
        self._minutes = minutes
        self._offset = timedelta(minutes=minutes)

    def utcoffset(self, dt):
        """Fixed offset from UTC, independent of dt."""
        return self._offset

    def dst(self, dt):
        """No DST: always a zero delta."""
        return timedelta()

    def tzname(self, dt):
        """This zone has no name."""
        return None
94
# Timezone of the local machine; time.timezone is seconds west of UTC.
# NOTE(review): `/ 60` is integer division under Python 2 -- intentional.
local_tz = FixedOffsetTZ(minutes=time.timezone / 60)
# In-memory copy of everything logged; shipped inside the collected zip.
log_stream = StringIO()
# Loopback address in plain and URL form; populated by set_local_addr().
local_addr = None
local_url_addr = None
99
def set_local_addr(ipv6):
    """Point the module-level loopback addresses at IPv6 or IPv4.

    local_url_addr carries the bracketed form suitable for URLs.
    """
    global local_addr
    global local_url_addr

    if ipv6:
        local_addr = "::1"
        local_url_addr = "[::1]"
    else:
        local_addr = "127.0.0.1"
        local_url_addr = "127.0.0.1"
106
# Partial log line being accumulated across calls (None when none pending).
log_line = None
def buffer_log_line(message, new_line):
    """Accumulate message fragments into a single timestamped line.

    Fragments are buffered in the module-level log_line until a call
    arrives with new_line set; the completed line (prefixed with the
    local time) is then returned and the buffer reset.  Returns None
    while the line is still incomplete.
    """
    global log_line

    buffered = log_line
    if buffered is None:
        # Starting a fresh line: stamp it with the local wall-clock time.
        buffered = '[%s] ' % datetime.now(tz=local_tz).isoformat()

    buffered += message
    if not new_line:
        log_line = buffered
        return None

    log_line = None
    return buffered
123
def log(message, new_line=True):
    """Write message to stderr immediately and buffer it for log_stream.

    Completed lines (timestamped by buffer_log_line) are appended to the
    global log_stream so they can be included in the collected zip.
    """
    global log_stream

    text = message + '\n' if new_line else message

    completed = buffer_log_line(text, new_line)
    if completed is not None:
        log_stream.write(completed)

    sys.stderr.write(text)
    sys.stderr.flush()
136
class AccessLogProcessor:
    """Redacts usernames and document ids from http access log lines."""

    def __init__(self, salt):
        self.salt = salt
        # Splits a line into (prefix)(user)(middle + method)(url)(rest).
        self.column_parser = re.compile(
            r'(^\S* \S* )(\S*)( \[.*\] \"\S* )(\S*)( .*$)')
        # Each entry: [url prefix, regex locating the sensitive part,
        #              redactor function, regex group name].
        self.urls_to_redact = [['/settings/rbac/users',
                                re.compile(r'\/(?P<user>[^\/\s#&]+)([#&]|$)'),
                                self._process_user, "user"],
                               ['/_cbauth/checkPermission',
                                re.compile(r'user=(?P<user>[^\s&#]+)'),
                                self._process_user, "user"],
                               ['/pools/default/buckets',
                                re.compile(r'\/(?:[^\/\s#&]+)\/docs\/'
                                            '(?P<docid>[^\/\s#&]+)$'),
                                self._process_docid, "docid"]]

    def _process_url(self, surl):
        """Redact the sensitive tail of surl if it has a known prefix."""
        for conf in self.urls_to_redact:
            prefix = conf[0]
            if surl[:len(prefix)] == prefix:
                return prefix + self._process_url_tail(conf[1], conf[2],
                                                       conf[3],
                                                       surl[len(prefix):])
        return surl

    def _process_url_tail(self, rex, fn, key, s):
        """Apply fn to the `key` group of rex within s, if it matches."""
        m = rex.search(s)
        # FIX: identity comparison with None (was `m != None`).
        if m is not None:
            return s[:m.start(key)] + fn(m.group(key)) + s[m.end(key):]
        else:
            return s

    def _process_user(self, user):
        """Hash a username; '-', internal '@' users and '' pass through."""
        # FIX: guard the empty string -- the parser's user group is \S*
        # and can match nothing, so user[0] could raise IndexError.
        if not user or user == '-' or user[0] == '@':
            return user
        elif user[-3:] == "/UI":
            return self._hash(user[:-3]) + "/UI"
        else:
            return self._hash(user)

    def _process_docid(self, docid):
        return self._hash(docid)

    def _hash(self, token):
        # Salted SHA1 keeps redacted values correlatable within one run.
        return hashlib.sha1(self.salt + token).hexdigest()

    def _repl_columns(self, matchobj):
        return matchobj.group(1) + \
            self._process_user(matchobj.group(2)) + \
            matchobj.group(3) + \
            self._process_url(matchobj.group(4)) + \
            matchobj.group(5)

    def do(self, line):
        """Return line with the user and url columns redacted."""
        return self.column_parser.sub(self._repl_columns, line)
192
class RegularLogProcessor:
    """Hashes <ud>-tagged user data and quoted redaction-salt arguments."""

    # Each pattern captures (open)(sensitive payload)(close).
    rexes = [re.compile('(<ud>)(.+?)(</ud>)'),
             re.compile('("--log-redaction-salt=)(.+?)(")')]

    def __init__(self, salt):
        self.salt = salt

    def _hash(self, match):
        """re.sub callback: replace the payload with its salted SHA1."""
        digest = hashlib.sha1(self.salt + match.group(2)).hexdigest()
        return match.group(1) + digest + match.group(3)

    def _process_line(self, line):
        for pattern in self.rexes:
            line = pattern.sub(self._hash, line)
        return line

    def do(self, line):
        """Return line with all sensitive spans hashed."""
        return self._process_line(line)
211
class CouchbaseLogProcessor(RegularLogProcessor):
    """Like RegularLogProcessor, but rewrites the RedactLevel header line
    and redacts the unquoted --log-redaction-salt argument form."""

    rexes = [re.compile('(--log-redaction-salt=)(.+?)( )')]

    def do(self, line):
        if "RedactLevel" not in line:
            return self._process_line(line)
        # salt + salt to maintain consistency with other occurrences
        # of the hashed salt in the logs.
        return 'RedactLevel:partial,HashOfSalt:%s\n' \
            % hashlib.sha1(self.salt + self.salt).hexdigest()
223
class LogRedactor:
    """Routes collected files through the appropriate redaction processor
    and writes the redacted copies under <tmpdir>/redacted."""

    def __init__(self, salt, tmpdir, default_name):
        self.default_name = default_name
        self.target_dir = os.path.join(tmpdir, "redacted")
        os.makedirs(self.target_dir)

        self.access_log = AccessLogProcessor(salt)
        self.couchbase_log = CouchbaseLogProcessor(salt)
        self.regular_log = RegularLogProcessor(salt)

    def _process_file(self, ifile, ofile, processor):
        """Copy ifile to ofile, passing every line through processor.do()."""
        try:
            with open(ifile, 'r') as inp, open(ofile, 'w+') as out:
                for line in inp:
                    out.write(processor.do(line))
        except IOError as e:
            log("I/O error(%s): %s" % (e.errno, e.strerror))

    def redact_file(self, name, ifile):
        """Redact ifile into the target dir as name; returns the new path."""
        ofile = os.path.join(self.target_dir, name)
        if "http_access" in name:
            processor = self.access_log
        elif name == self.default_name:
            processor = self.couchbase_log
        else:
            processor = self.regular_log
        self._process_file(ifile, ofile, processor)
        return ofile
252
class Task(object):
    """A single collection step: run `command` and capture its output.

    Extra keyword arguments become instance attributes; recognized ones
    include log_file, addenv, to_stdin, change_dir and artifacts.
    Subclasses define `platforms` to restrict where the task runs.
    """
    privileged = False  # task requires root privileges (POSIX only)
    no_header = False   # skip the "===" banner before the output
    num_samples = 1     # how many times to run the command
    interval = 0        # seconds to sleep between samples
    def __init__(self, description, command, timeout=None, **kwargs):
        self.description = description
        self.command = command
        self.timeout = timeout
        # Stash any extra options directly on the instance.
        self.__dict__.update(kwargs)
        self._is_posix = (os.name == 'posix')

    def _platform_popen_flags(self):
        # On POSIX put the child in its own process group so the whole
        # group can be killed on timeout (see _kill).
        flags = {}
        if self._is_posix:
            flags['preexec_fn'] = os.setpgrp

        return flags

    def _can_kill(self, p):
        # POSIX always supports killpg; elsewhere rely on Popen.kill()
        # being available on this Python/platform.
        if self._is_posix:
            return True

        return hasattr(p, 'kill')

    def _kill(self, p):
        """Forcefully terminate the child (its whole group on POSIX)."""
        if self._is_posix:
            group_pid = os.getpgid(p.pid)
            os.killpg(group_pid, signal.SIGKILL)
        else:
            p.kill()

    def _env_flags(self):
        """Popen env override built from the optional `addenv` attribute."""
        flags = {}
        if hasattr(self, 'addenv'):
            env = os.environ.copy()
            env.update(self.addenv)
            flags['env'] = env

        return flags

    def _cwd_flags(self):
        # `change_dir` may be True (use the runner's tmpdir) or a path.
        flags = {}
        if getattr(self, 'change_dir', False):
            cwd = self._task_runner.tmpdir
            if isinstance(self.change_dir, str):
                cwd = self.change_dir

            flags['cwd'] = cwd

        return flags

    def _extra_flags(self):
        """Combined Popen keyword flags for this task."""
        flags = self._env_flags()
        flags.update(self._platform_popen_flags())
        flags.update(self._cwd_flags())

        return flags

    def set_task_runner(self, runner):
        # Called by TaskRunner before execution; needed by _cwd_flags.
        self._task_runner = runner

    def execute(self, fp):
        """Run the task, streaming its output to fp; returns exit status."""
        # A string command goes through the shell; a list is exec'd directly.
        use_shell = not isinstance(self.command, list)
        extra_flags = self._extra_flags()
        try:
            p = subprocess.Popen(self.command, bufsize=-1,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=use_shell,
                                 **extra_flags)
            if hasattr(self, 'to_stdin'):
                p.stdin.write(self.to_stdin)

            p.stdin.close()

        except OSError, e:
            # if use_shell is False then Popen may raise exception
            # if binary is missing. In this case we mimic what a
            # shell does. Namely, complaining to stderr and
            # setting non-zero status code. It might also
            # automatically handle things like "failed to fork due
            # to some system limit".
            print >> fp, "Failed to execute %s: %s" % (self.command, e)
            return 127

        except IOError, e:
            if e.errno == errno.EPIPE:
                # Child exited before reading its stdin; harmless.
                print >> fp, "Ignoring broken pipe on stdin for %s" % self.command
            else:
                raise

        from threading import Timer, Event

        timer = None
        timer_fired = Event()

        if self.timeout is not None and self._can_kill(p):
            def on_timeout():
                try:
                    self._kill(p)
                except:
                    # the process might have died already
                    pass

                timer_fired.set()

            timer = Timer(self.timeout, on_timeout)
            timer.start()

        try:
            # Drain the child's combined stdout/stderr in 64K chunks.
            while True:
                data = p.stdout.read(64 * 1024)
                if not data:
                    break

                fp.write(data)
        finally:
            if timer is not None:
                timer.cancel()
                timer.join()

                # there's a tiny chance that command succeeds just before
                # timer is fired; that would result in a spurious timeout
                # message
                if timer_fired.isSet():
                    print >> fp, "`%s` timed out after %s seconds" % (self.command, self.timeout)
                    log("[Command timed out after %s seconds] - " % (self.timeout), new_line=False)

        return p.wait()

    def will_run(self):
        """Determine if this task will run on this platform."""
        return sys.platform in self.platforms
389
390
391class TaskRunner(object):
392    default_name = "couchbase.log"
393
394    def __init__(self, verbosity=0, task_regexp='', tmp_dir=None,
395                 salt_value=""):
396        self.files = {}
397        self.verbosity = verbosity
398        self.start_time = time.strftime("%Y%m%d-%H%M%S", time.gmtime())
399        self.salt_value = salt_value
400
401        # Depending on platform, mkdtemp() may act unpredictably if passed an empty string.
402        if not tmp_dir:
403            tmp_dir = None
404        else:
405            tmp_dir = os.path.abspath(os.path.expanduser(tmp_dir))
406
407        try:
408            self.tmpdir = tempfile.mkdtemp(dir=tmp_dir)
409        except OSError as e:
410           print "Could not use temporary dir {0}: {1}".format(tmp_dir, e)
411           sys.exit(1)
412
413        # If a dir wasn't passed by --tmp-dir, check if the env var was set and if we were able to use it
414        if not tmp_dir and os.getenv("TMPDIR") and os.path.split(self.tmpdir)[0] != os.getenv("TMPDIR"):
415                log("Could not use TMPDIR {0}".format(os.getenv("TMPDIR")))
416        log("Using temporary dir {0}".format(os.path.split(self.tmpdir)[0]))
417
418        self.task_regexp = re.compile(task_regexp)
419
420        AltExit.register(self.finalize)
421
422    def finalize(self):
423        try:
424            for fp in self.files.iteritems():
425                fp.close()
426        except:
427            pass
428
429        shutil.rmtree(self.tmpdir, ignore_errors=True)
430
431    def collect_file(self, filename):
432        """Add a file to the list of files collected. Used to capture the exact
433        file (including timestamps) from the Couchbase instance.
434
435        filename - Absolute path to file to collect.
436        """
437        if not filename in self.files:
438            try:
439                self.files[filename] = open(filename, 'r')
440            except IOError, e:
441                log("Failed to collect file '%s': %s" % (filename, str(e)))
442        else:
443            log("Unable to collect file '%s' - already collected." % filename)
444
445    def get_file(self, filename):
446        if filename in self.files:
447            fp = self.files[filename]
448        else:
449            fp = open(os.path.join(self.tmpdir, filename), 'w+')
450            self.files[filename] = fp
451
452        return fp
453
454    def header(self, fp, title, subtitle):
455        separator = '=' * 78
456        print >> fp, separator
457        print >> fp, title
458        print >> fp, subtitle
459        print >> fp, separator
460        fp.flush()
461
462    def log_result(self, result):
463        if result == 0:
464            log("OK")
465        else:
466            log("Exit code %d" % result)
467
468    def run_tasks(self, tasks):
469        for task in tasks:
470            self.run(task)
471
472    def run(self, task):
473        if self.task_regexp.match(task.description) is None:
474                log("Skipping task %s because "
475                    "it doesn't match '%s'" % (task.description,
476                                               self.task_regexp.pattern))
477        else:
478            self._run(task)
479
480    def _run(self, task):
481        """Run a task with a file descriptor corresponding to its log file"""
482        if task.will_run():
483            log("%s (%s) - " % (task.description, task.command), new_line=False)
484            if task.privileged and os.getuid() != 0:
485                log("skipped (needs root privs)")
486                return
487
488            task.set_task_runner(self)
489
490            filename = getattr(task, 'log_file', self.default_name)
491            fp = self.get_file(filename)
492            if not task.no_header:
493                self.header(fp, task.description, task.command)
494
495            for i in xrange(task.num_samples):
496                if i > 0:
497                    log("Taking sample %d after %f seconds - " % (i+1, task.interval), new_line=False)
498                    time.sleep(task.interval)
499                result = task.execute(fp)
500                self.log_result(result)
501
502            for artifact in getattr(task, 'artifacts', []):
503                path = artifact
504                if not os.path.isabs(path):
505                    # we assume that "relative" artifacts are produced in the
506                    # self.tmpdir
507                    path = os.path.join(self.tmpdir, path)
508
509                self.collect_file(path)
510
511            fp.flush()
512
513        elif self.verbosity >= 2:
514            log('Skipping "%s" (%s): not for platform %s' % (task.description, task.command, sys.platform))
515
516    def literal(self, description, value, **kwargs):
517        self.run(LiteralTask(description, value, **kwargs))
518
519
520    def redact_and_zip(self, filename, node):
521        files = []
522        redactor = LogRedactor(self.salt_value, self.tmpdir, self.default_name)
523
524        for name, fp in self.files.iteritems():
525            if "users.dets" in name:
526                continue
527            files.append(redactor.redact_file(name, fp.name))
528
529        prefix = "cbcollect_info_%s_%s" % (node, self.start_time)
530        self._zip_helper(prefix, filename, files)
531
532    def close_all_files(self):
533        for name, fp in self.files.iteritems():
534            fp.close()
535
536    def zip(self, filename, node):
537        prefix = "cbcollect_info_%s_%s" % (node, self.start_time)
538
539        files = []
540        for name, fp in self.files.iteritems():
541            files.append(fp.name)
542        self._zip_helper(prefix, filename, files)
543
544    def _zip_helper(self, prefix, filename, files):
545        """Write all our logs to a zipfile"""
546        exe = exec_name("gozip")
547
548        fallback = False
549
550        try:
551            p = subprocess.Popen([exe, "-strip-path", "-prefix", prefix, filename] + files,
552                                 stderr=subprocess.STDOUT,
553                                 stdin=subprocess.PIPE)
554            p.stdin.close()
555            status = p.wait()
556
557            if status != 0:
558                log("gozip terminated with non-zero exit code (%d)" % status)
559        except OSError, e:
560            log("Exception during compression: %s" % e)
561            fallback = True
562
563        if fallback:
564            log("IMPORTANT:")
565            log("  Compression using gozip failed.")
566            log("  Falling back to python implementation.")
567            log("  Please let us know about this and provide console output.")
568
569            self._zip_fallback(filename, prefix, files)
570
571    def _zip_fallback(self, filename, prefix, files):
572        from zipfile import ZipFile, ZIP_DEFLATED
573        zf = ZipFile(filename, mode='w', compression=ZIP_DEFLATED)
574        try:
575            for name in files:
576                zf.write(name,
577                         "%s/%s" % (prefix, os.path.basename(name)))
578        finally:
579            zf.close()
580
class SolarisTask(Task):
    """Task that runs only on Solaris/SunOS hosts."""
    platforms = ['sunos5', 'solaris']


class LinuxTask(Task):
    """Task that runs only on Linux hosts."""
    platforms = ['linux2']


class WindowsTask(Task):
    """Task that runs only on Windows (native or cygwin) hosts."""
    platforms = ['win32', 'cygwin']


class MacOSXTask(Task):
    """Task that runs only on Mac OS X hosts."""
    platforms = ['darwin']


class UnixTask(SolarisTask, LinuxTask, MacOSXTask):
    """Task that runs on any Unix-like host."""
    platforms = SolarisTask.platforms + LinuxTask.platforms + MacOSXTask.platforms


class AllOsTask(UnixTask, WindowsTask):
    """Task that runs on every supported platform."""
    platforms = UnixTask.platforms + WindowsTask.platforms
603
class LiteralTask(AllOsTask):
    """Task whose "output" is a fixed string instead of a command run."""

    def __init__(self, description, literal, **kwargs):
        self.description = description
        self.command = ''
        self.literal = literal
        self.__dict__.update(kwargs)

    def execute(self, fp):
        # Just record the literal; always reports success.
        print >> fp, self.literal
        return 0
614
class CollectFile(AllOsTask):
    """Task that captures an existing file verbatim into the collection."""

    def __init__(self, description, file_path, **kwargs):
        self.description = description
        self.command = ''
        self.file_path = file_path
        self.__dict__.update(kwargs)

    def execute(self, fp):
        # Delegate to the runner, which keeps the original file handle.
        self._task_runner.collect_file(self.file_path)
        print >> fp, "Collected file %s" % self.file_path
        return 0
626
def make_curl_task(name, user, password, url,
                   timeout=60, log_file="couchbase.log", base_task=AllOsTask,
                   **kwargs):
    """Build a task that fetches url with curl.  Credentials are fed on
    stdin via `-K-` so they never appear on the command line."""
    command = ["curl", "-sS", "--proxy", "", "-K-", url]
    credentials = "--user %s:%s" % (user, password)
    return base_task(name, command,
                     timeout=timeout,
                     log_file=log_file,
                     to_stdin=credentials,
                     **kwargs)
635
def make_cbstats_task(kind, memcached_pass, guts):
    """Build a task that runs `cbstats <kind>` against local memcached.

    The password travels via the CB_PASSWORD environment variable rather
    than the command line.
    """
    port = read_guts(guts, "memcached_port")
    user = read_guts(guts, "memcached_admin")
    command = flatten(["cbstats", "-a", "%s:%s" % (local_url_addr, port),
                       kind, "-u", user])
    return AllOsTask("memcached stats %s" % kind,
                     command,
                     log_file="stats.log",
                     timeout=60,
                     addenv=[("CB_PASSWORD", memcached_pass)])
644
def get_local_token(guts, port):
    """Return the @localtoken secret read from disk, or "" on I/O error.

    NOTE(review): the port argument is currently unused -- confirm callers
    before removing it.
    """
    token_path = read_guts(guts, "localtoken_path")
    try:
        with open(token_path, 'r') as token_file:
            return token_file.read().rstrip('\n')
    except IOError as e:
        log("I/O error(%s): %s" % (e.errno, e.strerror))
        return ""
654
def get_diag_password(guts):
    """Fetch the diag password over the local REST port.

    Authenticates as @localtoken using the on-disk token; returns "" when
    the request fails, logging whatever output curl produced.
    """
    port = read_guts(guts, "rest_port")
    token = get_local_token(guts, port)
    url = "http://%s:%s/diag/password" % (local_url_addr, port)
    command = ["curl", "-sS", "--proxy", "", "-u", "@localtoken:%s" % token, url]

    output = StringIO()
    task = AllOsTask("get diag password", command, timeout=60)
    if task.execute(output) != 0:
        log(output.getvalue())
        return ""
    return output.getvalue()
668
def make_query_task(statement, user, password, port):
    """Build a curl task that runs a N1QL statement via the query service."""
    quoted = urllib.quote(statement)
    url = "http://%s:%s/query/service?statement=%s" % (local_url_addr, port,
                                                       quoted)
    description = "Result of query statement \'%s\'" % statement
    return make_curl_task(name=description,
                          user=user, password=password, url=url)
674
def make_index_task(name, api, passwd, index_port, logfile="couchbase.log"):
    """Build a curl task that queries the indexer REST api as user "@"."""
    index_url = 'http://%s:%s/%s' % (local_url_addr, index_port, api)
    return make_curl_task(name, "@", passwd, index_url, log_file=logfile)
679
def make_redaction_task():
    """Literal marker recording that no redaction was applied."""
    task = LiteralTask("Log Redaction", "RedactLevel:none")
    return task
682
def basedir():
    """Directory containing this script; "." when invoked by bare name."""
    return os.path.dirname(sys.argv[0]) or "."
688
def make_event_log_task():
    """Windows task dumping recent application/system error+warning events.

    wmic ntevent can be extremely slow, so the query is limited to
    approximately the last month.
    """
    from datetime import datetime, timedelta

    limit = (datetime.today() - timedelta(days=31)) \
        .strftime('%Y%m%d000000.000000-000')

    query = ("wmic ntevent where "
             "\""
             "(LogFile='application' or LogFile='system') and "
             "EventType<3 and TimeGenerated>'%(limit)s'"
             "\" "
             "get TimeGenerated,LogFile,SourceName,EventType,Message "
             "/FORMAT:list" % locals())
    return WindowsTask("Event log", query)
705
706
def make_os_tasks():
    """Build the list of generic OS-level diagnostic tasks.

    Tasks for every supported platform are included; TaskRunner filters
    them at run time via Task.will_run().
    """
    # Couchbase-related process names, used by the lsof/proc/stack tasks.
    programs = " ".join(["moxi", "memcached", "beam.smp",
                         "couch_compact", "godu", "sigar_port",
                         "cbq-engine", "indexer", "projector", "goxdcr",
                         "cbft", "eventing-producer", "eventing-consumer"])

    _tasks = [
        UnixTask("uname", "uname -a"),
        UnixTask("time and TZ", "date; date -u"),
        UnixTask("ntp time",
                 "ntpdate -q pool.ntp.org || "
                 "nc time.nist.gov 13 || "
                 "netcat time.nist.gov 13", timeout=60),
        UnixTask("ntp peers", "ntpq -p"),
        UnixTask("raw /etc/sysconfig/clock", "cat /etc/sysconfig/clock"),
        UnixTask("raw /etc/timezone", "cat /etc/timezone"),
        WindowsTask("System information", "systeminfo"),
        WindowsTask("Computer system", "wmic computersystem"),
        WindowsTask("Computer OS", "wmic os"),
        LinuxTask("System Hardware", "lshw -json || lshw"),
        SolarisTask("Process list snapshot", "prstat -a -c -n 100 -t -v -L 1 10"),
        SolarisTask("Process list", "ps -ef"),
        SolarisTask("Service configuration", "svcs -a"),
        SolarisTask("Swap configuration", "swap -l"),
        SolarisTask("Disk activity", "zpool iostat 1 10"),
        SolarisTask("Disk activity", "iostat -E 1 10"),
        LinuxTask("Process list snapshot", "export TERM=''; top -Hb -n1 || top -H n1"),
        LinuxTask("Process list", "ps -AwwL -o user,pid,lwp,ppid,nlwp,pcpu,maj_flt,min_flt,pri,nice,vsize,rss,tty,stat,wchan:12,start,bsdtime,command"),
        LinuxTask("Raw /proc/buddyinfo", "cat /proc/buddyinfo"),
        LinuxTask("Raw /proc/meminfo", "cat /proc/meminfo"),
        LinuxTask("Raw /proc/pagetypeinfo", "cat /proc/pagetypeinfo"),
        LinuxTask("Raw /proc/zoneinfo", "cat /proc/zoneinfo"),
        LinuxTask("Raw /proc/vmstat", "cat /proc/vmstat"),
        LinuxTask("Raw /proc/mounts", "cat /proc/mounts"),
        LinuxTask("Raw /proc/partitions", "cat /proc/partitions"),
        LinuxTask("Raw /proc/diskstats", "cat /proc/diskstats; echo ''", num_samples=10, interval=1),
        LinuxTask("Raw /proc/interrupts", "cat /proc/interrupts"),
        LinuxTask("Swap configuration", "free -t"),
        LinuxTask("Swap configuration", "swapon -s"),
        LinuxTask("Kernel modules", "lsmod"),
        LinuxTask("Distro version", "cat /etc/redhat-release"),
        LinuxTask("Distro version", "lsb_release -a"),
        LinuxTask("Distro version", "cat /etc/SuSE-release"),
        LinuxTask("Distro version", "cat /etc/issue"),
        LinuxTask("Installed software", "rpm -qa"),
        LinuxTask("Hot fix list", "rpm -V couchbase-server"),
        # NOTE: AFAIK columns _was_ necessary, but it doesn't appear to be
        # required anymore. I.e. dpkg -l correctly detects stdout as not a
        # tty and stops playing smart on formatting. Lets keep it for few
        # years and then drop, however.
        LinuxTask("Installed software", "COLUMNS=300 dpkg -l"),
        # NOTE: -V is supported only from dpkg v1.17.2 onwards.
        LinuxTask("Hot fix list", "COLUMNS=300 dpkg -V couchbase-server"),
        LinuxTask("Extended iostat", "iostat -x -p ALL 1 10 || iostat -x 1 10"),
        LinuxTask("Core dump settings", "find /proc/sys/kernel -type f -name '*core*' -print -exec cat '{}' ';'"),
        UnixTask("sysctl settings", "sysctl -a"),
        LinuxTask("Relevant lsof output",
                  "echo %(programs)s | xargs -n1 pgrep | xargs -n1 -r -- lsof -n -p" % locals()),
        LinuxTask("LVM info", "lvdisplay"),
        LinuxTask("LVM info", "vgdisplay"),
        LinuxTask("LVM info", "pvdisplay"),
        LinuxTask("Block device queue settings",
                  "find /sys/block/*/queue -type f | xargs grep -vH xxxx | sort"),
        MacOSXTask("Process list snapshot", "top -l 1"),
        MacOSXTask("Disk activity", "iostat 1 10"),
        MacOSXTask("Process list",
                   "ps -Aww -o user,pid,lwp,ppid,nlwp,pcpu,pri,nice,vsize,rss,tty,"
                   "stat,wchan:12,start,bsdtime,command"),
        WindowsTask("Installed software", "wmic product get name, version"),
        WindowsTask("Service list", "wmic service where state=\"running\" GET caption, name, state"),
        WindowsTask("Process list", "wmic process"),
        WindowsTask("Process usage", "tasklist /V /fo list"),
        WindowsTask("Swap settings", "wmic pagefile"),
        WindowsTask("Disk partition", "wmic partition"),
        WindowsTask("Disk volumes", "wmic volume"),
        UnixTask("Network configuration", "ifconfig -a", interval=10,
                 num_samples=2),
        LinuxTask("Network configuration", "echo link addr neigh rule route netns | xargs -n1 -- sh -x -c 'ip $1 list' --"),
        WindowsTask("Network configuration", "ipconfig /all", interval=10,
                    num_samples=2),
        LinuxTask("Raw /proc/net/dev", "cat /proc/net/dev"),
        LinuxTask("Network link statistics", "ip -s link"),
        UnixTask("Network status", "netstat -anp || netstat -an"),
        WindowsTask("Network status", "netstat -anotb"),
        AllOsTask("Network routing table", "netstat -rn"),
        LinuxTask("Network socket statistics", "ss -an"),
        LinuxTask("Extended socket statistics", "ss -an --info --processes", timeout=300),
        UnixTask("Arp cache", "arp -na"),
        LinuxTask("Iptables dump", "iptables-save"),
        UnixTask("Raw /etc/hosts", "cat /etc/hosts"),
        UnixTask("Raw /etc/resolv.conf", "cat /etc/resolv.conf"),
        UnixTask("Raw /etc/nsswitch.conf", "cat /etc/nsswitch.conf"),
        WindowsTask("Arp cache", "arp -a"),
        WindowsTask("Network Interface Controller", "wmic nic"),
        WindowsTask("Network Adapter", "wmic nicconfig"),
        WindowsTask("Active network connection", "wmic netuse"),
        WindowsTask("Protocols", "wmic netprotocol"),
        WindowsTask("Hosts file", "type %SystemRoot%\system32\drivers\etc\hosts"),
        WindowsTask("Cache memory", "wmic memcache"),
        WindowsTask("Physical memory", "wmic memphysical"),
        WindowsTask("Physical memory chip info", "wmic memorychip"),
        WindowsTask("Local storage devices", "wmic logicaldisk"),
        UnixTask("Filesystem", "df -ha"),
        UnixTask("System activity reporter", "sar 1 10"),
        UnixTask("System paging activity", "vmstat 1 10"),
        UnixTask("System uptime", "uptime"),
        UnixTask("Last logins of users and ttys", "last -x || last"),
        UnixTask("couchbase user definition", "getent passwd couchbase"),
        UnixTask("couchbase user limits", "su couchbase -s /bin/sh -c \"ulimit -a\"",
                 privileged=True),
        UnixTask("Interrupt status", "intrstat 1 10"),
        UnixTask("Processor status", "mpstat 1 10"),
        UnixTask("System log", "cat /var/adm/messages"),
        LinuxTask("Raw /proc/uptime", "cat /proc/uptime"),
        LinuxTask("Systemd journal",
                  "journalctl | gzip -c > systemd_journal.gz",
                  change_dir=True, artifacts=['systemd_journal.gz']),
        LinuxTask("All logs", "tar cz /var/log/syslog* /var/log/dmesg /var/log/messages* /var/log/daemon* /var/log/debug* /var/log/kern.log* 2>/dev/null",
                  log_file="syslog.tar.gz", no_header=True),
        LinuxTask("Relevant proc data", "echo %(programs)s | "
                  "xargs -n1 pgrep | xargs -n1 -- sh -c 'echo $1; cat /proc/$1/status; cat /proc/$1/limits; cat /proc/$1/smaps; cat /proc/$1/numa_maps; cat /proc/$1/task/*/sched; echo' --" % locals()),
        LinuxTask("Processes' environment", "echo %(programs)s | "
                  r"xargs -n1 pgrep | xargs -n1 -- sh -c 'echo $1; ( cat /proc/$1/environ | tr \\0 \\n | egrep -v ^CB_MASTER_PASSWORD=\|^CBAUTH_REVRPC_URL=); echo' --" % locals()),
        LinuxTask("Processes' stack",
                  "for program in %(programs)s; do for thread in $(pgrep --lightweight $program); do echo $program/$thread:; cat /proc/$thread/stack; echo; done; done" % locals()),
        LinuxTask("NUMA data", "numactl --hardware"),
        LinuxTask("NUMA data", "numactl --show"),
        LinuxTask("NUMA data", "cat /sys/devices/system/node/node*/numastat"),
        UnixTask("Kernel log buffer", "dmesg -T || dmesg -H || dmesg"),
        LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/transparent_hugepage/enabled"),
        LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/transparent_hugepage/defrag"),
        LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/redhat_transparent_hugepage/enabled"),
        LinuxTask("Transparent Huge Pages data", "cat /sys/kernel/mm/redhat_transparent_hugepage/defrag"),
        LinuxTask("Network statistics", "netstat -s"),
        LinuxTask("Full raw netstat", "cat /proc/net/netstat"),
        LinuxTask("CPU throttling info", "echo /sys/devices/system/cpu/cpu*/thermal_throttle/* | xargs -n1 -- sh -c 'echo $1; cat $1' --"),
        LinuxTask("Raw PID 1 scheduler /proc/1/sched", "cat /proc/1/sched | head -n 1"),
        LinuxTask("Raw PID 1 control groups /proc/1/cgroup", "cat /proc/1/cgroup"),
        make_event_log_task(),
        ]

    return _tasks
849
850# stolen from http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
def iter_flatten(iterable):
    """Yield the leaves of an arbitrarily nested list/tuple structure.

    Non-list, non-tuple elements are yielded unchanged, in order.
    """
    for item in iterable:
        if not isinstance(item, (list, tuple)):
            yield item
        else:
            for leaf in iter_flatten(item):
                yield leaf
859
def flatten(iterable):
    """Return a flat list of the leaves of a nested list/tuple structure."""
    result = []
    stack = [iter(iterable)]
    # Depth-first traversal with an explicit stack of iterators.
    while stack:
        try:
            item = next(stack[-1])
        except StopIteration:
            stack.pop()
            continue
        if isinstance(item, (list, tuple)):
            stack.append(iter(item))
        else:
            result.append(item)
    return result
862
def read_guts(guts, key):
    """Look up *key* in the guts dict; missing keys read as empty string."""
    if key in guts:
        return guts[key]
    return ""
865
def winquote_path(s):
    """Quote a path for a Windows shell command.

    Collapses doubled backslashes to single ones, turns forward slashes
    into backslashes and wraps the result in double quotes.
    """
    normalized = s.replace("\\\\", "\\")
    normalized = normalized.replace('/', "\\")
    return '"%s"' % normalized
868
869# python's split splits empty string to [''] which doesn't make any
870# sense. So this function works around that.
def correct_split(string, splitchar):
    """Split like str.split(), but map the empty string to [] not ['']."""
    pieces = string.split(splitchar)
    if pieces == ['']:
        return []
    return pieces
876
def make_stats_archives_task(guts, initargs_path):
    """Build the task that dumps ns_server stats archives to JSON.

    Returns [] when the dump-stats/escript-wrapper scripts are missing or
    the stats directory is unknown; otherwise a single AllOsTask that runs
    dump-stats via escript-wrapper and collects stats_archives.json.
    """
    escript = exec_name("escript")
    wrapper = find_script("escript-wrapper")
    dumper = find_script("dump-stats")
    stats_dir = read_guts(guts, "stats_dir")

    if dumper is None or wrapper is None or not stats_dir:
        return []

    output_file = "stats_archives.json"
    cmd = [escript,
           wrapper,
           "--initargs-path", initargs_path, "--",
           dumper, stats_dir, output_file]
    return AllOsTask("stats archives",
                     cmd,
                     change_dir=True,
                     artifacts=[output_file])
894
def make_product_task(guts, initargs_path, memcached_pass, options):
    """Build the list of Couchbase-product-specific collection tasks.

    guts           -- dict of server internals produced by dump-guts
                      (missing keys read as "" via read_guts)
    initargs_path  -- path to the server's initargs file; the install root
                      is derived from it
    memcached_pass -- password used for authenticated REST/memcached calls
    options        -- parsed command-line options (multi_node_diag, ...)

    Returns a flat list of task objects covering directory listings,
    config/log collection, REST diagnostics and per-service (query, index,
    projector, FTS, analytics, eventing) endpoints.
    """
    # Install root is four levels above <root>/var/lib/couchbase/initargs.
    root = os.path.abspath(os.path.join(initargs_path, "..", "..", "..", ".."))
    dbdir = os.path.realpath(read_guts(guts, "db_dir"))
    viewdir = os.path.realpath(read_guts(guts, "idx_dir"))
    nodes = correct_split(read_guts(guts, "nodes"), ",")

    diag_url = "http://%s:%s/diag?noLogs=1" % (local_url_addr, read_guts(guts, "rest_port"))
    if not options.multi_node_diag:
        # Default: collect diag from this node only.
        diag_url += "&oneNode=1"


    from distutils.spawn import find_executable

    # Pick the first available DNS lookup utility on this system.
    lookup_cmd = None
    for cmd in ["dig", "nslookup", "host"]:
        if find_executable(cmd) is not None:
            lookup_cmd = cmd
            break

    lookup_tasks = []
    if lookup_cmd is not None:
        lookup_tasks = [UnixTask("DNS lookup information for %s" % node,
                                 "%(lookup_cmd)s '%(node)s'" % locals())
                        for node in nodes]

    getent_tasks = [LinuxTask("Name Service Switch "
                              "hosts database info for %s" % node,
                              ["getent", "ahosts", node])
                    for node in nodes]

    # N1QL system catalog dumps (only when a query service port is known).
    query_tasks = []
    query_port = read_guts(guts, "query_port")
    if query_port:
        def make(statement):
            return make_query_task(statement, user="@",
                                   password=memcached_pass,
                                   port=query_port)

        query_tasks = [make("SELECT * FROM system:datastores"),
                       make("SELECT * FROM system:namespaces"),
                       make("SELECT * FROM system:keyspaces"),
                       make("SELECT * FROM system:indexes")]

    # Indexer REST endpoints (definitions, settings, storage stats, pprof).
    index_tasks = []
    index_port = read_guts(guts, "indexer_http_port")
    if index_port:
        index_tasks = [make_index_task("Index definitions are: ", "getIndexStatus",
                                       memcached_pass, index_port),
                       make_index_task("Indexer settings are: ", "settings",
                                       memcached_pass, index_port),
                       make_index_task("Index storage stats are: ", "stats/storage",
                                       memcached_pass, index_port),
                       make_index_task("MOI allocator stats are: ", "stats/storage/mm",
                                       memcached_pass, index_port),
                       make_index_task("Indexer Go routine dump: ", "debug/pprof/goroutine?debug=1",
                                       memcached_pass, index_port, logfile="indexer_pprof.log"),
                       make_index_task("Indexer Rebalance Tokens: ", "listRebalanceTokens",
                                       memcached_pass, index_port),
                       make_index_task("Indexer Metadata Tokens: ", "listMetadataTokens",
                                       memcached_pass, index_port),
                       make_index_task("Indexer Memory Profile: ", "debug/pprof/heap?debug=1",
                                       memcached_pass, index_port, logfile="indexer_mprof.log"),
        ]

    projector_tasks = []
    proj_port = read_guts(guts, "projector_port")
    if proj_port:
        proj_url = 'http://%s:%s/debug/pprof/goroutine?debug=1' % (local_url_addr, proj_port)
        projector_tasks = [make_curl_task(name="Projector Go routine dump ",
                                          user="@", password=memcached_pass,
                                          url=proj_url, log_file="projector_pprof.log")]

    fts_tasks = []
    fts_port = read_guts(guts, "fts_http_port")
    if fts_port:
        url = 'http://%s:%s/api/diag' % (local_url_addr, fts_port)
        fts_tasks = [make_curl_task(name="FTS /api/diag: ",
                                    user="@", password=memcached_pass,
                                    url=url,
                                    log_file="fts_diag.json", no_header=True)]

    cbas_tasks = []
    cbas_port = read_guts(guts, "cbas_admin_port")
    if cbas_port:
        # BUG FIX: this used to be assigned to 'diag_url', clobbering the
        # /diag?noLogs=1 URL built above, so the "couchbase diags" task
        # below fetched the analytics diagnostics instead whenever the
        # analytics service was configured. Use a distinct local name.
        cbas_diag_url = 'http://%s:%s/analytics/node/diagnostics' % (local_url_addr, cbas_port)
        cbas_parent_port = read_guts(guts, "cbas_parent_port")
        pprof_url = 'http://%s:%s/debug/pprof/goroutine?debug=1' % (local_url_addr, cbas_parent_port)
        cbas_tasks = [make_curl_task(name="Analytics /analytics/node/diagnostics: ",
                                     user="@", password=memcached_pass,
                                     url=cbas_diag_url,
                                     log_file="analytics_diag.json", no_header=True),
                      make_curl_task(name="Analytics Go routine dump: ",
                                     user="@", password=memcached_pass,
                                     url=pprof_url,
                                     log_file="analytics_pprof.log", no_header=True)]

    eventing_tasks = []
    eventing_port = read_guts(guts, "eventing_http_port")
    if eventing_port:
        stats_url = 'http://%s:%s/api/v1/stats?type=full' % (local_url_addr, eventing_port)
        pprof_url = 'http://%s:%s/debug/pprof/goroutine?debug=1' % (local_url_addr, eventing_port)
        eventing_tasks = [make_curl_task(name="Eventing /api/v1/stats: ",
                                         user="@", password=memcached_pass,
                                         url=stats_url,
                                         log_file="eventing_stats.json", no_header=True),
                          make_curl_task(name="Eventing Go routine dump: ",
                                         user="@", password=memcached_pass,
                                         url=pprof_url,
                                         log_file="eventing_pprof.log", no_header=True)]

    # Core product tasks: directory listings, version/manifest files,
    # configs, service logs, cbstats/mcstat/mctimings dumps, couchstore
    # and RocksDB artifacts, and the main /diag REST collection.
    _tasks = [
        UnixTask("Directory structure",
                 ["ls", "-lRai", root]),
        UnixTask("Database directory structure",
                 ["ls", "-lRai", dbdir]),
        UnixTask("Index directory structure",
                 ["ls", "-lRai", viewdir]),
        UnixTask("couch_dbinfo",
                 ["find", dbdir, "-type", "f",
                  "-name", "*.couch.*",
                  "-exec", "couch_dbinfo", "{}", "+"]),
        LinuxTask("Database directory filefrag info",
                  ["find", dbdir, "-type", "f", "-exec", "filefrag", "-v", "{}", "+"]),
        LinuxTask("Index directory filefrag info",
                  ["find", viewdir, "-type", "f", "-exec", "filefrag", "-v", "{}", "+"]),
        WindowsTask("Database directory structure",
                    "dir /s " + winquote_path(dbdir)),
        WindowsTask("Index directory structure",
                    "dir /s " + winquote_path(viewdir)),
        WindowsTask("Version file",
                    "type " + winquote_path(basedir()) + "\\..\\VERSION.txt"),
        WindowsTask("Manifest file",
                    "type " + winquote_path(basedir()) + "\\..\\manifest.txt"),
        WindowsTask("Manifest file",
                    "type " + winquote_path(basedir()) + "\\..\\manifest.xml"),
        LinuxTask("Version file", "cat '%s/VERSION.txt'" % root),
        LinuxTask("Variant file", "cat '%s/VARIANT.txt'" % root),
        LinuxTask("Manifest file", "cat '%s/manifest.txt'" % root),
        LinuxTask("Manifest file", "cat '%s/manifest.xml'" % root),
        LiteralTask("Couchbase config", read_guts(guts, "ns_config")),
        LiteralTask("Couchbase static config", read_guts(guts, "static_config")),
        LiteralTask("Raw ns_log", read_guts(guts, "ns_log")),
        # TODO: just gather those in python
        WindowsTask("Memcached logs",
                    "cd " + winquote_path(read_guts(guts, "memcached_logs_path")) + " && " +
                    "for /f %a IN ('dir memcached.log.* /od /tw /b') do type %a",
                    log_file="memcached.log"),
        UnixTask("Memcached logs",
                 ["sh", "-c", 'cd "$1"; for file in $(ls -tr memcached.log.*); do cat \"$file\"; done', "--", read_guts(guts, "memcached_logs_path")],
                 log_file="memcached.log"),
        [WindowsTask("Ini files (%s)" % p,
                     "type " + winquote_path(p),
                     log_file="ini.log")
         for  p in read_guts(guts, "couch_inis").split(";")],
        UnixTask("Ini files",
                 ["sh", "-c", 'for i in "$@"; do echo "file: $i"; cat "$i"; done', "--"] + read_guts(guts, "couch_inis").split(";"),
                 log_file="ini.log"),

        make_curl_task(name="couchbase diags",
                       user="@",
                       password=memcached_pass,
                       timeout=600,
                       url=diag_url,
                       log_file="diag.log"),

        make_curl_task(name="master events",
                       user="@",
                       password=memcached_pass,
                       timeout=300,
                       url='http://%s:%s/diag/masterEvents?o=1' % (local_url_addr, read_guts(guts, "rest_port")),
                       log_file="master_events.log",
                       no_header=True),

        make_curl_task(name="ale configuration",
                       user="@",
                       password=memcached_pass,
                       url='http://%s:%s/diag/ale' % (local_url_addr, read_guts(guts, "rest_port")),
                       log_file="couchbase.log"),

        [AllOsTask("couchbase logs (%s)" % name, "cbbrowse_logs %s" % name,
                   addenv = [("REPORT_DIR", read_guts(guts, "log_path"))],
                   log_file="ns_server.%s" % name)
         for name in ["debug.log", "info.log", "error.log", "couchdb.log",
                      "xdcr_target.log",
                      "views.log", "mapreduce_errors.log",
                      "stats.log", "babysitter.log",
                      "reports.log", "http_access.log",
                      "http_access_internal.log", "ns_couchdb.log",
                      "goxdcr.log", "query.log", "projector.log", "indexer.log",
                      "fts.log", "metakv.log", "json_rpc.log", "eventing.log",
                      "analytics.log", "analytics_debug.log", "analytics_shutdown.log", "analytics_trace.json"]],

        [make_cbstats_task(kind, memcached_pass, guts)
         for kind in ["all", "allocator", "checkpoint", "config",
                      "dcp", "dcpagg",
                      ["diskinfo", "detail"], ["dispatcher", "logs"],
                      "eviction", "failovers", ["hash", "detail"],
                      "kvstore", "kvtimings", "memory",
                      "prev-vbucket",
                      "runtimes", "scheduler",
                      "tasks",
                      "timings", "uuid",
                      "vbucket", "vbucket-details", "vbucket-seqno",
                      "warmup", "workload"]],

        [AllOsTask("memcached mcstat %s" % kind,
                   flatten(["mcstat", "-h", "%s:%s" % (local_url_addr, read_guts(guts, "memcached_port")),
                            "-u", read_guts(guts, "memcached_admin"), kind]),
                   log_file="stats.log",
                   timeout=60,
                   addenv=[("CB_PASSWORD", memcached_pass)])
         for kind in ["connections", "tracing"]],

        [AllOsTask("fts mossScope (%s)" % path,
                   ["mossScope", "stats", "diag", path],
                   log_file="fts_store_stats.log")
         for path in glob.glob(os.path.join(viewdir, "@fts", "*.pindex", "store"))
            if any(".moss" in entry for entry in os.listdir(path))],

        [AllOsTask("fts scorch zap (%s)" % path,
                   ["cbft-bleve", "zap", "footer", path],
                   log_file="fts_store_stats.log")
         for path in glob.glob(os.path.join(viewdir, "@fts", "*.pindex", "store", "*.zap"))],

        [AllOsTask("ddocs for %s (%s)" % (bucket, path),
                   ["couch_dbdump", path],
                   log_file = "ddocs.log")
         for bucket in set(correct_split(read_guts(guts, "buckets"), ",")) - set(correct_split(read_guts(guts, "memcached_buckets"), ","))
         for path in glob.glob(os.path.join(dbdir, bucket, "master.couch*"))],

        [AllOsTask("Couchstore local documents (%s, %s)" % (bucket, os.path.basename(path)),
                   ["couch_dbdump", "--local", path],
                   log_file = "couchstore_local.log")
        for bucket in set(correct_split(read_guts(guts, "buckets"), ",")) - set(correct_split(read_guts(guts, "memcached_buckets"), ","))
        for path in glob.glob(os.path.join(dbdir, bucket, "*.couch.*"))],

        # RocksDB has logs per DB (i.e. vBucket). 'LOG' is the most
        # recent file, with old files named LOG.old.<timestamp>.
        # Sort so we go from oldest -> newest as per other log files.
        [AllOsTask("RocksDB Log file (%s, %s)" % (bucket, os.path.basename(path)),
                   "cat '%s'" % (log_file),
                   log_file="kv_rocks.log")
        for bucket in (set(correct_split(read_guts(guts, "buckets"), ",")) -
                       set(correct_split(read_guts(guts, "memcached_buckets"), ",")))
        for path in glob.glob(os.path.join(dbdir, bucket, "rocksdb.*"))
        for log_file in sorted(glob.glob(os.path.join(path, "LOG.old.*"))) + [os.path.join(path, "LOG")]],

        [UnixTask("moxi stats (port %s)" % port,
                  "echo stats proxy | nc %s %s" % (local_addr, port),
                  log_file="stats.log",
                  timeout=60)
         for port in correct_split(read_guts(guts, "moxi_ports"), ",")],

        [AllOsTask("mctimings %s" % stat,
                   ["mctimings",
                    "-u", read_guts(guts, "memcached_admin"),
                    "-h", "%s:%s" % (local_url_addr, read_guts(guts, "memcached_port")),
                    "-v"] + stat,
                   log_file="stats.log",
                   timeout=60,
                   addenv=[("CB_PASSWORD", memcached_pass)])
         for stat in ([], ["subdoc_execute"])],

        CollectFile("Users storage", read_guts(guts, "users_storage_path")),

        make_stats_archives_task(guts, initargs_path),
        AllOsTask("Phosphor Trace",
                  ["kv_trace_dump",
                   "-H", "%s:%s" % (local_url_addr, read_guts(guts, "memcached_port")),
                   "-u", read_guts(guts, "memcached_admin"),
                   "-P", memcached_pass,
                   "kv_trace.json"],
                  timeout=120,
                  log_file="stats.log",
                  change_dir=True,
                  artifacts=["kv_trace.json"]),
        ]

    _tasks = flatten([getent_tasks, lookup_tasks, query_tasks, index_tasks,
                      projector_tasks, fts_tasks, cbas_tasks, eventing_tasks, _tasks])

    return _tasks
1177
def find_script(name):
    """Locate a helper script by name.

    Searches basedir() and its scripts/ subdirectory; returns the absolute
    path of the first match, or None when the script is not found.
    """
    for directory in [basedir(), os.path.join(basedir(), "scripts")]:
        candidate = os.path.join(directory, name)
        if not os.path.exists(candidate):
            continue
        log("Found %s: %s" % (name, candidate))
        return os.path.abspath(candidate)

    return None
1187
def get_server_guts(initargs_path):
    """Run the dump-guts escript against *initargs_path* and parse its output.

    dump-guts emits NUL-separated alternating key/value tokens; these are
    folded into a dict. Returns {} when the script cannot be found or no
    key/value pairs were produced.
    """
    dump_guts_path = find_script("dump-guts")

    if dump_guts_path is None:
        log("Couldn't find dump-guts script. Some information will be missing")
        return {}

    escript = exec_name("escript")
    extra_args = os.getenv("EXTRA_DUMP_GUTS_ARGS")
    args = [escript, dump_guts_path, "--initargs-path", initargs_path]
    if extra_args:
        # Extra arguments come in as a ';'-separated list via the environment.
        args = args + extra_args.split(";")
    print("Checking for server guts in %s..." % initargs_path)
    p = subprocess.Popen(args, stdout = subprocess.PIPE)
    output = p.stdout.read()
    p.wait()
    rc = p.returncode
    # print("args: %s gave rc: %d and:\n\n%s\n" % (args, rc, output))
    tokens = output.rstrip("\0").split("\0")
    d = {}
    # Fold tokens into key/value pairs. Step to len(tokens) - 1 so a
    # trailing unpaired token (malformed output) cannot raise IndexError;
    # a bare [''] (empty output) naturally yields an empty dict.
    for i in xrange(0, len(tokens) - 1, 2):
        d[tokens[i]] = tokens[i + 1]
    return d
1212
def guess_utility(command):
    """Guess which external utility a task command depends on.

    Returns None for an empty command; the entire command string when it
    is too complex to attribute to a single program (pipelines, subshells,
    sh/su/find/for constructs); otherwise the first word of the command.
    """
    if isinstance(command, list):
        command = ' '.join(command)

    if not command:
        return None

    complex_markers = re.findall(r'[|;&]|\bsh\b|\bsu\b|\bfind\b|\bfor\b', command)
    if not complex_markers:
        return command.split()[0]

    # something hard to easily understand; let the human decide
    return command
1225
def dump_utilities(*args, **kwargs):
    """optparse callback for -d: print the external utilities that the
    collection tasks rely on, grouped per platform, then exit(0).

    The task list is built with empty guts and fake options, so only the
    command names matter, not their runtime arguments.
    """
    specific_platforms = { SolarisTask : 'Solaris',
                           LinuxTask :  'Linux',
                           WindowsTask : 'Windows',
                           MacOSXTask : 'Mac OS X' }
    platform_utils = dict((name, set()) for name in specific_platforms.values())

    # Stand-in for parsed command-line options: every attribute reads None.
    class FakeOptions(object):
        def __getattr__(self, name):
            return None

    tasks = make_os_tasks() + make_product_task({}, "", "", FakeOptions())

    for task in tasks:
        utility = guess_utility(task.command)
        if utility is None:
            continue

        # NOTE(review): the loop variable shadows the module-level
        # 'platform' import; harmless here but worth renaming eventually.
        for (platform, name) in specific_platforms.items():
            if isinstance(task, platform):
                platform_utils[name].add(utility)

    print '''This is an autogenerated, possibly incomplete and flawed list
of utilites used by cbcollect_info'''

    # Sorted by platform name, then utility name, for stable output.
    for (name, utilities) in sorted(platform_utils.items(), key=lambda x: x[0]):
        print "\n%s:" % name

        for utility in sorted(utilities):
            print "        - %s" % utility

    sys.exit(0)
1258
def stdin_watcher():
    """Block until stdin is closed or a newline arrives.

    Used via setup_stdin_watcher() (--watch-stdin) so the collector can
    terminate when its controlling parent closes the pipe or sends a line.
    Interrupted reads (EINTR) are retried; other OSErrors propagate.
    """
    fd = sys.stdin.fileno()

    while True:
        try:
            buf = os.read(fd, 1024)
            # stdin closed
            if not buf:
                break

            if buf.find('\n') != -1:
                break
        except OSError, e:
            if e.errno != errno.EINTR:
                raise
1274
def setup_stdin_watcher():
    """Spawn a daemon thread that forces process exit (status 2) once
    stdin closes or a newline is received (see stdin_watcher)."""
    def watch():
        try:
            stdin_watcher()
        finally:
            # Whether stdin closed or the watcher raised, tear down.
            AltExit.exit(2)

    watcher = threading.Thread(target = watch)
    watcher.setDaemon(True)
    watcher.start()
1284
class CurlKiller:
    """Exit-time hook that hard-kills a running curl subprocess.

    Registered with AltExit while an upload is in flight; disarm() once
    the process has finished so cleanup() becomes a no-op.
    """
    def __init__(self, p):
        self.p = p

    def cleanup(self):
        # No-op once disarmed.
        if self.p is None:
            return
        print("Killing curl...")
        os.kill(self.p.pid, signal.SIGKILL)
        print("done")

    def disarm(self):
        self.p = None
1295
def do_upload_and_exit(path, url, proxy):
    """Upload *path* to *url* via curl (through *proxy*) and exit.

    Exits with status 0 when curl succeeds and the server answered HTTP
    200; exits 1 otherwise. curl's response body goes to a temp file that
    is removed on exit; '--write-out %{http_code}' makes curl print the
    status code on stdout, which is what we check.
    """
    output_fd, output_file = tempfile.mkstemp()
    os.close(output_fd)

    AltExit.register(lambda: os.unlink(output_file))

    args = ["curl", "-sS",
            "--output", output_file,
            "--proxy", proxy,
            "--write-out", "%{http_code}", "--upload-file", path, url]
    # Hold the AltExit lock across Popen so the exit handler cannot run
    # between spawning curl and registering the killer for it.
    AltExit.lock.acquire()
    try:
        p = subprocess.Popen(args, stdout=subprocess.PIPE)
        k = CurlKiller(p)
        AltExit.register_and_unlock(k.cleanup)
    except Exception, e:
        AltExit.lock.release()
        raise e

    stdout, _ = p.communicate()
    # curl finished on its own; don't SIGKILL it from the exit handler.
    k.disarm()

    if p.returncode != 0:
        sys.exit(1)
    else:
        if stdout.strip() == '200':
            log('Upload path is: %s' % url)
            log('Done uploading')
            sys.exit(0)
        else:
            log('HTTP status code: %s' % stdout)
            sys.exit(1)
1328
def parse_host(host):
    """Split *host* into (scheme, netloc, path), defaulting to https://
    when no scheme was supplied."""
    parts = urlparse.urlsplit(host)
    if not parts.scheme:
        parts = urlparse.urlsplit('https://' + host)

    return parts.scheme, parts.netloc, parts.path
1335
def generate_upload_url(parser, options, zip_filename):
    """Build the URL the collected zip will be uploaded to.

    Returns None unless --upload-host was given. Errors out through the
    option parser when --upload-host is present but --customer is missing.
    The path is <host path>/<customer>[/<ticket>]/<zip basename>, with the
    customer and filename URL-quoted.
    """
    if not options.upload_host:
        return None

    if not options.upload_customer:
        parser.error("Need --customer when --upload-host is given")

    scheme, netloc, path = parse_host(options.upload_host)

    customer = urllib.quote(options.upload_customer)
    fname = urllib.quote(os.path.basename(zip_filename))
    if options.upload_ticket:
        full_path = '%s/%s/%d/%s' % (path, customer, options.upload_ticket, fname)
    else:
        full_path = '%s/%s/%s' % (path, customer, fname)

    upload_url = urlparse.urlunsplit((scheme, netloc, full_path, '', ''))
    log("Will upload collected .zip file into %s" % upload_url)
    return upload_url
1354
def check_ticket(option, opt, value):
    """optparse type-checker for 'ticket': 1-7 digits, returned as int.

    Raises optparse.OptionValueError for anything else.
    """
    if not re.match('^\d{1,7}$', value):
        raise optparse.OptionValueError(
            "option %s: invalid ticket number: %r" % (opt, value))
    return int(value)
1361
class CbcollectInfoOptions(optparse.Option):
    """optparse.Option subclass adding a custom 'ticket' option type,
    validated and converted to int by check_ticket (1-7 digit numbers)."""
    from copy import copy

    TYPES = optparse.Option.TYPES + ("ticket",)
    # Copy the checker map so the base class's table is not mutated.
    TYPE_CHECKER = copy(optparse.Option.TYPE_CHECKER)
    TYPE_CHECKER["ticket"] = check_ticket
1368
1369def main():
1370    # ask all tools to use C locale (MB-12050)
1371    os.environ['LANG'] = 'C'
1372    os.environ['LC_ALL'] = 'C'
1373
1374    mydir = os.path.dirname(sys.argv[0])
1375    #(MB-8239)erl script fails in OSX as it is unable to find COUCHBASE_TOP -ravi
1376    if platform.system() == 'Darwin':
1377        os.environ["COUCHBASE_TOP"] = os.path.abspath(os.path.join(mydir, ".."))
1378
1379    parser = optparse.OptionParser(usage=USAGE, option_class=CbcollectInfoOptions)
1380    parser.add_option("-r", dest="root",
1381                      help="root directory - defaults to %s" % (mydir + "/.."),
1382                      default=os.path.abspath(os.path.join(mydir, "..")))
1383    parser.add_option("-v", dest="verbosity", help="increase verbosity level",
1384                      action="count", default=0)
1385    parser.add_option("-p", dest="product_only", help="gather only product related information",
1386                      action="store_true", default=False)
1387    parser.add_option("-d", action="callback", callback=dump_utilities,
1388                      help="dump a list of commands that cbcollect_info needs")
1389    parser.add_option("--watch-stdin", dest="watch_stdin",
1390                      action="store_true", default=False,
1391                      help=optparse.SUPPRESS_HELP)
1392    parser.add_option("--initargs", dest="initargs", help="server 'initargs' path")
1393    parser.add_option("--multi-node-diag", dest="multi_node_diag",
1394                      action="store_true", default=False,
1395                      help="collect per-node diag  on all reachable nodes (default is just this node)")
1396    parser.add_option("--log-redaction-level", dest="redact_level",
1397                      default="none",
1398                      help="redaction level for the logs collected, none and partial supported (default is none)")
1399    parser.add_option("--log-redaction-salt", dest="salt_value",
1400                      default=str(uuid.uuid4()),
1401                      help="Is used to salt the hashing of tagged data, \
1402                            defaults to random uuid. If input by user it should \
1403                            be provided along with --log-redaction-level option")
1404    parser.add_option("--just-upload-into", dest="just_upload_into",
1405                      help=optparse.SUPPRESS_HELP)
1406    parser.add_option("--upload-host", dest="upload_host",
1407                      help="gather diagnostics and upload it for couchbase support. Gives upload host")
1408    parser.add_option("--customer", dest="upload_customer",
1409                      help="specifies customer name for upload")
1410    parser.add_option("--ticket", dest="upload_ticket", type='ticket',
1411                      help="specifies support ticket number for upload")
1412    parser.add_option("--bypass-sensitive-data", dest="bypass_sensitive_data",
1413                      action="store_true", default=False,
1414                      help="do not collect sensitive data")
1415    parser.add_option("--task-regexp", dest="task_regexp",
1416                      default="",
1417                      help="Run only tasks matching regexp. For debugging purposes only.")
1418    parser.add_option("--tmp-dir", dest="tmp_dir", default=None,
1419                      help="set the temp dir used while processing collected data. Overrides the TMPDIR env variable if set")
1420    parser.add_option("--upload-proxy", dest="upload_proxy", default="",
1421                      help="specifies proxy for upload")
1422    options, args = parser.parse_args()
1423
1424    if len(args) != 1:
1425        parser.error("incorrect number of arguments. Expecting filename to collect diagnostics into")
1426
1427    if options.watch_stdin:
1428        setup_stdin_watcher()
1429
1430    zip_filename = args[0]
1431    if zip_filename[-4:] != '.zip':
1432        zip_filename = zip_filename + '.zip'
1433
1434    zip_dir = os.path.dirname(os.path.abspath(zip_filename))
1435
1436    if not os.access(zip_dir, os.W_OK | os.X_OK):
1437        print("do not have write access to the directory %s" % (zip_dir))
1438        sys.exit(1)
1439
1440    if options.redact_level != "none" and options.redact_level != "partial":
1441        parser.error("Invalid redaction level. Only 'none' and 'partial' are supported.")
1442
1443    redact_zip_file = zip_filename[:-4] + "-redacted" + zip_filename[-4:]
1444    upload_url = ""
1445    if options.redact_level != "none":
1446        upload_url = generate_upload_url(parser, options, redact_zip_file)
1447    else:
1448        upload_url = generate_upload_url(parser, options, zip_filename)
1449
1450
1451    erldir = os.path.join(mydir, 'erlang', 'bin')
1452    if os.name == 'posix':
1453        path = [mydir,
1454                '/opt/couchbase/bin',
1455                erldir,
1456                os.environ['PATH'],
1457                '/bin',
1458                '/sbin',
1459                '/usr/bin',
1460                '/usr/sbin']
1461        os.environ['PATH'] = ':'.join(path)
1462
1463        library_path = [os.path.join(options.root, 'lib')]
1464
1465        current_library_path = os.environ.get('LD_LIBRARY_PATH')
1466        if current_library_path is not None:
1467            library_path.append(current_library_path)
1468
1469        os.environ['LD_LIBRARY_PATH'] = ':'.join(library_path)
1470    elif os.name == 'nt':
1471      path = [mydir, erldir, os.environ['PATH']]
1472      os.environ['PATH'] = ';'.join(path)
1473
1474    if options.just_upload_into != None:
1475        do_upload_and_exit(args[0], options.just_upload_into,
1476                options.upload_proxy)
1477
1478    runner = TaskRunner(verbosity=options.verbosity,
1479                        task_regexp=options.task_regexp,
1480                        tmp_dir=options.tmp_dir,
1481                        salt_value=options.salt_value)
1482    runner.run(make_redaction_task()) # We want this at the top of couchbase.log
1483
1484    if not options.product_only:
1485        runner.run_tasks(make_os_tasks())
1486
1487    initargs_variants = [os.path.abspath(os.path.join(options.root, "var", "lib", "couchbase", "initargs")),
1488                         "/opt/couchbase/var/lib/couchbase/initargs",
1489                         os.path.expanduser("~/Library/Application Support/Couchbase/var/lib/couchbase/initargs")]
1490
1491    if options.initargs != None:
1492        initargs_variants = [options.initargs]
1493
1494    guts = None
1495    guts_initargs_path = None
1496
1497    for initargs_path in initargs_variants:
1498        d = get_server_guts(initargs_path)
1499        # print("for initargs: %s got:\n%s" % (initargs_path, d))
1500        if len(d) > 0:
1501            guts = d
1502            guts_initargs_path = os.path.abspath(initargs_path)
1503            break
1504
1505    if guts is None:
1506        log("Couldn't read server guts. Using some default values.")
1507
1508        prefix = None
1509        if platform.system() == 'Windows':
1510            prefix = 'c:/Program Files/Couchbase/Server'
1511        elif platform.system() == 'Darwin':
1512            prefix = '~/Library/Application Support/Couchbase'
1513        else:
1514            prefix = '/opt/couchbase'
1515
1516        guts = {"db_dir" : os.path.join(prefix, "var/lib/couchbase/data"),
1517                "idx_dir" : os.path.join(prefix, "var/lib/couchbase/data"),
1518                "ns_log_path" : os.path.join(prefix, "var/lib/couchbase/ns_log"),
1519                "log_path" : os.path.join(prefix, "var/lib/couchbase/logs"),
1520                "memcached_logs_path" : os.path.join(prefix, "var/lib/couchbase/logs")}
1521
1522        guts_initargs_path = os.path.abspath(prefix)
1523
1524    ipv6 = read_guts(guts, "ipv6") == "true"
1525    set_local_addr(ipv6)
1526
1527    memcached_password =  get_diag_password(guts)
1528
1529    zip_node = read_guts(guts, "node")
1530    runner.literal("product diag header",
1531                   "Found server initargs at %s (%d)" % (guts_initargs_path, len(guts)))
1532
1533    runner.run_tasks(make_product_task(guts, guts_initargs_path,
1534                                       memcached_password, options))
1535
1536    # Collect breakpad crash dumps.
1537    if options.bypass_sensitive_data:
1538        log("Bypassing Sensitive Data: Breakpad crash dumps")
1539    else:
1540        memcached_breakpad_minidump_dir = read_guts(guts, "memcached_breakpad_minidump_dir")
1541        for dump in glob.glob(os.path.join(memcached_breakpad_minidump_dir, "*.dmp")):
1542            runner.collect_file(dump)
1543
1544        # Collect indexer breakpad minidumps
1545        index_port = read_guts(guts, "indexer_http_port")
1546        if index_port:
1547            indexer_breakpad_minidump_dir = read_guts(guts, "indexer_breakpad_minidump_dir")
1548            if memcached_breakpad_minidump_dir != indexer_breakpad_minidump_dir:
1549                for dump in glob.glob(os.path.join(indexer_breakpad_minidump_dir, "*.dmp")):
1550                    runner.collect_file(dump)
1551
1552    addr = zip_node.split("@")[-1]
1553    if addr == "127.0.0.1" or addr == "::1":
1554        zip_node = '@'.join(zip_node.split("@")[:-1] + [find_primary_addr(ipv6, addr)])
1555
1556    if options.verbosity:
1557        log("Python version: %s" % sys.version)
1558
1559    runner.literal("cbcollect_info log", log_stream.getvalue(),
1560                   log_file="cbcollect_info.log", no_header=True)
1561
1562    runner.close_all_files()
1563
1564    if options.redact_level != "none":
1565        log("Redacting log files to level: %s" % options.redact_level)
1566        runner.redact_and_zip(redact_zip_file, zip_node)
1567
1568    runner.zip(zip_filename, zip_node)
1569
1570    if upload_url and options.redact_level != "none":
1571        do_upload_and_exit(redact_zip_file, upload_url, options.upload_proxy)
1572    elif upload_url:
1573        do_upload_and_exit(zip_filename, upload_url, options.upload_proxy)
1574
def find_primary_addr(ipv6, default = None):
    """Return the local address of the interface that routes to the
    public internet, or *default* if it cannot be determined.

    Connecting a UDP socket does not transmit any packets; it merely
    asks the kernel to pick a route, after which getsockname() reveals
    which local address would be used. Google's public DNS is used as
    the probe destination (port number is arbitrary).
    """
    family = socket.AF_INET6 if ipv6 else socket.AF_INET
    probe_addr = "2001:4860:4860::8844" if ipv6 else "8.8.8.8"
    sock = socket.socket(family, socket.SOCK_DGRAM)
    try:
        sock.connect((probe_addr, 56))
        # getsockname() yields a 4-tuple for IPv6 and a 2-tuple for
        # IPv4; the local address is the first element in both cases.
        return sock.getsockname()[0]
    except socket.error:
        # No route to the probe address (e.g. offline host).
        return default
    finally:
        sock.close()
1591
def exec_name(name):
    """Return *name* with the platform's executable suffix appended
    (".exe" on Windows, nothing elsewhere)."""
    return name + ".exe" if sys.platform == 'win32' else name
1596
# Script entry point: run the full collection and produce the zip
# archive(s) named on the command line.
if __name__ == '__main__':
    main()
1599