147d85ae2STommie McAfee"""pushes stats from serieslydb to CBFS_HOST
27d2bf95fSTommie McAfee
This module works by collecting stats from the seriesly database specified in testcfg.py
and furthermore uses the pandas (python data-analysis) module to store the stats into a dataframe
that is compatible with version comparisons in the report generator.  Once in-memory as a
dataframe the stats are dumped to a csv file, compressed and pushed to CBFS.
77d2bf95fSTommie McAfee
87d2bf95fSTommie McAfeeusage: push_stats.py [-h] --spec <dir>/<test.js>
97d2bf95fSTommie McAfee                          --version VERSION
107d2bf95fSTommie McAfee                          --build BUILD
117d2bf95fSTommie McAfee                          [--name NAME]
1247d85ae2STommie McAfee                          [--cluster default]
137d2bf95fSTommie McAfee
147d2bf95fSTommie McAfeeWhere spec name is a required argument specifying the file used to generate stats.
157d2bf95fSTommie McAfee
167d2bf95fSTommie McAfee"""
177d2bf95fSTommie McAfee
187d2bf95fSTommie McAfeeimport sys
197d2bf95fSTommie McAfeesys.path.append(".")
207d2bf95fSTommie McAfeeimport argparse
217d2bf95fSTommie McAfeeimport json
227d2bf95fSTommie McAfeeimport gzip
237d2bf95fSTommie McAfeeimport testcfg as cfg
247d2bf95fSTommie McAfeeimport pandas as pd
257d2bf95fSTommie McAfeeimport os
267d2bf95fSTommie McAfeeimport shutil
277d2bf95fSTommie McAfeefrom seriesly import Seriesly, exceptions
287d2bf95fSTommie McAfeeimport requests
297d2bf95fSTommie McAfee
# base url of the cbfs instance the generated files are uploaded to
CBFS_HOST = 'http://10.5.0.128:8484'

# paths of every generated file that still has to be pushed (archived) into cbfs
archives = []

# command line interface
parser = argparse.ArgumentParser(description='CB System Test Stat Pusher')
parser.add_argument(
    "--spec",
    required=True,
    metavar="<dir>/<test.js>",
    help="path to json file used in test",
)
parser.add_argument(
    "--version",
    required=True,
    help="couchbase version.. ie (2.2.0)",
)
parser.add_argument(
    "--build",
    required=True,
    help="build number",
)
parser.add_argument(
    "--name",
    default=None,
    help="use to override name in test spec",
)
parser.add_argument(
    "--cluster",
    default="default",
    help="should match whatever you set for CB_CLUSTER_TAG",
)
437d2bf95fSTommie McAfee
# Connect to seriesly.  This runs at import time; the host comes from
# testcfg.SERIESLY_IP and the port (3133) is hard-coded here.
conn = Seriesly(cfg.SERIESLY_IP, 3133)
467d2bf95fSTommie McAfee
477d2bf95fSTommie McAfee
487d2bf95fSTommie McAfee
def getDBData(db):
  """Retrieve timeseries data from seriesly.

  Everything stored in the database is fetched and then trimmed by
  stripData(), so only samples at or after the first recorded event remain.

  Args:
    db: name of the seriesly database to read.

  Returns:
    The trimmed data, or None when there is nothing left.

  Exits:
    Terminates the process with status -1 when seriesly reports that the
    database does not exist.
  """
  try:
    # keep the handle under its own name so `db` is still the database *name*
    # when the error message below is printed
    handle = conn[db]
    data = stripData(handle.get_all())
  except exceptions.NotExistingDatabase:
    print("DB Not found: %s" % db)
    print("cbmonitor running?")
    sys.exit(-1)

  # empty (or missing) results are reported uniformly as None
  return data if data else None
677d2bf95fSTommie McAfee
def stripData(data):
  """Use the event db to keep only the data of the preceding test.

  Entries whose key compares lower than the first (sorted) event key are
  dropped.  When getSortedEventData() yields no event keys the data is
  returned untouched.

  Args:
    data: mapping keyed by timestamp.

  Returns:
    A new dict with the retained entries, or `data` itself when there are no
    event keys to compare against.
  """
  event_keys, _ = getSortedEventData()

  # no events means there is no start marker to trim against
  if event_keys is None:
    return data

  first_event = event_keys[0]
  return {ts: data[ts] for ts in data if ts >= first_event}
897d2bf95fSTommie McAfee
def getSortedEventData():
  """Fetch the contents of the 'event' database sorted by key.

  Returns:
    A (keys, data) pair:
      * (sorted keys, values in key order) when the event db holds data,
      * (None, <the empty result>) when the event db exists but is empty,
      * (None, None) when seriesly has no 'event' db.
    A warning is printed in the last two cases.
  """
  if 'event' not in conn.list_dbs():
    print("warning: eventdb not found in seriesly db")
    return None, None

  events = conn['event'].get_all()
  if len(events) <= 0:
    print("warning: eventdb exists but is empty")
    return None, events

  sorted_keys, sorted_events = sortDBData(events)
  return sorted_keys, sorted_events
1037d2bf95fSTommie McAfee
1047d2bf95fSTommie McAfee
def get_query_params(start_time):
  """Return the fixed query-parameter dict with "from" set to start_time.

  NOTE(review): presumably meant for seriesly queries; nothing in this file
  calls it, so confirm against external callers.
  """
  params = {"ptr": "", "reducer": "identity", "group": 10000}
  params["from"] = start_time
  return params
1127d2bf95fSTommie McAfee
1137d2bf95fSTommie McAfee
def sortDBData(data):
  """Sort data by its timestamp keys.

  Args:
    data: mapping of timestamp -> value; may be None or empty.

  Returns:
    A (keys, values) tuple: the keys in ascending order and the matching
    values in that same order.  Both are empty lists when data is falsy.
  """
  if not data:
    return [], []

  # sorted() over the mapping iterates its keys on Python 2 and 3 alike,
  # unlike the Python-2-only iterkeys()
  keys = sorted(data)
  return keys, [data[ts] for ts in keys]
1287d2bf95fSTommie McAfee
def getSortedDBData(db):
  """Fetch the data of seriesly database `db` and return it sorted by key.

  Returns the (keys, values) tuple produced by sortDBData().
  """
  raw = getDBData(db)
  return sortDBData(raw)
1317d2bf95fSTommie McAfee
1327d2bf95fSTommie McAfee"""
1337d2bf95fSTommie McAfee" make a timeseries dataframe
1347d2bf95fSTommie McAfee"""
1357d2bf95fSTommie McAfeedef _createDataframe(index, data):
1367d2bf95fSTommie McAfee
1377d2bf95fSTommie McAfee  df = None
1387d2bf95fSTommie McAfee
1397d2bf95fSTommie McAfee  try:
1407d2bf95fSTommie McAfee
1417d2bf95fSTommie McAfee    if(data):
1427d2bf95fSTommie McAfee      df = pd.DataFrame(data)
1437d2bf95fSTommie McAfee      df.index = index
1447d2bf95fSTommie McAfee
1457d2bf95fSTommie McAfee  except ValueError as ex:
1467d2bf95fSTommie McAfee    print "unable to create dataframe: has incorrect format"
1477d2bf95fSTommie McAfee    raise Exception(ex)
1487d2bf95fSTommie McAfee
1497d2bf95fSTommie McAfee  return df
1507d2bf95fSTommie McAfee
def createDataframe(db):
  """Get data from seriesly and convert to a 2d timeseries dataframe.

  Rows are timestamps, columns are stats.

  Args:
    db: name of the seriesly database to read.

  Returns:
    The DataFrame, or None (after printing a warning) when the database
    yields no data.
  """
  data = getDBData(db)

  if not data:
    print("WARNING: stat db %s is empty!" % db)
    return None

  # sort the data already in hand rather than fetching the whole db a second
  # time, which was both slow and could return a different snapshot
  index, rows = sortDBData(data)
  return _createDataframe(index, rows)
1657d2bf95fSTommie McAfee
1667d2bf95fSTommie McAfee
1677d2bf95fSTommie McAfee"""
1687d2bf95fSTommie McAfee" store stats per-phase to csv
1697d2bf95fSTommie McAfee"""
1707d2bf95fSTommie McAfeedef storePhase(ns_dataframe, version, test, build, bucket):
1717d2bf95fSTommie McAfee
1727d2bf95fSTommie McAfee  path = "system-test-results/%s/%s/%s/%s" % (version, test, build, bucket)
1737d2bf95fSTommie McAfee  print "Generating stats: %s" % path
1747d2bf95fSTommie McAfee
1757d2bf95fSTommie McAfee  phase_dataframe = None
1767d2bf95fSTommie McAfee  columns = ns_dataframe.columns
1777d2bf95fSTommie McAfee  event_idx, _ = getSortedEventData()
1781c89200dSTommie McAfee  if event_idx is None:
1791c89200dSTommie McAfee    print "storing all data in single phase"
1801c89200dSTommie McAfee    dataframeToCsv(ns_dataframe, path, test, 0)
1811c89200dSTommie McAfee
1821c89200dSTommie McAfee  else:
1831c89200dSTommie McAfee    # plot each phase
1841c89200dSTommie McAfee    for i in xrange(len(event_idx)):
1851c89200dSTommie McAfee      if i == 0:
1861c89200dSTommie McAfee        phase_dataframe = ns_dataframe[ns_dataframe.index < event_idx[i+1]]
1871c89200dSTommie McAfee      elif i == len(event_idx) - 1:
1881c89200dSTommie McAfee        phase_dataframe = ns_dataframe[ns_dataframe.index > event_idx[i]]
1891c89200dSTommie McAfee      else:
1901c89200dSTommie McAfee        phase_dataframe = ns_dataframe[ (ns_dataframe.index < event_idx[i+1]) &\
1911c89200dSTommie McAfee          (ns_dataframe.index > event_idx[i])]
1921c89200dSTommie McAfee      dataframeToCsv(phase_dataframe, path, test, i)
1937d2bf95fSTommie McAfee
1947d2bf95fSTommie McAfeedef dataframeToCsv(dataframe, path, test, phase_no):
1957d2bf95fSTommie McAfee    ph_csv  = "%s/%s_phase%s.csv" % (path, test, phase_no)
1967d2bf95fSTommie McAfee    ph_csv_gz  = "%s.gz" % ph_csv
1977d2bf95fSTommie McAfee    dataframe.to_csv(ph_csv)
1987d2bf95fSTommie McAfee    f = gzip.open(ph_csv_gz, 'wb')
1997d2bf95fSTommie McAfee    f.writelines(open(ph_csv, 'rb'))
2007d2bf95fSTommie McAfee    f.close()
2017d2bf95fSTommie McAfee    os.remove(ph_csv)
2027d2bf95fSTommie McAfee    archives.append(ph_csv_gz)
2037d2bf95fSTommie McAfee
2047d2bf95fSTommie McAfee
def generateStats(version, test, build, dbs):
  """Build a dataframe for every database and store it phase by phase.

  Databases for which createDataframe() returns None are skipped.

  Args:
    version, test, build: forwarded to storePhase() to form the output path.
    dbs: iterable of objects exposing `name` (database name) and `bucket`.
  """
  for db in dbs:
    ns_dataframe = createDataframe(db.name)

    # a DataFrame has no single truth value (bool(df) raises ValueError),
    # so "no data" has to be detected with an explicit None check
    if ns_dataframe is not None:
      storePhase(ns_dataframe, version, test, build, db.bucket)
2127d2bf95fSTommie McAfee
2137d2bf95fSTommie McAfee
def pushStats():
  """Upload every file listed in `archives` to CBFS_HOST with an HTTP PUT.

  Files ending in ".js" are sent as text/javascript, everything else as
  application/x-gzip.  The response body of each request is printed; the
  status code is not checked.
  """
  for data_file in archives:
    url = '%s/%s' % (CBFS_HOST, data_file)
    print("Uploading: " + url)

    if data_file.split('.')[-1] == 'js':
      headers = {'content-type': 'text/javascript'}
    else:
      headers = {'content-type': 'application/x-gzip'}

    # the with-block guarantees the handle is closed once the upload finishes
    with open(data_file, 'rb') as data:
      r = requests.put(url, data=data, headers=headers)
    print(r.text)
2287d2bf95fSTommie McAfee
def mkdir(path):
  """Create `path` as an empty directory, removing any existing tree first.

  Missing parent directories are created as well.
  """
  # wipe whatever a previous run left behind so the directory starts empty
  if os.path.exists(path):
    shutil.rmtree(path)
  os.makedirs(path)
2357d2bf95fSTommie McAfee
23671c3cee4STommie McAfeedef prepareEnv(version, test, build, dbs):
2377d2bf95fSTommie McAfee
23871c3cee4STommie McAfee  for db in dbs:
23971c3cee4STommie McAfee    path = "system-test-results/%s/%s/%s/%s" % (version, test, build, db.bucket)
2407d2bf95fSTommie McAfee    mkdir(path)
2417d2bf95fSTommie McAfee
2427d2bf95fSTommie McAfee
2437d2bf95fSTommie McAfee
def loadSpec(spec):
  """Load the json test spec.

  Args:
    spec: path to the json spec file.

  Returns:
    The decoded json document.

  Exits:
    Terminates the process with status -1 when the file cannot be read or
    does not contain valid json.
  """
  try:
    # the with-block closes the file whether or not decoding succeeds
    with open(spec) as f:
      return json.load(f)
  except Exception as ex:
    # command-line boundary: any failure here means the spec is unusable
    print("Invalid test spec: " + str(ex))
    sys.exit(-1)
2527d2bf95fSTommie McAfee
def setName(name, spec):
  """Pick the test name and make it safe for use in a path.

  Args:
    name: name override, or None/empty to use the name from the spec.
    spec: decoded test spec; its 'name' entry is used when no override is set.

  Returns:
    The chosen name with spaces replaced by underscores.

  Exits:
    Terminates the process with status -1 when no override is given and the
    spec has no 'name' entry.
  """
  # an empty override (e.g. --name "") is treated like no override, otherwise
  # the results path would contain an empty component
  if not name:
    if 'name' not in spec:
      print("test name missing from spec")
      sys.exit(-1)
    name = str(spec['name'])

  # remove spaces
  return name.replace(' ', '_')
2657d2bf95fSTommie McAfee
def getDBs(cluster='default'):
  """Collect the seriesly databases that hold stats for `cluster`.

  Three groups are gathered, in this order:
    * 'ns_server<cluster>...' dbs that are a name prefix of other such dbs,
      wrapped as DB('ns_server', name),
    * 'atop<cluster>...' dbs, wrapped as DB('atop<cluster>', name),
    * dbs containing 'latency' after their first character, wrapped as
      DB('', name).

  Args:
    cluster: cluster tag used when matching database names.

  Returns:
    A non-empty list of DB objects.

  Exits:
    Terminates the process with status -1 when seriesly has no databases at
    all or none of them match.
  """
  # ask seriesly once and reuse the answer for every filter below
  all_dbs = conn.list_dbs()

  if not all_dbs:
    print("seriesly database is empty, check SERIESLY_IP in your testcfg.py")
    sys.exit(-1)

  dbs = []

  bucket_dbs = [name for name in all_dbs if name.startswith('ns_server' + cluster)]
  for name in bucket_dbs:
    # filter out dbs with host ip/name attached: a name is kept only when it
    # prefixes more than one db (itself plus at least one longer name)
    sharing_prefix = [other for other in bucket_dbs if other.startswith(name)]
    if len(sharing_prefix) != 1:
      dbs.append(DB('ns_server', name))

  atop_prefix = 'atop' + cluster
  for name in all_dbs:
    if name.startswith(atop_prefix):
      dbs.append(DB(atop_prefix, name))

  # find() > 0 on purpose mirrors the existing rule: a name that *starts*
  # with 'latency' is not picked up
  for name in all_dbs:
    if name.find('latency') > 0:
      dbs.append(DB('', name))

  if not dbs:
    print("no bucket data in seriesly db")
    print("did you try with '--cluster %s' ?" % cfg.CB_CLUSTER_TAG)
    sys.exit(-1)

  return dbs
2977d2bf95fSTommie McAfee
29871c3cee4STommie McAfeedef createInfoFile(version, test, build, dbs, specPath):
2997d2bf95fSTommie McAfee
3007d2bf95fSTommie McAfee  path = "system-test-results/%s/%s/%s" % (version, test, build)
3017d2bf95fSTommie McAfee  fname = '%s/_info.js' % path
3027d2bf95fSTommie McAfee  specName = specPath.split('/')[-1]
3037d2bf95fSTommie McAfee
30471c3cee4STommie McAfee  info = {'buckets' : [db.bucket for db in dbs],
3057d2bf95fSTommie McAfee          'spec' : specName,
3067d2bf95fSTommie McAfee          'files' : archives}
3077d2bf95fSTommie McAfee
3087d2bf95fSTommie McAfee  f = open(fname, 'wb')
3097d2bf95fSTommie McAfee  f.write(json.dumps(info))
3107d2bf95fSTommie McAfee
3117d2bf95fSTommie McAfee  # archive info for pushing to cbfs
3127d2bf95fSTommie McAfee  archives.append(fname)
3137d2bf95fSTommie McAfee
3147d2bf95fSTommie McAfee  # archive spec for pushing to cbfs
3157d2bf95fSTommie McAfee  shutil.copy(specPath, path)
3167d2bf95fSTommie McAfee  archives.append("%s/%s" % (path, specName))
3177d2bf95fSTommie McAfee
class DB(object):
  """A seriesly database name together with its prefix and bucket part.

  Attributes are set in __init__: `prefix` and `name` are stored as given and
  `bucket` is `name` with its first len(prefix) characters removed.
  """

  def __init__(self, prefix, name):
    # the prefix is cut off by length only; it is not checked against name
    prefix_len = len(prefix)
    self.name = name
    self.prefix = prefix
    self.bucket = name[prefix_len:]
32371c3cee4STommie McAfee
def main():
  """Command-line entry point.

  Parses the arguments, resolves the test name from the spec, finds the
  databases for the cluster, then prepares the output directories, generates
  the stat files, writes the info file and pushes everything to cbfs.
  """
  args = parser.parse_args()

  spec_path = args.spec
  test = setName(args.name, loadSpec(spec_path))
  version, build = args.version, args.build
  dbs = getDBs(args.cluster)

  prepareEnv(version, test, build, dbs)
  generateStats(version, test, build, dbs)
  createInfoFile(version, test, build, dbs, spec_path)
  pushStats()


if __name__ == "__main__":
  main()
343