1import copy 2from documentgenerator import DocumentGenerator 3import re 4import datetime 5import json 6import random, string 7import os 8import logger 9 10from data import COUNTRIES, COUNTRY_CODE, FIRST_NAMES, LAST_NAMES 11 12log = logger.Logger.get_logger() 13 14class TuqGenerators(object): 15 16 def __init__(self, log, full_set): 17 self.log = log 18 self.full_set = full_set 19 self.query = None 20 self.type_args = {} 21 self.nests = self._all_nested_objects(full_set[0]) 22 self.type_args['str'] = [attr[0] for attr in full_set[0].iteritems() 23 if isinstance(attr[1], unicode)] 24 self.type_args['int'] = [attr[0] for attr in full_set[0].iteritems() 25 if isinstance(attr[1], int)] 26 self.type_args['float'] = [attr[0] for attr in full_set[0].iteritems() 27 if isinstance(attr[1], float)] 28 self.type_args['bool'] = [attr[0] for attr in full_set[0].iteritems() 29 if isinstance(attr[1], bool)] 30 self.type_args['list_str'] = [attr[0] for attr in full_set[0].iteritems() 31 if isinstance(attr[1], list) and isinstance(attr[1][0], unicode)] 32 self.type_args['list_int'] = [attr[0] for attr in full_set[0].iteritems() 33 if isinstance(attr[1], list) and isinstance(attr[1][0], int)] 34 self.type_args['list_obj'] = [attr[0] for attr in full_set[0].iteritems() 35 if isinstance(attr[1], list) and isinstance(attr[1][0], dict)] 36 self.type_args['obj'] = [attr[0] for attr in full_set[0].iteritems() 37 if isinstance(attr[1], dict)] 38 for obj in self.type_args['obj']: 39 self.type_args['_obj%s_str' % (self.type_args['obj'].index(obj))] = [attr[0] for attr in full_set[0][obj].iteritems() 40 if isinstance(attr[1], str)] 41 self.type_args['_obj%s_int'% (self.type_args['obj'].index(obj))] = [attr[0] for attr in full_set[0][obj].iteritems() 42 if isinstance(attr[1], int)] 43 for obj in self.type_args['list_obj']: 44 self.type_args['_list_obj%s_str' % (self.type_args['list_obj'].index(obj))] = [attr[0] for attr in full_set[0][obj][0].iteritems() 45 if isinstance(attr[1], str) or isinstance(attr[1], unicode)] 46 self.type_args['_list_obj%s_int'% (self.type_args['list_obj'].index(obj))] = [attr[0] for attr in full_set[0][obj][0].iteritems() 47 if isinstance(attr[1], int)] 48 for i in xrange(2, 5): 49 self.type_args['nested_%sl' % i] = [attr for attr in self.nests if len(attr.split('.')) == i] 50 for i in xrange(2, 5): 51 self.type_args['nested_list_%sl' % i] = [attr[0] for attr in self.nests.iteritems() if len(attr[0].split('.')) == i and isinstance(attr[1], list)] 52 self._clear_current_query() 53 54 def generate_query(self, template): 55 query = template 56 for name_type, type_arg in self.type_args.iteritems(): 57 for attr_type_arg in type_arg: 58 query = query.replace('$%s%s' % (name_type, type_arg.index(attr_type_arg)), attr_type_arg) 59 for expr in [' where ', ' select ', ' from ', ' order by', ' limit ', 'end', 60 ' offset ', ' count(' , 'group by', 'unnest', 'min', 'satisfies']: 61 query = query.replace(expr, expr.upper()) 62 self.log.info("Generated query to be run: '''%s'''" % query) 63 self.query = query 64 return query 65 66 def generate_expected_result(self, print_expected_result = True): 67 try: 68 self._create_alias_map() 69 from_clause = self._format_from_clause() 70 log.info("FROM clause ===== is %s" % from_clause) 71 where_clause = self._format_where_clause(from_clause) 72 log.info("WHERE clause ===== is %s" % where_clause) 73 unnest_clause = self._format_unnest_clause(from_clause) 74 log.info("UNNEST clause ===== is %s" % unnest_clause) 75 select_clause = self._format_select_clause(from_clause) 76 log.info("SELECT clause ===== is %s" % select_clause) 77 result = self._filter_full_set(select_clause, where_clause, unnest_clause) 78 result = self._order_results(result) 79 result = self._limit_and_offset(result) 80 if print_expected_result: 81 log.info("Expected result is %s ..." % str(result[:15])) 82 return result 83 finally: 84 self._clear_current_query() 85 86 def _all_nested_objects(self, d): 87 def items(): 88 for key, value in d.items(): 89 if isinstance(value, dict): 90 for subkey, subvalue in self._all_nested_objects(value).items(): 91 yield key + "." + subkey, subvalue 92 else: 93 yield key, value 94 return dict(items()) 95 96 def _create_alias_map(self): 97 query_dict = self.query.split() 98 for word in query_dict: 99 if word.upper() == 'AS': 100 self.aliases[query_dict[query_dict.index(word) + 1]] = query_dict[query_dict.index(word) - 1] 101 102 def _format_where_clause(self, from_clause=None): 103 if self.query.find('WHERE') == -1: 104 return None 105 clause = re.sub(r'ORDER BY.*', '', re.sub(r'.*WHERE', '', self.query)) 106 clause = re.sub(r'GROUP BY.*', '', clause) 107 attributes = self.get_all_attributes() 108 conditions = clause.replace('IS NULL', 'is None') 109 conditions = conditions.replace('IS NOT NULL', 'is not None') 110 satisfy_expr = self.format_satisfy_clause() 111 if satisfy_expr: 112 conditions = re.sub(r'ANY.*END', '', conditions).strip() 113 regex = re.compile("[\w']+\.[\w']+") 114 atts = regex.findall(conditions) 115 for att in atts: 116 parent, child = att.split('.') 117 if parent in attributes: 118 conditions = conditions.replace(' %s.%s ' % (parent, child), 119 ' doc["%s"]["%s"] ' % (parent, child)) 120 else: 121 if parent not in self.aliases: 122 conditions = conditions.replace(' %s.%s ' % (parent, child), 123 ' doc["%s"] ' % (child)) 124 elif self.aliases[parent] in attributes: 125 conditions = conditions.replace(' %s.%s ' % (parent, child), 126 ' doc["%s"]["%s"] ' % (self.aliases[parent], child)) 127 else: 128 conditions = conditions.replace(' %s.%s ' % (parent, child), 129 ' doc["%s"] ' % (child)) 130 for attr in attributes: 131 conditions = conditions.replace(' %s ' % attr, ' doc["%s"] ' % attr) 132 if satisfy_expr: 133 if conditions: 134 for join in ["AND", "OR"]: 135 present = conditions.find(join) 136 if present > -1: 137 conditions = conditions.replace(join, join.lower()) 138 if present > 0: 139 conditions += '' + satisfy_expr 140 break 141 else: 142 conditions = satisfy_expr + ' ' + conditions 143 break 144 else: 145 conditions += '' + satisfy_expr 146 if from_clause and from_clause.find('.') != -1: 147 sub_attrs = [att for name, group in self.type_args.iteritems() 148 for att in group if att not in attributes] 149 for attr in sub_attrs: 150 conditions = conditions.replace(' %s ' % attr, ' doc["%s"] ' % attr) 151 conditions = conditions.replace('doc[', 'doc["%s"][' % from_clause.split('.')[-1]) 152 conditions = conditions.replace(' = ', ' == ') 153 return conditions 154 155 def _format_from_clause(self): 156 clause = re.sub(r'ORDER BY.*', '', re.sub(r'.*FROM', '', self.query)).strip() 157 clause = re.sub(r'WHERE.*', '', re.sub(r'GROUP BY.*', '', clause)).strip() 158 clause = re.sub(r'SELECT.*', '', clause).strip() 159 if len(clause.split()) == 2: 160 self.aliases[clause.split()[1]] = clause.split()[0] 161 return clause 162 163 def _format_unnest_clause(self, from_clause): 164 if from_clause.find('UNNEST') == -1: 165 return None 166 clause = re.sub(r'.*UNNEST', '', from_clause) 167 attr = clause.split() 168 if len(attr) == 1: 169 clause = 'doc["%s"]' % attr[0] 170 elif len(attr) == 2: 171 attributes = self.get_all_attributes() 172 if attr[0].find('.') != -1: 173 splitted = attr[0].split('.') 174 if splitted[0] not in attributes: 175 alias = [attr[0].split('.')[1],] 176 clause = 'doc["%s"]' % attr[1] 177 for inner in splitted[2:]: 178 alias.append(inner) 179 self.aliases[attr[1]] = tuple(alias) 180 return clause 181 parent, child = attr[0].split('.') 182 if parent in attributes: 183 clause = 'doc["%s"]["%s"]' % (parent, child) 184 self.aliases[attr[1]] = (parent, child) 185 else: 186 if parent not in self.aliases: 187 clause = 'doc["%s"]' % (child) 188 self.aliases[attr[1]] = child 189 elif self.aliases[parent] in attributes: 190 clause = 'doc["%s"]["%s"]' % (self.aliases[parent], child) 191 self.aliases[attr[1]] = (self.aliases[parent], child) 192 else: 193 clause = 'doc["%s"]' % (child) 194 self.aliases[attr[1]] = child 195 else: 196 clause = 'doc["%s"]' % attr[0] 197 self.aliases[attr[1]] = attr[0] 198 elif len(attr) == 3 and ('as' in attr or 'AS' in attr): 199 attributes = self.get_all_attributes() 200 if attr[0].find('.') != -1: 201 parent, child = attr[0].split('.') 202 if parent in attributes: 203 clause = 'doc["%s"]["%s"]' % (parent, child) 204 self.aliases[attr[2]] = (parent, child) 205 else: 206 if parent not in self.aliases: 207 clause = 'doc["%s"]' % (child) 208 self.aliases[attr[2]] = child 209 elif self.aliases[parent] in attributes: 210 clause = 'doc["%s"]["%s"]' % (self.aliases[parent], child) 211 self.aliases[attr[2]] = (self.aliases[parent], child) 212 else: 213 clause = 'doc["%s"]' % (child) 214 self.aliases[attr[2]] = child 215 else: 216 clause = 'doc["%s"]' % attr[0] 217 self.aliases[attr[2]] = attr[0] 218 return clause 219 220 def _format_select_clause(self, from_clause=None): 221 select_clause = re.sub(r'ORDER BY.*', '', re.sub(r'.*SELECT', '', self.query)).strip() 222 select_clause = re.sub(r'WHERE.*', '', re.sub(r'FROM.*', '', select_clause)).strip() 223 select_attrs = select_clause.split(',') 224 if from_clause and from_clause.find('UNNEST') != -1: 225 from_clause = re.sub(r'UNNEST.*', '', from_clause).strip() 226 condition = '{' 227 #handle aliases 228 for attr_s in select_attrs: 229 attr = attr_s.split() 230 if re.match(r'COUNT\(.*\)', attr[0]): 231 attr[0] = re.sub(r'\)', '', re.sub(r'.*COUNT\(', '', attr[0])).strip() 232 self.aggr_fns['COUNT'] = {} 233 if attr[0].upper() == 'DISTINCT': 234 attr = attr[1:] 235 self.distinct= True 236 if attr[0].find('.') != -1: 237 parent, child = attr[0].split('.') 238 attr[0] = child 239 if attr[0] in self.aliases: 240 attr[0] = self.aliases[attr[0]] 241 self.aggr_fns['COUNT']['field'] = attr[0] 242 self.aggr_fns['COUNT']['alias'] = ('$1', attr[-1])[len(attr) > 1] 243 if attr[0] == '*': 244 condition += '"%s" : doc,' % attr[-1] 245 continue 246 elif re.match(r'MIN\(.*\)', attr[0]): 247 attr[0] = re.sub(r'\)', '', re.sub(r'.*MIN\(', '', attr[0])).strip() 248 self.aggr_fns['MIN'] = {} 249 if attr[0].find('.') != -1: 250 parent, child = attr[0].split('.') 251 attr[0] = child 252 if attr[0] in self.aliases: 253 attr[0] = self.aliases[attr[0]] 254 self.aggr_fns['MIN']['field'] = attr[0] 255 self.aggr_fns['MIN']['alias'] = ('$1', attr[-1])[len(attr) > 1] 256 self.aliases[('$1', attr[-1])[len(attr) > 1]] = attr[0] 257 condition += '"%s": doc["%s"]' % (self.aggr_fns['MIN']['alias'], self.aggr_fns['MIN']['field']) 258 continue 259 elif attr[0].upper() == 'DISTINCT': 260 attr = attr[1:] 261 self.distinct= True 262 if attr[0] == '*': 263 condition += '"*" : doc,' 264 elif len(attr) == 1: 265 if attr[0].find('.') != -1: 266 if attr[0].find('[') != -1: 267 condition += '"%s" : doc["%s"]%s,' % (attr[0], attr[0][:attr[0].find('[')], attr[0][attr[0].find('['):]) 268 elif attr[0].split('.')[1] == '*': 269 condition = 'doc["%s"]' % (attr[0].split('.')[0]) 270 return condition 271 else: 272 if attr[0].split('.')[0] not in self.get_all_attributes() and\ 273 from_clause.find(attr[0].split('.')[0]) != -1: 274 condition += '"%s" : doc["%s"],' % (attr[0].split('.')[1], attr[0].split('.')[1]) 275 continue 276 else: 277 condition += '"%s" : {%s : doc["%s"]["%s"]},' % (attr[0].split('.')[0], attr[0].split('.')[1], 278 attr[0].split('.')[0], attr[0].split('.')[1]) 279 else: 280 if attr[0].find('[') != -1: 281 condition += '"%s" : doc["%s"]%s,' % (attr[0], attr[0][:attr[0].find('[')], attr[0][attr[0].find('['):]) 282 else: 283 if attr[0] in self.aliases: 284 value = self.aliases[attr[0]] 285 if len(value) > 1: 286 condition += '"%s" : doc["%s"]' % (attr[0], value[0]) 287 for inner in value[1:]: 288 condition += '["%s"]' % (inner) 289 condition += ',' 290 else: 291 condition += '"%s" : doc["%s"],' % (attr[0], attr[0]) 292 elif len(attr) == 2: 293 if attr[0].find('.') != -1: 294 condition += '"%s" : doc["%s"]["%s"],' % (attr[1], attr[0].split('.')[0], attr[0].split('.')[1]) 295 else: 296 condition += '"%s" : doc["%s"],' % (attr[1], attr[0]) 297 self.aliases[attr[1]] = attr[0] 298 elif len(attr) == 3 and ('as' in attr or 'AS' in attr): 299 if attr[0].find('.') != -1: 300 condition += '"%s" : doc["%s"]["%s"],' % (attr[2], attr[0].split('.')[0], attr[0].split('.')[1]) 301 else: 302 if attr[0].find('[') != -1: 303 condition += '"%s" : doc["%s"]%s,' % (attr[2], attr[0][:attr[0].find('[')], attr[0][attr[0].find('['):]) 304 else: 305 condition += '"%s" : doc["%s"],' % (attr[2], attr[0]) 306 condition += '}' 307 if from_clause and from_clause.find('.') != -1: 308 condition = condition.replace('doc[', 'doc["%s"][' % from_clause.split('.')[-1]) 309 return condition 310 311 def _filter_full_set(self, select_clause, where_clause, unnest_clause): 312 diff = self._order_clause_greater_than_select(select_clause) 313 if diff and not self._is_parent_selected(select_clause, diff) and not 'MIN' in self.query: 314 if diff[0].find('][') == -1: 315 select_clause = select_clause[:-1] + ','.join(['"%s" : %s' %([at.replace('"','') for at in re.compile('"\w+"').findall(attr)][0], 316 attr) for attr in self._order_clause_greater_than_select(select_clause)]) + '}' 317 else: 318 for attr in self._order_clause_greater_than_select(select_clause): 319 select_clause = select_clause[:-1] 320 for at in re.compile('"\w+"').findall(attr): 321 if attr.find('][') != -1: 322 attrs_split = [at.replace('"','') for at in re.compile('"\w+"').findall(attr)] 323 select_clause = select_clause + '"%s" : {"%s" : %s},' %(attrs_split[0], attrs_split[1], attr) 324 else: 325 select_clause = select_clause + '"%s" : %s,' %([at.replace('"','') for at in re.compile('"\w+"').findall(attr)][0], attr) 326 select_clause = select_clause + '}' 327 if where_clause: 328 result = [eval(select_clause) for doc in self.full_set if eval(where_clause)] 329 else: 330 result = [eval(select_clause) for doc in self.full_set] 331 if self.distinct: 332 result = [dict(y) for y in set(tuple(x.items()) for x in result)] 333 if unnest_clause: 334 unnest_attr = unnest_clause[5:-2] 335 if unnest_attr in self.aliases: 336 def res_generator(): 337 for doc in result: 338 doc_temp = copy.deepcopy(doc) 339 del doc_temp[unnest_attr] 340 for item in eval(unnest_clause): 341 doc_to_append = copy.deepcopy(doc_temp) 342 doc_to_append[unnest_attr] = copy.deepcopy(item) 343 yield doc_to_append 344 result = list(res_generator()) 345 else: 346 result = [item for doc in result for item in eval(unnest_clause)] 347 if self._create_groups()[0]: 348 result = self._group_results(result) 349 if self.aggr_fns: 350 if not self._create_groups()[0] or len(result) == 0: 351 for fn_name, params in self.aggr_fns.iteritems(): 352 if fn_name == 'COUNT': 353 result = [{params['alias'] : len(result)}] 354 return result 355 356 def _order_clause_greater_than_select(self, select_clause): 357 order_clause = self._get_order_clause() 358 if not order_clause: 359 return None 360 order_clause = order_clause.replace(',"', '"') 361 diff = set(order_clause.split(',')) - set(re.compile('doc\["[\w\']+"\]').findall(select_clause)) 362 diff = [attr.replace(",",'"') for attr in diff if attr != ''] 363 for k, v in self.aliases.iteritems(): 364 if k.endswith(','): 365 self.aliases[k[:-1]] = v 366 del self.aliases[k] 367 if not set(diff) - set(['doc["%s"]' % alias for alias in self.aliases]): 368 return None 369 else: 370 diff = list(set(diff) - set(['doc["%s"]' % alias for alias in self.aliases])) 371 if diff: 372 self.attr_order_clause_greater_than_select = [re.sub(r'"\].*', '', re.sub(r'doc\["', '', attr)) for attr in diff] 373 self.attr_order_clause_greater_than_select = [attr for attr in self.attr_order_clause_greater_than_select if attr] 374 return list(diff) 375 return None 376 377 def _get_order_clause(self): 378 if self.query.find('ORDER BY') == -1: 379 return None 380 order_clause = re.sub(r'LIMIT.*', '', re.sub(r'.*ORDER BY', '', self.query)).strip() 381 order_clause = re.sub(r'OFFSET.*', '', order_clause).strip() 382 condition = "" 383 order_attrs = order_clause.split(',') 384 for attr_s in order_attrs: 385 attr = attr_s.split() 386 if attr[0] in self.aliases.itervalues(): 387 condition += 'doc["%s"],' % (self.get_alias_for(attr[0])) 388 continue 389 if attr[0].find('MIN') != -1: 390 if 'MIN' not in self.aggr_fns: 391 self.aggr_fns['MIN'] = {} 392 attr[0]= attr[0][4:-1] 393 self.aggr_fns['MIN']['field'] = attr[0] 394 self.aggr_fns['MIN']['alias'] = '$gr1' 395 else: 396 if 'alias' in self.aggr_fns['MIN']: 397 condition += 'doc["%s"],' % self.aggr_fns['MIN']['alias'] 398 continue 399 if attr[0].find('.') != -1: 400 attributes = self.get_all_attributes() 401 if attr[0].split('.')[0] in self.aliases and (not self.aliases[attr[0].split('.')[0]] in attributes) or\ 402 attr[0].split('.')[0] in attributes: 403 condition += 'doc["%s"]["%s"],' % (attr[0].split('.')[0],attr[0].split('.')[1]) 404 else: 405 if attr[0].split('.')[0].find('[') != -1: 406 ind = attr[0].split('.')[0].index('[') 407 condition += 'doc["%s"]%s["%s"],' % (attr[0].split('.')[0][:ind], attr[0].split('.')[0][ind:], 408 attr[0].split('.')[1]) 409 else: 410 condition += 'doc["%s"],' % attr[0].split('.')[1] 411 else: 412 if attr[0].find('[') != -1: 413 ind = attr[0].index('[') 414 condition += 'doc["%s"]%s,' % (attr[0].split('.')[0][:ind], attr[0].split('.')[0][ind:]) 415 else: 416 condition += 'doc["%s"],' % attr[0] 417 log.info("ORDER clause ========= is %s" % condition) 418 return condition 419 420 def _order_results(self, result): 421 order_clause = self._get_order_clause() 422 key = None 423 reverse = False 424 if order_clause: 425 all_order_clause = re.sub(r'LIMIT.*', '', re.sub(r'.*ORDER BY', '', self.query)).strip() 426 all_order_clause = re.sub(r'OFFSET.*', '', all_order_clause).strip() 427 order_attrs = all_order_clause.split(',') 428 for attr_s in order_attrs: 429 attr = attr_s.split() 430 if len(attr) == 2 and attr[1].upper() == 'DESC': 431 reverse = True 432 for att_name in re.compile('"[\w\']+"').findall(order_clause): 433 if att_name[1:-1] in self.aliases.itervalues(): 434 order_clause = order_clause.replace(att_name[1:-1], 435 self.get_alias_for(att_name[1:-1])) 436 if self.aggr_fns and att_name[1:-1] in [params['field'] for params in self.aggr_fns.itervalues()]: 437 order_clause = order_clause.replace(att_name[1:-1], 438 [params['alias'] for params in self.aggr_fns.itervalues() 439 if params['field'] == att_name[1:-1]][0]) 440 if order_clause.find(',"') != -1: 441 order_clause = order_clause.replace(',"', '"') 442 key = lambda doc: eval(order_clause) 443 try: 444 result = sorted(result, key=key, reverse=reverse) 445 except: 446 return result 447 if self.attr_order_clause_greater_than_select and not self.parent_selected: 448 for doc in result: 449 for attr in self.attr_order_clause_greater_than_select: 450 if attr.find('.') != -1: 451 attr = attr.split('.')[0] 452 if attr in doc: 453 del doc[attr] 454 elif '$gr1' in doc: 455 del doc['$gr1'] 456 return result 457 458 def _limit_and_offset(self, result): 459 limit_clause = offset_clause = None 460 if self.query.find('LIMIT') != -1: 461 limit_clause = re.sub(r'OFFSET.*', '', re.sub(r'.*LIMIT', '', self.query)).strip() 462 if self.query.find('OFFSET') != -1: 463 offset_clause = re.sub(r'.*OFFSET', '', self.query).strip() 464 if offset_clause: 465 result = result[int(offset_clause):] 466 if limit_clause: 467 result = result[:int(limit_clause)] 468 return result 469 470 def _create_groups(self): 471 if self.query.find('GROUP BY') == -1: 472 return 0, None 473 group_clause = re.sub(r'ORDER BY.*', '', re.sub(r'.*GROUP BY', '', self.query)).strip() 474 if not group_clause: 475 return 0, None 476 attrs = group_clause.split(',') 477 attrs = [attr.strip() for attr in attrs] 478 if len(attrs) == 2: 479 groups = set([(doc[attrs[0]],doc[attrs[1]]) for doc in self.full_set]) 480 elif len(attrs) == 1: 481 if attrs[0].find('.') != -1: 482 if len(attrs[0].split('.')) > 2: 483 groups = set([doc[attrs[0].split('.')[1]][attrs[0].split('.')[2]] 484 for doc in self.full_set]) 485 else: 486 groups = set([doc[attrs[0].split('.')[0]][attrs[0].split('.')[1]] 487 for doc in self.full_set]) 488 else: 489 groups = set([doc[attrs[0]] for doc in self.full_set]) 490 return attrs, groups 491 492 def _group_results(self, result): 493 attrs, groups = self._create_groups() 494 for fn_name, params in self.aggr_fns.iteritems(): 495 if fn_name == 'COUNT': 496 result = [{attrs[0] : group[0], attrs[1] : group[1], 497 params['alias'] : len([doc for doc in result 498 if doc[attrs[0]]==group[0] and doc[attrs[1]]==group[1]])} 499 for group in groups] 500 result = [doc for doc in result if doc[params['alias']] > 0] 501 if fn_name == 'MIN': 502 if isinstance(list(groups)[0], tuple): 503 result = [{attrs[0] : group[0], attrs[1] : group[1], 504 params['alias'] : min([doc[params['field']] for doc in result 505 if doc[attrs[0]]==group[0] and doc[attrs[1]]==group[1]])} 506 for group in groups] 507 else: 508 if attrs[0] in self.aliases.itervalues(): 509 attrs[0] = self.get_alias_for(attrs[0]).replace(',', '') 510 result = [{attrs[0] : group, 511 params['alias'] : min([doc[params['alias']] for doc in result 512 if doc[attrs[0]]==group])} 513 for group in groups] 514 else: 515 result = [dict(y) for y in set(tuple(x.items()) for x in result)] 516 return result 517 518 def get_alias_for(self, value_search): 519 for key, value in self.aliases.iteritems(): 520 if value == value_search: 521 return key 522 return '' 523 524 def get_all_attributes(self): 525 return [att for name, group in self.type_args.iteritems() 526 for att in group if not name.startswith('_')] 527 528 def _is_parent_selected(self, clause, diff): 529 self.parent_selected = len([select_el for select_el in re.compile('doc\["[\w\']+"\]').findall(clause) 530 for diff_el in diff if diff_el.find(select_el) != -1]) > 0 531 return self.parent_selected 532 533 def format_satisfy_clause(self): 534 if self.query.find('ANY') == -1 and self.query.find('EVERY') == -1: 535 return '' 536 satisfy_clause = re.sub(r'.*ANY', '', re.sub(r'END.*', '', self.query)).strip() 537 satisfy_clause = re.sub(r'.*ALL', '', re.sub(r'.*EVERY', '', satisfy_clause)).strip() 538 if not satisfy_clause: 539 return '' 540 main_attr = re.sub(r'SATISFIES.*', '', re.sub(r'.*IN', '', satisfy_clause)).strip() 541 attributes = self.get_all_attributes() 542 if main_attr in attributes: 543 main_attr = 'doc["%s"]' % (main_attr) 544 else: 545 if main_attr.find('.') != -1: 546 parent, child = main_attr.split('.') 547 if parent in self.aliases and self.aliases[parent] in attributes: 548 main_attr = 'doc["%s"]["%s"]' % (self.aliases[parent], child) 549 else: 550 main_attr = 'doc["%s"]' % (child) 551 var = "att" 552 if self.query.find('ANY') != -1: 553 var = re.sub(r'.*ANY', '', re.sub(r'IN.*', '', self.query)).strip() 554 result_clause = 'len([{0} for {1} in {2} if '.format(var, var, main_attr) 555 satisfy_expr = re.sub(r'.*SATISFIES', '', re.sub(r'END.*', '', satisfy_clause)).strip() 556 for expr in satisfy_expr.split(): 557 if expr.find('.') != -1: 558 result_clause += ' {0}["{1}"] '.format(var, expr.split('.')[1]) 559 elif expr.find('=') != -1: 560 result_clause += ' == ' 561 elif expr.upper() in ['AND', 'OR', 'NOT']: 562 result_clause += expr.lower() 563 else: 564 result_clause += ' %s ' % expr 565 result_clause += ']) > 0' 566 return result_clause 567 568 def _clear_current_query(self): 569 self.distinct = False 570 self.aggr_fns = {} 571 self.aliases = {} 572 self.attr_order_clause_greater_than_select = [] 573 self.parent_selected = False 574 575class JsonGenerator: 576 577 def generate_docs_employee(self, docs_per_day = 1, start=0, isShuffle = False): 578 generators = [] 579 types = self._shuffle(['Engineer', 'Sales', 'Support'],isShuffle) 580 join_yr = self._shuffle([2010, 2011],isShuffle) 581 join_mo = self._shuffle(xrange(1, 12 + 1),isShuffle) 582 join_day = self._shuffle(xrange(1, 28 + 1),isShuffle) 583 template = '{{ "name":"{0}", "join_yr":{1}, "join_mo":{2}, "join_day":{3},' 584 template += ' "email":"{4}", "job_title":"{5}", "test_rate":{8}, "skills":{9},' 585 template += '"VMs": {10},' 586 template += ' "tasks_points" : {{"task1" : {6}, "task2" : {7}}}}}' 587 count = 1 588 for info in types: 589 for year in join_yr: 590 for month in join_mo: 591 for day in join_day: 592 random.seed(count) 593 count+=1 594 prefix = "employee"+str(random.random()*100000) 595 name = ["employee-%s" % (str(day))] 596 email = ["%s-mail@couchbase.com" % (str(day))] 597 vms = [{"RAM": month, "os": "ubuntu", 598 "name": "vm_%s" % month, "memory": month}, 599 {"RAM": month, "os": "windows", 600 "name": "vm_%s"% (month + 1), "memory": month} 601 ] 602 generators.append(DocumentGenerator("query-test" + prefix, 603 template, 604 name, [year], [month], [day], 605 email, [info], range(1,10), range(1,10), 606 [float("%s.%s" % (month, month))], 607 [["skill%s" % y for y in join_yr]], 608 [vms], 609 start=start, end=docs_per_day)) 610 return generators 611 612 def generate_docs_employee_array(self, docs_per_day = 1, start=0, isShuffle = False): 613 generators = [] 614 #simple array 615 department = self._shuffle(['Developer', 'Support','HR','Tester','Manager'],isShuffle) 616 sport = ['Badminton','Cricket','Football','Basketball','American Football','ski'] 617 dance = ['classical','bollywood','salsa','hip hop','contemporary','bhangra'] 618 join_yr = self._shuffle([2010, 2011,2012,2013,2014,2015,2016],isShuffle) 619 join_mo = self._shuffle(xrange(1, 12 + 1),isShuffle) 620 join_day = self._shuffle(xrange(1, 28 + 1),isShuffle) 621 engineer = ["Query","Search","Indexing","Storage","Android","IOS"] 622 marketing = ["East","West","North","South","International"] 623 cities = ['Mumbai','Delhi','New York','San Francisco'] 624 streets = ['21st street','12th street','18th street'] 625 countries = ['USA','INDIA','EUROPE'] 626 template = '{{ "name":{0} , "department": "{1}" , "join_yr":{2},' 627 template += ' "email":"{3}", "hobbies": {{ "hobby" : {4} }},' 628 template += ' "tasks": {5}, ' 629 template += '"VMs": {6} , ' 630 template += '"address" : {7} }}' 631 count = 1 632 633 for dept in department: 634 for month in join_mo: 635 for day in join_day: 636 random.seed(count) 637 count += 1 638 prefix = "employee" + str(random.random() * 100000) 639 # array of single objects 640 name = [{"FirstName": "employeefirstname-%s" % (str(day))}, 641 {"MiddleName": "employeemiddlename-%s" % (str(day))}, 642 {"LastName": "employeelastname-%s" % (str(day))}] 643 644 # array inside array inside object 645 sportValue = random.sample(sport, 3) 646 danceValue = random.sample(dance, 3) 647 hobbies = [{"sports": sportValue}, {"dance": danceValue},"art"] 648 email = ["%s-mail@couchbase.com" % (str(day))] 649 joining = random.sample(join_yr,3) 650 # array inside array 651 enggValue = random.sample(engineer, 2) 652 marketingValue = [{"region1" :random.choice(marketing),"region2" :random.choice(marketing)},{"region2" :random.choice(marketing)}] 653 taskValue = [{"Developer": enggValue,"Marketing": marketingValue},"Sales","QA"] 654 # array of multiple objects 655 vms = [{"RAM": month, "os": "ubuntu", 656 "name": "vm_%s" % month, "memory": month}, 657 {"RAM": month, "os": "windows", 658 "name": "vm_%s" % (month + 1), "memory": month}, 659 {"RAM": month, "os": "centos", "name": "vm_%s" % (month + 2), "memory": month}, 660 {"RAM": month, "os": "macos", "name": "vm_%s" % (month + 3), "memory": month} 661 ] 662 663 addressvalue = [[ {"city": random.choice(cities)},{"street":random.choice(streets)}],[{"apartment":123,"country":random.choice(countries)}]] 664 665 generators.append(DocumentGenerator("query-test" + prefix, 666 template, 667 [name], [dept], [[ y for y in joining]], 668 email, [hobbies], 669 [taskValue], 670 [vms],[addressvalue], 671 start=start, end=docs_per_day)) 672 673 return generators 674 675 def generate_docs_sabre(self, docs_per_day=1, start=0, isShuffle=False, years=2, indexes=[1,4,8]): 676 generators = [] 677 all_airports = ["ABR", "ABI", "ATL","BOS", "BUR", "CHI", "MDW", "DAL", "SFO", "SAN", "SJC", "LGA", "JFK", "MSP", 678 "MSQ", "MIA", "LON", "DUB"] 679 dests = [all_airports[i] for i in indexes] 680 join_yr = self._shuffle(xrange(2010, 2010 + years), isShuffle) 681 join_mo = self._shuffle(xrange(1, 12 + 1),isShuffle) 682 join_day = self._shuffle(xrange(1, 28 + 1),isShuffle) 683 template = '{{ "Amount":{0}, "CurrencyCode":"{1}",' 684 template += ' "TotalTax":{{"DecimalPlaces" : {2}, "Amount" : {3}, "CurrencyCode" : "{4}"}},' 685 template += ' "Tax":{5}, "FareBasisCode":{6}, "PassengerTypeQuantity":{7}, "TicketType":"{8}",' 686 template += '"SequenceNumber": {9},' 687 template += ' "DirectionInd" : "{10}", "Itinerary" : {11}, "Destination" : "{12}",' 688 template += '"join_yr":{13}, "join_mo":{14}, "join_day":{15}, "Codes":{16}}}' 689 count = 1 690 for dest in dests: 691 for year in join_yr: 692 for month in join_mo: 693 for day in join_day: 694 random.seed(count) 695 count +=1 696 prefix = '%s_%s-%s-%s' % (dest, year, month, day) 697 amount = [float("%s.%s" % (month, month))] 698 currency = [("USD", "EUR")[month in [1,3,5]]] 699 decimal_tax = [1,2] 700 amount_tax = [day] 701 currency_tax = currency 702 taxes = [{"DecimalPlaces": 2, "Amount": float(amount_tax[0])/3, 703 "TaxCode": "US1", "CurrencyCode": currency}, 704 {"DecimalPlaces": 2, "Amount": float(amount_tax[0])/4, 705 "TaxCode": "US2", "CurrencyCode": currency}, 706 {"DecimalPlaces": 2, "Amount": amount_tax[0] - float(amount_tax[0])/4-\ 707 float(amount_tax[0])/3, 708 "TaxCode": "US2", "CurrencyCode": currency}] 709 710 fare_basis = [{"content": "XA21A0NY", "DepartureAirportCode": dest, 711 "BookingCode": "X", "ArrivalAirportCode": "MSP"}, 712 {"content": "XA21A0NY", "DepartureAirportCode": "MSP", 713 "AvailabilityBreak": True, "BookingCode": "X", 714 "ArrivalAirportCode": "BOS"}] 715 pass_amount = [day] 716 ticket_type = [("eTicket", "testType")[month in [1,3,5]]] 717 sequence = [year] 718 direction = [("oneWay", "return")[month in [2,6,10]]] 719 itinerary = {"OriginDestinationOptions": 720 {"OriginDestinationOption": [ 721 {"FlightSegment": [ 722 {"TPA_Extensions": 723 {"eTicket": {"Ind": True}}, 724 "MarketingAirline": {"Code": dest}, 725 "StopQuantity": month, 726 "DepartureTimeZone": {"GMTOffset": -7}, 727 "OperatingAirline": {"Code": "DL", 728 "FlightNumber": year + month}, 729 "DepartureAirport": {"LocationCode": "SFO"}, 730 "ArrivalTimeZone": {"GMTOffset": -5}, 731 "ResBookDesigCode": "X", 732 "FlightNumber": year + day, 733 "ArrivalDateTime": "2014-07-12T06:07:00", 734 "ElapsedTime": 212, 735 "Equipment": {"AirEquipType": 763}, 736 "DepartureDateTime": "2014-07-12T00:35:00", 737 "MarriageGrp": "O", 738 "ArrivalAirport": {"LocationCode": random.sample(all_airports, 1)}}, 739 {"TPA_Extensions": 740 {"eTicket": {"Ind": False}}, 741 "MarketingAirline": {"Code": dest}, 742 "StopQuantity": month, 743 "DepartureTimeZone": {"GMTOffset": -7}, 744 "OperatingAirline": {"Code": "DL", 745 "FlightNumber": year + month + 1}, 746 "DepartureAirport": {"LocationCode": random.sample(all_airports, 1)}, 747 "ArrivalTimeZone": {"GMTOffset": -3}, 748 "ResBookDesigCode": "X", 749 "FlightNumber": year + day, 750 "ArrivalDateTime": "2014-07-12T06:07:00", 751 "ElapsedTime": 212, 752 "Equipment": {"AirEquipType": 764}, 753 "DepartureDateTime": "2014-07-12T00:35:00", 754 "MarriageGrp": "1", 755 "ArrivalAirport": {"LocationCode": random.sample(all_airports, 1)}}], 756 "ElapsedTime": 619}, 757 {"FlightSegment": [ 758 {"TPA_Extensions": 759 {"eTicket": {"Ind": True}}, 760 "MarketingAirline": {"Code": dest}, 761 "StopQuantity": month, 762 "DepartureTimeZone": {"GMTOffset": -7}, 763 "OperatingAirline": {"Code": "DL", 764 "FlightNumber": year + month}, 765 "DepartureAirport": {"LocationCode": random.sample(all_airports, 1)}, 766 "ArrivalTimeZone": {"GMTOffset": -5}, 767 "ResBookDesigCode": "X", 768 "FlightNumber": year + day, 769 "ArrivalDateTime": "2014-07-12T06:07:00", 770 "ElapsedTime": 212, 771 "Equipment": {"AirEquipType": 763}, 772 "DepartureDateTime": "2014-07-12T00:35:00", 773 "MarriageGrp": "O", 774 "ArrivalAirport": {"LocationCode": random.sample(all_airports, 1)}}, 775 {"TPA_Extensions": 776 {"eTicket": {"Ind": False}}, 777 "MarketingAirline": {"Code": dest}, 778 "StopQuantity": month, 779 "DepartureTimeZone": {"GMTOffset": -7}, 780 "OperatingAirline": {"Code": "DL", 781 "FlightNumber": year + month + 1}, 782 "DepartureAirport": {"LocationCode": random.sample(all_airports, 1)}, 783 "ArrivalTimeZone": {"GMTOffset": -3}, 784 "ResBookDesigCode": "X", 785 "FlightNumber": year + day, 786 "ArrivalDateTime": "2014-07-12T06:07:00", 787 "ElapsedTime": 212, 788 "Equipment": {"AirEquipType": 764}, 789 "DepartureDateTime": "2014-07-12T00:35:00", 790 "MarriageGrp": "1", 791 "ArrivalAirport": {"LocationCode": random.sample(all_airports, 1)}}]}]}, 792 "DirectionInd": "Return"} 793 generators.append(DocumentGenerator(prefix, template, 794 amount, currency, decimal_tax, amount_tax, currency_tax, 795 [taxes], [fare_basis], pass_amount, ticket_type, sequence, 796 direction, [itinerary], [dest], [year], [month], [day], 797 [[dest, dest]], start=start, end=docs_per_day)) 798 return generators 799 800 def generate_docs_sales(self, key_prefix = "sales_dataset", test_data_type = True, start=0, docs_per_day=None, isShuffle = False): 801 generators = [] 802 if end is None: 803 end = self.docs_per_day 804 join_yr = self._shuffle(range(2008, 2008 + self.years),isShuffle) 805 join_mo = self._shuffle(range(1, self.months + 1),isShuffle) 806 join_day = self._shuffle(range(1, self.days + 1),isShuffle) 807 count = 1 808 if test_data_type: 809 template = '{{ "join_yr" : {0}, "join_mo" : {1}, "join_day" : {2},' 810 template += ' "sales" : {3}, "delivery_date" : "{4}", "is_support_included" : {5},' 811 template += ' "is_high_priority_client" : {6}, "client_contact" : "{7}",' 812 template += ' "client_name" : "{8}", "client_reclaims_rate" : {9}}}' 813 sales = self._shuffle([200000, 400000, 600000, 800000],isShuffle) 814 815 is_support = self._shuffle(['true', 'false'],isShuffle) 816 is_priority = self._shuffle(['true', 'false'],isShuffle) 817 contact = "contact_"+str(random.random()*10000000) 818 name ="name_"+str(random.random()*100000) 819 rate = [x * 0.1 for x in range(0, 10)] 820 for year in join_yr: 821 for month in join_mo: 822 for day in join_day: 823 random.seed(count) 824 count +=1 825 prefix = "prefix_"+str(random.random()*100000) 826 delivery = str(datetime.date(year, month, day)) 827 generators.append(DocumentGenerator(key_prefix + prefix, 828 template, 829 [year], [month], [day], 830 sales, [delivery], is_support, 831 is_priority, [contact], 832 [name], rate, 833 start=start, end=end)) 834 return generators 835 836 def generate_docs_bigdata(self, key_prefix = "big_dataset", value_size = 1024, start=0, docs_per_day=1, end=None): 837 if end is None: 838 end = docs_per_day 839 age = range(start, end) 840 name = ['a' * value_size,] 841 template = '{{ "age": {0}, "name": "{1}" }}' 842 843 gen_load = DocumentGenerator(key_prefix, template, age, name, start=start, 844 end=end) 845 return [gen_load] 846 847 def generate_docs_simple(self, key_prefix ="simple_dataset", start=0, docs_per_day = 1000, isShuffle = False): 848 end = docs_per_day 849 age = self._shuffle(range(start, end), isShuffle) 850 name = [key_prefix + '-' + str(i) for i in self._shuffle(xrange(start, end), isShuffle)] 851 template = '{{ "age": {0}, "name": "{1}" }}' 852 gen_load = DocumentGenerator(key_prefix, template, age, name, start=start, end=end) 853 return [gen_load] 854 855 def generate_docs_array(self, key_prefix="array_dataset", start=0, docs_per_day=1, isShuffle=False): 856 COUNTRIES = ["India", "US", "UK", "Japan", "France", "Germany", "China", "Korea", "Canada", "Cuba", 857 "West Indies", "Australia", "New Zealand", "Nepal", "Sri Lanka", "Pakistan", "Mexico", 858 "belgium", "Netherlands", "Brazil", "Costa Rica", "Cambodia", "Fiji", "Finland", "haiti", 859 "Hong Kong", "Iceland", "Iran", "Iraq", "Italy", "Greece", "Jamaica", "Kenya", "Kuwait", "Macau", 860 "Spain","Morocco", "Maldives", "Norway"] 861 862 COUNTRY_CODE = ["Ind123", "US123", "UK123", "Jap123", "Fra123", "Ger123", "Chi123", "Kor123", "Can123", 863 "Cub123", "Wes123", "Aus123", "New123", "Nep123", "Sri123", "Pak123", "Mex123", "bel123", 864 "Net123", "Bra123", "Cos123", "Cam123", "Fij123", "Fin123", "hai123", "Hon123", "Ice123", 865 "Ira123", "Ira123", "Ita123", "Gre123", "Jam123", "Ken123", "Kuw123", "Mac123", "Spa123", 866 "Mor123", "Mal123", "Nor123"] 867 end = docs_per_day 868 generators = [] 869 template = '{{"name": "{0}", "email": "{1}", \ 870 "countries": {2}, "code": {3}}}' 871 for i in range(start, end): 872 countries = [] 873 codes = [] 874 name = ["Passenger-{0}".format(i)] 875 email = ["passenger_{0}@abc.com".format(i)] 876 start_pnt = random.randint(0, len(COUNTRIES)-2) 877 end_pnt = random.randint(start_pnt, len(COUNTRIES)-1) 878 cnt = COUNTRIES[start_pnt:end_pnt] 879 countries.append(cnt) 880 cde = COUNTRY_CODE[start_pnt:end_pnt] 881 codes.append(cde) 882 prefix = "{0}-{1}".format(key_prefix,i) 883 generators.append(DocumentGenerator(prefix, template, 884 name, email, countries, codes, start=start, end=end)) 885 return generators 886 887 def generate_all_type_documents_for_gsi(self, start=0, docs_per_day=10): 888 """ 889 Document fields: 890 name: String 891 age: Number 892 email: Alphanumeric + Special Character 893 premium_customer: Boolean or <NULL> 894 Address: Object 895 {Line 1: Alphanumeric + Special Character 896 Line 2: Alphanumeric + Special Character or <NULL> 897 City: String 898 Country: String 899 postal_code: Number 900 } 901 travel_history: Array of string - Duplicate elements ["India", "US", "UK", "India"] 902 travel_history_code: Array of alphanumerics - Duplicate elements 903 booking_history: Array of objects 904 {source: 905 destination: 906 } 907 credit_cards: Array of numbers 908 secret_combination: Array of mixed data types 909 countries_visited: Array of strings - non-duplicate elements 910 911 :param start: 912 :param docs_per_day: 913 :param isShuffle: 914 :return: 915 """ 916 generators = [] 917 bool_vals = [True, False] 918 template = r'{{ "name":"{0}", "email":"{1}", "age":{2}, "premium_customer":{3}, ' \ 919 '"address":{4}, "travel_history":{5}, "travel_history_code":{6}, "travel_details":{7},' \ 920 '"booking":{8}, "credit_cards":{9}, "secret_combination":{10}, "countries_visited":{11}, ' \ 921 '"question_values":{12}}}' 922 for i in range(docs_per_day): 923 name = random.choice(FIRST_NAMES) 924 age = random.randint(25, 70) 925 last_name = random.choice(LAST_NAMES) 926 dob = "{0}-{1}-{2}".format(random.randint(1970, 1999), 927 random.randint(1, 28), random.randint(1, 12)) 928 email = "{0}.{1}.{2}@abc.com".format(name, last_name, dob.split("-")[1]) 929 premium_customer = random.choice(bool_vals) 930 address = {} 931 address["line_1"] = "Street No. {0}".format(random.randint(100, 200)) 932 address["line_2"] = "null" 933 if not random.choice(bool_vals): 934 address["address2"] = "Building {0}".format(random.randint(1, 6)) 935 address["city"] = "Bangalore" 936 address["contact"] = "{0} {1}".format(name, last_name) 937 address["country"] = "India" 938 address["postal_code"] = "{0}".format(random.randint(560071, 560090)) 939 credit_cards = [random.randint(-1000000, 9999999) for i in range(random.randint(3, 7))] 940 secret_combo = [''.join(random.choice(string.lowercase) for i in range(7)), 941 random.randint(1000000, 9999999)] 942 travel_history = [random.choice(COUNTRIES[:9]) for i in range(1, 11)] 943 travel_history_code = [COUNTRY_CODE[COUNTRIES.index(i)] for i in travel_history] 944 travel_details = [{"country": travel_history[i], "code": travel_history_code[i]} 945 for i in range(len(travel_history))] 946 countries_visited = list(set(travel_history)) 947 booking = {"source": random.choice(COUNTRIES), "destination": random.choice(COUNTRIES)} 948 confirm_question_values = [random.choice(bool_vals) for i in range(5)] 949 prefix = "airline_record_" + str(random.random()*100000) 950 generators.append(DocumentGenerator(prefix, template, [name], [email], [age], [premium_customer], 951 [address], [travel_history], [travel_history_code], [travel_details], 952 [booking], [credit_cards], [secret_combo], [countries_visited], 953 [confirm_question_values], start=start, end=1)) 954 return generators 955 956 def generate_doc_for_aggregate_pushdown(self, start=0, docs_per_day=10): 957 generators = [] 958 bool_vals = [True, False] 959 template = r'{{ "name":"{0}", "business_name": "{1}", "age":{2}, "weight": {3}, "debt":{4}, "passwords":{5}, ' \ 960 r'"transactions":{6}, "address":{7}, "travel_history":{8}, "credit_cards":{9}}}' 961 for i in range(docs_per_day): 962 name = random.choice(FIRST_NAMES) 963 last_name = random.choice(LAST_NAMES) 964 business_name = name + "_" + last_name 965 age = random.randint(25, 70) 966 weight = round(random.uniform(50.1, 115.5), 2) 967 debt = random.randint(-999999, -100000) 968 passwords = [] 969 for i in range(random.randint(10, 50)): 970 passwords.append(random.randint(-10000, 99999)) 971 passwords.append(''.join(random.choice(string.ascii_lowercase) for _ in range(9))) 972 transactions = [random.randint(1000, 9999999) for _ in range(random.randint(10, 50))] 973 address = {} 974 address["country"] = random.choice(COUNTRIES) 975 address["postal_code"] = random.randint(200000, 800000) 976 travel_history = [random.choice(COUNTRIES[:9]) for i in range(1, 11)] 977 credit_cards = [random.randint(-1000000, 9999999) for i in range(random.randint(3, 7))] 978 prefix = "bank_record_" + str(random.random()*100000) 979 generators.append(DocumentGenerator(prefix, template, [name], [business_name], [age], [weight], [debt], 980 [passwords], [transactions], [address], [travel_history], 981 [credit_cards], start=start, end=1)) 982 return generators 983 984 def generate_docs_employee_data(self, key_prefix ="employee_dataset", start=0, docs_per_day = 1, isShuffle = False): 985 generators = [] 986 count = 1 987 sys_admin_info = {"title" : "System Administrator and heliport manager", 988 "desc" : "...Last but not least, as the heliport manager, you will help maintain our growing fleet of remote controlled helicopters, that crash often due to inexperienced pilots. As an independent thinker, you may be free to replace some of the technologies we currently use with ones you feel are better. If so, you should be prepared to discuss and debate the pros and cons of suggested technologies with other stakeholders", 989 "type" : "admin"} 990 ui_eng_info = {"title" : "UI Engineer", 991 "desc" : "Couchbase server UI is one of the crown jewels of our product, which makes the Couchbase NoSQL database easy to use and operate, reports statistics on real time across large clusters, and much more. As a Member of Couchbase Technical Staff, you will design and implement front-end software for cutting-edge distributed, scale-out data infrastructure software systems, which is a pillar for the growing cloud infrastructure.", 992 "type" : "ui"} 993 senior_arch_info = {"title" : "Senior Architect", 994 "desc" : "As a Member of Technical Staff, Senior Architect, you will design and implement cutting-edge distributed, scale-out data infrastructure software systems, which is a pillar for the growing cloud infrastructure. More specifically, you will bring Unix systems and server tech kung-fu to the team.", 995 "type" : "arch"} 996 data_sets = self._shuffle([sys_admin_info, ui_eng_info, senior_arch_info],isShuffle) 997 if end is None: 998 end = self.docs_per_day 999 join_yr = self._shuffle(range(2008, 2008 + self.years),isShuffle) 1000 join_mo = self._shuffle(range(1, self.months + 1),isShuffle) 1001 join_day = self._shuffle(range(1, self.days + 1),isShuffle) 1002 name = ["employee-%s-%s" % (key_prefix, str(i)) for i in xrange(start, end)] 1003 email = ["%s-mail@couchbase.com" % str(i) for i in xrange(start, end)] 1004 template = '{{ "name":"{0}", "join_yr":{1}, "join_mo":{2}, "join_day":{3},' 1005 template += ' "email":"{4}", "job_title":"{5}", "type":"{6}", "desc":"{7}"}}' 1006 for info in data_sets: 1007 for year in join_yr: 1008 for month in join_mo: 1009 for day in join_day: 1010 random.seed(count) 1011 prefix = str(random.random()*100000) 1012 generators.append(DocumentGenerator(key_prefix + prefix, 1013 template, 1014 name, [year], [month], [day], 1015 email, [info["title"]], 1016 [info["type"]], [info["desc"]], 1017 start=start, end=docs_per_day)) 1018 return generators 1019 1020 def generate_docs_using_monster(self, 1021 executatble_path = None, key_prefix= "", bag_dir = "lib/couchbase_helper/monster/bags", 1022 pod_name = None, num_items = 1, seed = None): 1023 "This method runs monster tool using localhost, creates a map of json based on a pattern" 1024 list = [] 1025 command = executatble_path 1026 dest_path = "/tmp/{0}.txt".format(int(random.random()*1000)) 1027 if pod_name == None: 1028 return list 1029 else: 1030 pod_path = "lib/couchbase_helper/monster/prod/%s" % pod_name 1031 command += " -bagdir {0}".format(bag_dir) 1032 if seed != None: 1033 command += " -s {0}".format(seed) 1034 command += " -n {0}".format(num_items) 1035 command += " -o {0}".format(dest_path) 1036 if pod_path != None: 1037 command += " {0}".format(pod_path) 1038 print "Will run the following command: {0}".format(command) 1039 # run command and generate temp file 1040 os.system(command) 1041 # read file and generate list 1042 with open(dest_path) as f: 1043 i= 1 1044 for line in f.readlines(): 1045 key = "{0}{1}".format(key_prefix,i) 1046 data = json.loads(line[:len(line)-1]) 1047 data["_id"] = key 1048 data["mutate"] = 0 1049 list.append(data) 1050 i+=1 1051 os.remove(dest_path) 1052 return list 1053 1054 def _shuffle(self, data, isShuffle): 1055 if isShuffle: 1056 if not isinstance(data, list): 1057 data = [x for x in data] 1058 random.shuffle(data) 1059 return data 1060 return data 1061