xref: /3.0.3-GA/couchbase-cli/cbbackupwrapper (revision c3af0306)
1#!/usr/bin/env python
2# -*-python-*-
3
4import pump_transfer
5import pump
6
7import base64
8import optparse
9import os
10import re
11import simplejson as json
12import socket
13import subprocess
14import sys
15import urllib2
16
17"""Written by Daniel Owen owend@couchbase.com on 27 June 2014
18Version 1.4    Last updated 10 July 2014
19
20The current implementation of cbbackup that comes with Couchbase Server 2.5.1
21uses only one thread per node.  Therefore when using cbbackup with the single-node
22parameter we are limited to one thread - this impacts performance.
23
24This script provides a wrapper to invoke multiple cbbackup processes.
25It automatically detects which buckets and vbuckets are
26on the node.  It allows the user to specify how many vbuckets to backup in a single
27cbbackup process and then invokes the necessary number of processes.
28An example invocation is as follows:
29
30python cbbackupwrapper.py http://127.0.0.1:8091 ../backup/ --single-node -n 4 \
31-u Administrator -p myPassword --path /opt/couchbase/bin/  -v
32
33This will backup all the buckets on node 127.0.0.1 into ../backup
34It will backup 4 vbuckets per cbbackup process
35Access to the cluster is authenticated using username=Administrator and
36password=myPassword, and cbbackup will be found in /opt/couchbase/bin
37
38Run python cbbackupwrapper -h for more information.
39
40See the cbrestorewrapper.py script for restoring backups made with this script."""
41
# Names of the buckets selected for backup; filled in by the main script.
bucketList = list()
# Union of the vbucket ids to back up, gathered across the selected buckets.
vbucketList = list()
# Maps each spawned cbbackup subprocess.Popen object to the vbucket-range
# name used for its backup sub-directory and its error log.
processes = dict()
45
def opt_parse_extra(extra, extra_defaults):
    """Parse and validate the value of the -x/--extra option.

    extra is a comma-separated "key=val(,key=val)*" string, or None / empty
    when the option was not given.  extra_defaults is a dict keyed by the
    known parameter names.

    Returns a dict mapping each supplied key to its raw string value.
    Exits through sys.exit() when a supplied key is not in extra_defaults,
    so a typo is reported before any cbbackup process is started.
    """
    supplied = {}
    for pair in (extra or '').split(','):
        key, _, value = pair.partition('=')
        if not key:
            # Empty segment (no -x given, or a stray comma): nothing to check.
            continue
        if key not in extra_defaults:
            sys.exit("error: unknown extra option: " + key)
        supplied[key] = value
    return supplied


def argumentParsing():
    """Parse the command line of the wrapper.

    Returns a tuple (options, cluster, backup_dir) where options is the
    optparse values object and the other two are the positional arguments.
    Prints the help text and exits when the two positional arguments are
    not both present, and exits when -x names an unknown parameter.
    """
    usage = "usage: %prog CLUSTER BACKUPDIR OPTIONS"
    parser = optparse.OptionParser(usage)
    extra_defaults = opt_extra_defaults()
    opt_extra_help(parser, extra_defaults)

    parser.add_option('-b', '--bucket-source', default='',
                        help='Specify the bucket to backup.  Defaults to all buckets')
    parser.add_option('--single-node', action='store_true',
                        default=False, help='use a single server node from the source only')
    parser.add_option('-u', '--username', default='Administrator',
                        help='REST username for source cluster or server node. Default is Administrator')
    parser.add_option('-p', '--password', default='PASSWORD',
                        help='REST password for source cluster or server node. Defaults to PASSWORD')
    parser.add_option('-v', '--verbose', action='store_true',
                        default=False, help='Enable verbose messaging')
    parser.add_option('--path', default='.',
                        help='Specify the path to cbbackup. Defaults to current directory')
    parser.add_option('--port', default='11210',
                        help='Specify the bucket port.  Defaults to 11210')
    parser.add_option('-n', '--number', default='100',
                        help='Specify the number of vbuckets per process. Defaults to 100')
    parser.add_option('-x', '--extra', default=None,
                        help="""Provide extra, uncommon config parameters;
                        comma-separated key=val(,key=val)* pairs""")
    # The default backup mode depends on whether the pump_bfd2 module can be
    # imported: "diff" when it can, "full" otherwise.
    try:
        import pump_bfd2
        parser.add_option("-m", "--mode",
                        action="store", type="string", default="diff",
                        help="backup mode: full, diff or accu [default:%default]")
    except ImportError:
        parser.add_option("-m", "--mode",
                        action="store", type="string", default="full",
                        help="backup mode: full")
    options, rest = parser.parse_args()
    if len(rest) != 2:
        parser.print_help()
        sys.exit("\nError: please provide both cluster IP and backup directory path.")

    # Validate -x only; options.extra stays a string because the caller
    # forwards it to cbbackup unchanged.
    opt_parse_extra(options.extra, extra_defaults)

    return options, rest[0], rest[1]
87
def opt_extra_help(parser, extra_defaults):
    """Add a help-only option group listing the -x parameters to parser.

    extra_defaults maps each parameter name to a (default, description)
    pair.  The group description lists the parameters in sorted name order
    as "name=default (description)" entries joined by "; ".
    """
    entries = []
    for name in sorted(extra_defaults):
        spec = extra_defaults[name]
        entries.append("%s=%s (%s)" % (name, spec[0], spec[1]))
    summary = "; ".join(entries)

    parser.add_option_group(
        optparse.OptionGroup(parser,
                             "Available extra config parameters (-x)",
                             summary))
96
def opt_extra_defaults():
    """Return the known -x parameters as {name: (default value, help text)}."""
    rehash_help = (
        "For value 1, rehash the partition id's of each item; "
        "this is needed when transferring data between clusters with different number of partitions, "
        "such as when transferring data from an OSX server to a non-OSX cluster")
    # One row per parameter: (name, default value, help text).
    table = (
        ("batch_max_size", 1000, "Transfer this # of documents per batch"),
        ("batch_max_bytes", 400000, "Transfer this # of bytes per batch"),
        ("cbb_max_mb", 100000, "Split backup file on destination cluster if it exceeds MB"),
        ("max_retry", 10, "Max number of sequential retries if transfer fails"),
        ("report", 5, "Number batches transferred before updating progress bar in console"),
        ("report_full", 2000, "Number batches transferred before emitting progress information in console"),
        ("recv_min_bytes", 4096, "Amount of bytes for every TCP/IP call transferred"),
        ("rehash", 0, rehash_help),
        ("data_only", 0, "For value 1, only transfer data from a backup file or cluster"),
        ("design_doc_only", 0, "For value 1, transfer design documents only from a backup file or cluster"),
        ("seqno", 0, "By default, start seqno from beginning."),
        ("backoff_cap", 10, "Max backoff time during rebalance period"),
    )
    return dict((name, (default, text)) for name, default, text in table)
114
def _is_ipv4_literal(node):
    """Return True when node is written as a dotted-quad IPv4 address."""
    # The dots are escaped so that names such as "10x0x0x1" are not
    # mistaken for addresses.
    return re.match(r'^\d+\.\d+\.\d+\.\d+$', node) is not None


def _node_aliases(node):
    """Return the names and addresses under which node may be listed."""
    aliases = [node]
    if _is_ipv4_literal(node):
        try:
            hostname, other_names, _ = socket.gethostbyaddr(node)
        except socket.error:
            # Reverse lookup is best effort; the address alone may match.
            pass
        else:
            aliases.append(hostname)
            aliases.extend(other_names)
    else:
        try:
            aliases.append(socket.gethostbyname(node))
        except socket.error:
            # Forward lookup is best effort; the name alone may match.
            pass
    return aliases


def _find_server_index(serverlist, aliases):
    """Return the index of the last serverlist entry found in aliases, or -1."""
    wanted = set(aliases)
    index = -1
    for i, server in enumerate(serverlist):
        if server in wanted:
            index = i
    return index


def _select_vbuckets(vbucket_map, server_index, single_node):
    """Return the vbucket ids to back up.

    With single_node set, only the vbuckets whose first map entry equals
    server_index are returned; otherwise every vbucket id is returned.
    """
    if single_node:
        return [vbucket for vbucket, chain in enumerate(vbucket_map)
                if chain[0] == server_index]
    return list(range(len(vbucket_map)))


def _verbose_enabled():
    """Return the script's --verbose flag, or False outside the script."""
    # The main script stores the parsed options in the module-level name
    # `args`; that name does not exist when this module is imported.
    return bool(getattr(globals().get('args'), 'verbose', False))


def findAllVbucketsForBucket(node, bucket, path, port, restport, username, password, single_node):
    """Return the list of vbucket ids of `bucket` that should be backed up.

    node        -- host name or IPv4 address of the server node
    bucket      -- bucket name
    path        -- unused; kept so existing callers keep working
    port        -- bucket port (string); appended to every alias of the node
                   to form the "host:port" values compared with the
                   server list returned by the REST call
    restport    -- REST port (string) used for the HTTP request
    username, password -- credentials sent as HTTP basic authentication
    single_node -- when true, return only the vbuckets whose first entry in
                   the vbucket map is this node; otherwise return all

    Prints a message and exits with status 1 when the REST request fails
    or when the node cannot be found in the server list.
    """
    request = urllib2.Request(
        'http://' + node + ':' + restport + '/pools/default/buckets/' + bucket)
    credentials = base64.b64encode('%s:%s' % (username, password))
    request.add_header('Authorization', 'Basic %s' % credentials)
    try:
        response = urllib2.urlopen(request)
    except urllib2.HTTPError as e:
        if e.code == 401:
            print('Authorization failed.  Please check username and password.')
        else:
            print('Request for bucket %s failed with HTTP status %s' % (bucket, e.code))
        sys.exit(1)
    except urllib2.URLError as e:
        print('Could not connect to %s:%s - %s' % (node, restport, e.reason))
        sys.exit(1)
    try:
        data = json.loads(response.read())
    finally:
        response.close()

    vbucketserverdata = data['vBucketServerMap']
    vbucketdata = vbucketserverdata['vBucketMap']
    serverlist = vbucketserverdata['serverList']

    # all possible names / ip addresses for the node, each with the bucket port
    aliases = [alias + ":" + port for alias in _node_aliases(node)]

    if _verbose_enabled():
        print("aliases list is ")
        for x in aliases:
            print(str(x))
        print("server list is")
        for x in serverlist:
            print(str(x))

    # find out the index in the serverlist for this node
    serverindex = _find_server_index(serverlist, aliases)
    if serverindex == -1:
        print('Could not find node:port in server list.')
        sys.exit(1)

    return _select_vbuckets(vbucketdata, serverindex, single_node)
184
185
# Get the buckets that exist on the cluster
def getBuckets(node, rest_port, username, password):
    """Return the names of the buckets reported by the cluster.

    Requests http://<node>:<rest_port>/pools/default/buckets with HTTP basic
    authentication and returns the 'name' field of every item in the JSON
    reply, in reply order.

    Prints a message and exits with status 1 when the request fails; an
    authorization problem (HTTP 401) is reported separately from other
    HTTP or connection errors.
    """
    request = urllib2.Request(
        'http://' + node + ':' + rest_port + '/pools/default/buckets')
    credentials = base64.b64encode('%s:%s' % (username, password))
    request.add_header('Authorization', 'Basic %s' % credentials)
    try:
        response = urllib2.urlopen(request)
    except urllib2.HTTPError as e:
        if e.code == 401:
            print('Authorization failed.  Please check username and password.')
        else:
            print('Request for the bucket list failed with HTTP status %s' % e.code)
        sys.exit(1)
    except urllib2.URLError as e:
        print('Could not connect to %s:%s - %s' % (node, rest_port, e.reason))
        sys.exit(1)
    try:
        data = json.loads(response.read())
    finally:
        response.close()
    return [item['name'] for item in data]
204
205
if __name__ == '__main__':
    # NOTE: this code stays at module level (not inside a main() function)
    # because findAllVbucketsForBucket() reads the module-level name `args`
    # for its verbose flag.

    # Parse the arguments given.
    args, cluster, backupDir = argumentParsing()

    # Remove any white-spaces from start and end of strings
    backupDir = backupDir.strip()
    path = args.path.strip()

    # -n is used as the slice step below, so it must be a positive integer.
    try:
        vbuckets_per_process = int(args.number)
    except ValueError:
        vbuckets_per_process = 0
    if vbuckets_per_process < 1:
        sys.exit('Error: -n/--number must be a positive integer, got: %s' % args.number)

    # Check to see if root backup directory exists
    if not os.path.isdir(backupDir):
        try:
            os.makedirs(backupDir)
        except OSError:
            sys.exit("Cannot create backup root directory:%s" % backupDir)

    # Check to see if path is correct
    if not os.path.isdir(path):
        print('The path to cbbackup does not exist')
        print('Please run with a different path')
        sys.exit(1)
    cbbackup = os.path.join(path, 'cbbackup')
    if not os.path.isfile(cbbackup):
        print('cbbackup could not be found in ' + path)
        sys.exit(1)

    # Check to see if log directory exists if not create it
    log_dir = os.path.join(backupDir, 'logs')
    if not os.path.isdir(log_dir):
        try:
            os.mkdir(log_dir)
        except OSError:
            print('Error trying to create directory ' + log_dir)
            sys.exit(1)

    # Separate out node and REST port
    matchObj = re.match(r'^http://(.*):(\d+)$', cluster, re.I)
    if not matchObj:
        print("Please enter the source as http://hostname:port")
        print("For example http://localhost:8091 or http://127.0.0.1:8091")
        sys.exit(1)
    node = matchObj.group(1)
    rest = matchObj.group(2)

    # Check to see if backing-up all buckets or just a specified bucket
    clusterBuckets = getBuckets(node, rest, args.username, args.password)
    if args.bucket_source == '':
        bucketList = clusterBuckets
    else:
        # Check that the bucket exists
        if args.bucket_source in clusterBuckets:
            bucketList.append(args.bucket_source)
        else:
            print('Bucket ' + args.bucket_source + ' does not exist')
            print('Please enter a different bucket')
            sys.exit(1)

    # For each bucket, collect its vbuckets; keep first-seen order, no repeats
    seen_vbuckets = set(vbucketList)
    for bucket in bucketList:
        perbucketvbucketlist = findAllVbucketsForBucket(
            node, bucket, path, args.port, rest, args.username, args.password, args.single_node)
        for vbucket in perbucketvbucketlist:
            if vbucket not in seen_vbuckets:
                seen_vbuckets.add(vbucket)
                vbucketList.append(vbucket)

    # Optional cbbackup arguments.  The command is passed to Popen as an
    # argument list (no shell), so paths, user names and passwords that
    # contain spaces or shell metacharacters need no escaping.
    optional_args = []
    if args.extra:
        optional_args += ['-x', args.extra]
    if args.mode:
        optional_args += ['-m', args.mode]
    # If only one bucket is selected, name it explicitly for cbbackup.
    if len(bucketList) == 1:
        optional_args += ['-b', bucketList[0]]

    # Group the number of vbuckets per process
    err_logs = {}       # Popen object -> open error-log file
    failed_to_start = []
    for i in range(0, len(vbucketList), vbuckets_per_process):
        chunk = vbucketList[i:i + vbuckets_per_process]
        vbucketsname = str(chunk[0]) + '-' + str(chunk[-1])
        command = [cbbackup, '-v', '-t', '1',
                   '--vbucket-list=' + ''.join(str(chunk).split()),
                   'http://' + node + ':' + rest,
                   os.path.join(backupDir, vbucketsname),
                   '-u', args.username,
                   '-p', args.password] + optional_args
        if args.verbose:
            print(' '.join(command))
        # stderr of each cbbackup goes to logs/<range>.err
        err_log = open(os.path.join(log_dir, vbucketsname) + '.err', 'w')
        try:
            p = subprocess.Popen(command, stderr=err_log)
        except OSError as e:
            err_log.close()
            print('Could not start cbbackup for vbuckets ' + vbucketsname + ': ' + str(e))
            failed_to_start.append(vbucketsname)
            continue
        processes[p] = vbucketsname
        err_logs[p] = err_log

    # Did we backup anything?
    if not processes and not failed_to_start:
        print('Did not backup anything')
        print('Please check that you have the buckets on ' + node)
        sys.exit(1)

    print('Waiting for the backup to complete...')
    successCount = 0
    for p in processes:
        p.wait()
        err_logs[p].close()
        # Any non-zero status (including death by signal) is a failure.
        if p.returncode != 0:
            print('Error with backup - look in ' + os.path.join(backupDir, 'logs', processes[p]) + '.err for details')
        else:
            successCount += 1

    if successCount == len(processes) and not failed_to_start:
        print('SUCCESSFULLY COMPLETED!')
    else:
        print('ERROR!')
        sys.exit(1)
326            exit(1)
327