xref: /3.0.3-GA/couchbase-cli/cbbackupwrapper (revision 7a2097fd)
1#!/usr/bin/env python
2# -*-python-*-
3
4import base64
5import optparse
6import os
7import re
8import simplejson as json
9import socket
10import subprocess
11import sys
12import urllib2
13
14"""Written by Daniel Owen owend@couchbase.com on 27 June 2014
15Version 1.4    Last updated 10 July 2014
16
The current implementation of cbbackup that comes with Couchbase Server 2.5.1
uses only one thread per node.  Therefore, when using cbbackup with the
--single-node parameter, we are limited to one thread - this impacts performance.
20
This script provides a wrapper to invoke multiple cbbackup processes.
It automatically detects which buckets and vbuckets are
on the node.  It allows the user to specify how many vbuckets to back up in a single
cbbackup process and then invokes the necessary number of processes.
25An example invocation is as follows:
26
python cbbackupwrapper.py http://127.0.0.1:8091 ../backup/ --single-node -n 4 \
-u Administrator -p myPassword --path /opt/couchbase/bin/  -v
29
This will back up all the buckets on node 127.0.0.1 into ../backup.
It will back up 4 vbuckets per cbbackup process.
Access to the cluster is authenticated using username=Administrator and
password=myPassword, and cbbackup will be found in /opt/couchbase/bin.
34
35Run python cbbackupwrapper -h for more information.
36
37See the cbrestorewrapper.py script for restoring backups made with this script."""
38
# Module-level state filled in by the __main__ section of this script.
# bucketList:  names of the buckets selected for backup.
# vbucketList: vbucket ids collected across the selected buckets.
bucketList, vbucketList = [], []
# processes: maps each launched cbbackup Popen object to the label of the
# vbucket range it is backing up.
processes = {}
42
def argumentParsing():
    """Parse the command line of the wrapper.

    Expects exactly two positional arguments (CLUSTER and BACKUPDIR) plus
    the options defined below.

    Returns a tuple ``(options, cluster, backup_dir)`` where ``options`` is
    the optparse values object and the other two are the positional
    arguments as given.  All option values are strings except the
    ``--single-node`` and ``--verbose`` flags, which are booleans.

    Exits the process if the positional arguments are missing (help is
    printed first) or if ``-n/--number`` is not a positive integer.
    """
    usage = "usage: %prog CLUSTER BACKUPDIR OPTIONS"
    parser = optparse.OptionParser(usage)

    parser.add_option('-b', '--bucket-source', default='',
                        help='Specify the bucket to backup.  Defaults to all buckets')
    parser.add_option('--single-node', action='store_true',
                        default=False, help='use a single server node from the source only')
    parser.add_option('-u', '--username', default='Administrator',
                        help='REST username for source cluster or server node. Default is Administrator')
    parser.add_option('-p', '--password', default='PASSWORD',
                        help='REST password for source cluster or server node. Defaults to PASSWORD')
    parser.add_option('-v', '--verbose', action='store_true',
                        default=False, help='Enable verbose messaging')
    parser.add_option('--path', default='.',
                        help='Specify the path to cbbackup. Defaults to current directory')
    parser.add_option('--port', default='11210',
                        help='Specify the bucket port.  Defaults to 11210')
    parser.add_option('-n', '--number', default='100',
                        help='Specify the number of vbuckets per process. Defaults to 100')
    parser.add_option('-x', '--extra', default=None,
                        help='Provide extra, uncommon config parameters; '
                             'comma-separated key=val(,key=val)* pairs')
    options, rest = parser.parse_args()
    if len(rest) != 2:
        parser.print_help()
        sys.exit("\nError: please provide both cluster IP and backup directory path.")

    # The value is later used as the step of a range(); reject anything that
    # is not a positive integer here instead of failing with a traceback
    # after the cluster has already been queried.  The option itself stays a
    # string so existing callers that do int(options.number) keep working.
    try:
        vbuckets_per_process = int(options.number)
    except ValueError:
        vbuckets_per_process = 0
    if vbuckets_per_process < 1:
        parser.error('-n/--number must be a positive integer, got %r'
                     % options.number)

    return options, rest[0], rest[1]
72
def findAllVbucketsForBucket(node, bucket, path, port, restport, username, password, single_node):
    """Return the list of vbucket ids of `bucket` to back up from `node`.

    The bucket description is fetched from
    http://<node>:<restport>/pools/default/buckets/<bucket> with HTTP basic
    authentication, and its vBucketServerMap is inspected.

    Arguments:
    node        -- hostname or IPv4 address of the source node.
    bucket      -- name of the bucket to inspect.
    path        -- not used by this function; kept so callers need not change.
    port        -- bucket (data) port as a string; serverList entries are
                   compared against "<alias>:<port>".
    restport    -- REST port as a string.
    username    -- REST username.
    password    -- REST password.
    single_node -- when true, return only the vbuckets whose first
                   vBucketMap entry is this node's index in serverList
                   (i.e. the vbuckets active on this node); otherwise return
                   every vbucket index.

    The process exits with status 1 when the REST request fails or when no
    alias of `node` appears in the server list.

    NOTE: verbose output is controlled by the module-level `args.verbose`,
    which is only bound when this file runs as a script.
    """
    request = urllib2.Request(
        'http://' + node + ':' + restport + '/pools/default/buckets/' + bucket)
    credentials = base64.b64encode('%s:%s' % (username, password))
    request.add_header('Authorization', 'Basic %s' % credentials)
    try:
        response = urllib2.urlopen(request)
    except urllib2.HTTPError as err:
        # Only a 401 really is a credentials problem; report anything else
        # as what it is so the user is not sent chasing the wrong cause.
        if err.code == 401:
            print('Authorization failed.  Please check username and password.')
        else:
            print('Request for bucket %s failed with HTTP status %d.'
                  % (bucket, err.code))
        sys.exit(1)
    except (urllib2.URLError, socket.error) as err:
        print('Could not connect to %s:%s (%s).' % (node, restport, err))
        sys.exit(1)
    try:
        data = json.loads(response.read())
    finally:
        response.close()

    vbucketserverdata = data['vBucketServerMap']
    vbucketdata = vbucketserverdata['vBucketMap']
    serverlist = vbucketserverdata['serverList']

    # Collect every name / IP address this node may be listed under.
    aliases = []
    if re.match(r'^\d+\.\d+\.\d+\.\d+$', node):
        # node was entered as its IP address
        nodeip = node
        aliases.append(nodeip)
        try:
            hostname, other_names, _ = socket.gethostbyaddr(nodeip)
        except socket.error:
            print('WARN: Could not find name for ' + nodeip)
        else:
            aliases.append(hostname)
            aliases.extend(other_names)
    else:
        aliases.append(node)
        try:
            aliases.append(socket.gethostbyname(node))
        except socket.error:
            # Keep going: the hostname itself may still match the list.
            print('WARN: Could not find IP address for ' + node)

    aliases = [alias + ":" + port for alias in aliases]

    if args.verbose:
        print("aliases list is ")
        for alias in aliases:
            print(str(alias))
        print("server list is")
        for server in serverlist:
            print(str(server))

    # Find the index of this node in the server list (last match wins).
    serverindex = -1
    for i, server in enumerate(serverlist):
        if server in aliases:
            serverindex = i
    if serverindex == -1:
        print('Could not find node:port in server list.')
        sys.exit(1)

    if single_node:
        # Keep only the vbuckets that are active on this node.
        return [vbucket for vbucket, chain in enumerate(vbucketdata)
                if chain[0] == serverindex]
    # Otherwise back up every vbucket.
    return list(range(len(vbucketdata)))
142
143
144# Get the buckets that exist on the cluster
def getBuckets(node, rest_port, username, password):
    """Return the names of the buckets reported by the cluster.

    Fetches http://<node>:<rest_port>/pools/default/buckets with HTTP basic
    authentication and returns the 'name' field of every entry, in the
    order received.

    Arguments:
    node      -- hostname or IP address of the node to query.
    rest_port -- REST port as a string.
    username  -- REST username.
    password  -- REST password.

    The process exits with status 1 when the request fails.
    """
    request = urllib2.Request(
        'http://' + node + ':' + rest_port + '/pools/default/buckets')
    credentials = base64.b64encode('%s:%s' % (username, password))
    request.add_header('Authorization', 'Basic %s' % credentials)
    try:
        response = urllib2.urlopen(request)
    except urllib2.HTTPError as err:
        # Only a 401 really is a credentials problem; report anything else
        # as what it is so the user is not sent chasing the wrong cause.
        if err.code == 401:
            print('Authorization failed.  Please check username and password.')
        else:
            print('Request for bucket list failed with HTTP status %d.'
                  % err.code)
        sys.exit(1)
    except (urllib2.URLError, socket.error) as err:
        print('Could not connect to %s:%s (%s).' % (node, rest_port, err))
        sys.exit(1)
    try:
        data = json.loads(response.read())
    finally:
        response.close()
    return [item['name'] for item in data]
162
163
if __name__ == '__main__':
    # Script entry point: validate the arguments, work out which vbuckets to
    # back up, start one cbbackup process per group of vbuckets and wait for
    # all of them to finish.  Exits with status 1 on any failure.

    # Parse the arguments given.  NOTE: `args` has to stay a module-level
    # name because findAllVbucketsForBucket() reads args.verbose directly.
    args, cluster, backupDir = argumentParsing()

    # Remove any white-spaces from start and end of strings
    backupDir = backupDir.strip()
    path = args.path.strip()

    # The group size is used as a range() step below, so it must be >= 1.
    try:
        vbuckets_per_process = int(args.number)
    except ValueError:
        vbuckets_per_process = 0
    if vbuckets_per_process < 1:
        sys.exit('The number of vbuckets per process must be a positive integer')

    # Check to see if root backup directory exists
    if not os.path.isdir(backupDir):
        try:
            os.makedirs(backupDir)
        except OSError:
            sys.exit("Cannot create backup root directory:%s" % backupDir)

    # Check to see if path is correct
    if not os.path.isdir(path):
        print('The path to cbbackup does not exist')
        print('Please run with a different path')
        sys.exit(1)
    cbbackup = os.path.join(path, 'cbbackup')
    if not os.path.isfile(cbbackup):
        print('cbbackup could not be found in ' + path)
        sys.exit(1)

    # Check to see if log directory exists if not create it
    log_dir = os.path.join(backupDir, 'logs')
    if not os.path.isdir(log_dir):
        try:
            os.mkdir(log_dir)
        except OSError:
            print('Error trying to create directory ' + log_dir)
            sys.exit(1)

    # Separate out node and REST port
    matchObj = re.match(r'^http://(.*):(\d+)$', cluster, re.I)
    if not matchObj:
        print("Please enter the source as http://hostname:port")
        print("For example http://localhost:8091 or http://127.0.0.1:8091")
        sys.exit(1)
    node = matchObj.group(1)
    rest = matchObj.group(2)

    # Check to see if backing-up all buckets or just a specified bucket
    available_buckets = getBuckets(node, rest, args.username, args.password)
    if args.bucket_source == '':
        bucketList = available_buckets
    elif args.bucket_source in available_buckets:
        bucketList = [args.bucket_source]
    else:
        print('Bucket ' + args.bucket_source + ' does not exist')
        print('Please enter a different bucket')
        sys.exit(1)

    # Collect the union of the vbuckets of every selected bucket, keeping
    # the order in which they are first seen.
    for bucket in bucketList:
        perbucketvbucketlist = findAllVbucketsForBucket(
            node, bucket, path, args.port, rest, args.username, args.password, args.single_node)
        for vbucket in perbucketvbucketlist:
            if vbucket not in vbucketList:
                vbucketList.append(vbucket)

    # Start one cbbackup per group of vbuckets.  The command is passed as an
    # argument list (no shell), so spaces in the cbbackup path and special
    # characters in the password or extra options need no escaping.
    source = 'http://' + node + ':' + rest
    launch_failures = 0
    for start in range(0, len(vbucketList), vbuckets_per_process):
        chunk = vbucketList[start:start + vbuckets_per_process]
        vbucketsname = str(chunk[0]) + '-' + str(chunk[-1])
        command = [cbbackup, '-v', '-t', '1',
                   '--vbucket-list=[' + ','.join(str(v) for v in chunk) + ']',
                   source, os.path.join(backupDir, vbucketsname),
                   '-u', args.username, '-p', args.password]
        if args.extra:
            command.extend(['-x', args.extra])
        # If only one bucket is being backed up, tell cbbackup which one.
        if len(bucketList) == 1:
            command.extend(['-b', bucketList[0]])
        if args.verbose:
            print(' '.join(command))
        err_path = os.path.join(log_dir, vbucketsname + '.err')
        try:
            # The child inherits its own copy of the log file descriptor, so
            # the parent's handle can be closed as soon as it has started.
            with open(err_path, 'w') as err_log:
                p = subprocess.Popen(command, stderr=err_log)
        except (IOError, OSError) as err:
            print('Could not start cbbackup for vbuckets %s: %s'
                  % (vbucketsname, err))
            launch_failures += 1
            continue
        processes[p] = vbucketsname

    # Did we backup anything?
    if not processes and not launch_failures:
        print('Did not backup anything')
        print('Please check that you have the buckets on ' + node)
        sys.exit(1)

    print('Waiting for the backup to complete...')
    failures = launch_failures
    for p, vbucketsname in processes.items():
        # Any non-zero status (including death by signal) is a failure.
        if p.wait() != 0:
            print('Error with backup - look in '
                  + os.path.join(backupDir, 'logs', vbucketsname)
                  + '.err for details')
            failures += 1

    if failures:
        print('ERROR!')
        sys.exit(1)
    print('SUCCESSFULLY COMPLETED!')
281