1 /* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  *     Copyright 2016 Couchbase, Inc.
4  *
5  *   Licensed under the Apache License, Version 2.0 (the "License");
6  *   you may not use this file except in compliance with the License.
7  *   You may obtain a copy of the License at
8  *
9  *       http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *   Unless required by applicable law or agreed to in writing, software
12  *   distributed under the License is distributed on an "AS IS" BASIS,
13  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *   See the License for the specific language governing permissions and
15  *   limitations under the License.
16  */
17 
18 /*
 * Functions to base64 encode and decode text as described in RFC 4648
20  *
21  * @author Trond Norbye
22  */
23 
24 #include <algorithm>
25 #include <cctype>
26 #include <cstdint>
27 #include <cstring>
28 #include <iostream>
29 #include <platform/base64.h>
30 #include <stdexcept>
31 #include <string>
32 #include <vector>
33 
/**
 * The base64 alphabet, indexed by 6-bit value: entry N is the character
 * used to represent the value N in the encoded output.
 *
 * The array is initialised from a string literal, so it also carries a
 * terminating NUL at index 64. That entry is never used as a symbol.
 */
static const uint8_t code[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ" // values  0..25
        "abcdefghijklmnopqrstuvwxyz" // values 26..51
        "0123456789" // values 52..61
        "+/"; // values 62 and 63
39 
/**
 * Map a character from the base64 alphabet back to the 6-bit value it
 * represents (the inverse of a lookup in the alphabet table).
 *
 * The padding character ('=') is not part of the alphabet and is rejected
 * like any other unknown character; callers must deal with padding before
 * calling this function.
 *
 * Note that the parameter shadows the file-level alphabet table with the
 * same name; only the parameter is used in here.
 *
 * @param code the encoded character to map
 * @return the value (in the range 0..63) for the code character
 * @throws std::invalid_argument if the character is not part of the
 *         base64 alphabet
 */
static uint32_t code2val(const uint8_t code) {
    if (code >= 'A' && code <= 'Z') {
        return static_cast<uint32_t>(code - 'A');
    }
    if (code >= 'a' && code <= 'z') {
        return static_cast<uint32_t>(code - 'a') + 26u;
    }
    if (code >= '0' && code <= '9') {
        return static_cast<uint32_t>(code - '0') + 52u;
    }
    if (code == '+') {
        return 62u;
    }
    if (code == '/') {
        return 63u;
    }
    throw std::invalid_argument("Couchbase::base64::code2val Invalid "
                                    "input character");
}
65 
66 /**
67  * Encode up to 3 characters to 4 output character.
68  *
69  * @param s pointer to the input stream
70  * @param d pointer to the output stream
71  * @param num the number of characters from s to encode
72  */
encode_rest(const uint8_t* s, std::string& result, size_t num)73 static void encode_rest(const uint8_t* s, std::string& result, size_t num) {
74     uint32_t val = 0;
75 
76     switch (num) {
77     case 2:
78         val = (uint32_t)((*s << 16) | (*(s + 1) << 8));
79         break;
80     case 1:
81         val = (uint32_t)((*s << 16));
82         break;
83     default:
84         throw std::invalid_argument("base64::encode_rest num may be 1 or 2");
85     }
86 
87     result.push_back((char)code[(val >> 18) & 63]);
88     result.push_back((char)code[(val >> 12) & 63]);
89     if (num == 2) {
90         result.push_back((char)code[(val >> 6) & 63]);
91     } else {
92         result.push_back('=');
93     }
94     result.push_back('=');
95 }
96 
97 /**
98  * Encode 3 bytes to 4 output character.
99  *
100  * @param s pointer to the input stream
101  * @param d pointer to the output stream
102  */
encode_triplet(const uint8_t* s, std::string& str)103 static void encode_triplet(const uint8_t* s, std::string& str) {
104     uint32_t val = (uint32_t)((*s << 16) | (*(s + 1) << 8) | (*(s + 2)));
105     str.push_back((char)code[(val >> 18) & 63]);
106     str.push_back((char)code[(val >> 12) & 63]);
107     str.push_back((char)code[(val >> 6) & 63]);
108     str.push_back((char)code[val & 63]);
109 }
110 
111 /**
112  * decode 4 input characters to up to two output bytes
113  *
114  * @param s source string
115  * @param d destination
116  * @return the number of characters inserted
117  */
decode_quad(const uint8_t* s, std::vector<uint8_t>& d)118 static int decode_quad(const uint8_t* s, std::vector<uint8_t>& d) {
119     uint32_t value = code2val(s[0]) << 18;
120     value |= code2val(s[1]) << 12;
121 
122     int ret = 3;
123 
124     if (s[2] == '=') {
125         ret = 1;
126     } else {
127         value |= code2val(s[2]) << 6;
128         if (s[3] == '=') {
129             ret = 2;
130         } else {
131             value |= code2val(s[3]);
132         }
133     }
134 
135     d.push_back(uint8_t(value >> 16));
136     if (ret > 1) {
137         d.push_back(uint8_t(value >> 8));
138         if (ret > 2) {
139             d.push_back(uint8_t(value));
140         }
141     }
142 
143     return ret;
144 }
145 
146 namespace cb {
147 namespace base64 {
148 PLATFORM_PUBLIC_API
encode(const cb::const_byte_buffer blob, bool prettyprint)149 std::string encode(const cb::const_byte_buffer blob, bool prettyprint) {
150     // base64 encoding encodes up to 3 input characters to 4 output
151     // characters in the alphabet above.
152     auto triplets = blob.size() / 3;
153     auto rest = blob.size() % 3;
154     auto chunks = triplets;
155     if (rest != 0) {
156         ++chunks;
157     }
158 
159     std::string result;
160     if (prettyprint) {
161         // In pretty-print mode we insert a newline after adding
162         // 16 chunks (four characters).
163         result.reserve(chunks * 4 + chunks / 16);
164     } else {
165         result.reserve(chunks * 4);
166     }
167 
168     const uint8_t* in = blob.data();
169 
170     chunks = 0;
171     for (size_t ii = 0; ii < triplets; ++ii) {
172         encode_triplet(in, result);
173         in += 3;
174 
175         if (prettyprint && (++chunks % 16) == 0) {
176             result.push_back('\n');
177         }
178     }
179 
180     if (rest > 0) {
181         encode_rest(in, result, rest);
182     }
183 
184     if (prettyprint && result.back() != '\n') {
185         result.push_back('\n');
186     }
187 
188     return result;
189 }
190 
191 PLATFORM_PUBLIC_API
decode(const cb::const_char_buffer blob)192 std::vector<uint8_t> decode(const cb::const_char_buffer blob) {
193     std::vector<uint8_t> destination;
194 
195     if (blob.empty()) {
196         return destination;
197     }
198 
199     // To reduce the number of reallocations, start by reserving an
200     // output buffer of 75% of the input size (and add 3 to avoid dealing
201     // with zero)
202     size_t estimate = blob.size() * 0.75;
203     destination.reserve(estimate + 3);
204 
205     const uint8_t* in = reinterpret_cast<const uint8_t*>(blob.data());
206     size_t offset = 0;
207     while (offset < blob.size()) {
208         if (std::isspace((int)*in)) {
209             ++offset;
210             ++in;
211             continue;
212         }
213 
214         // We need at least 4 bytes
215         if ((offset + 4) > blob.size()) {
216             throw std::invalid_argument("cb::base64::decode invalid input");
217         }
218 
219         decode_quad(in, destination);
220         in += 4;
221         offset += 4;
222     }
223 
224     return destination;
225 }
226 
227 }
228 }
229