1 /* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3 * Copyright 2016 Couchbase, Inc.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * Function to base64 encode and decode text as described in RFC 4648
20 *
21 * @author Trond Norbye
22 */
23
24 #include <algorithm>
25 #include <cctype>
26 #include <cstdint>
27 #include <cstring>
28 #include <iostream>
29 #include <platform/base64.h>
30 #include <stdexcept>
31 #include <string>
32 #include <vector>
33
/**
 * The base64 alphabet used by the encoder: the character stored at
 * index N is the output character for the 6-bit value N.
 */
static const uint8_t code[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789"
        "+/";
39
/**
 * Map a character from the base64 alphabet back to the 6-bit value it
 * represents.
 *
 * @param code the encoded character to map
 * @return the value (0-63) represented by the character
 * @throws std::invalid_argument if the character is not one of the 64
 *         alphabet characters (note that the padding character '=' is
 *         not accepted by this function)
 */
static uint32_t code2val(const uint8_t code) {
    // 'A'..'Z' map to 0..25
    if (code >= 'A' && code <= 'Z') {
        return static_cast<uint32_t>(code - 'A');
    }
    // 'a'..'z' map to 26..51
    if (code >= 'a' && code <= 'z') {
        return static_cast<uint32_t>(code - 'a') + 26u;
    }
    // '0'..'9' map to 52..61
    if (code >= '0' && code <= '9') {
        return static_cast<uint32_t>(code - '0') + 52u;
    }
    if (code == '+') {
        return 62u;
    }
    if (code == '/') {
        return 63u;
    }
    throw std::invalid_argument(
            "Couchbase::base64::code2val Invalid "
            "input character");
}
65
66 /**
67 * Encode up to 3 characters to 4 output character.
68 *
69 * @param s pointer to the input stream
70 * @param d pointer to the output stream
71 * @param num the number of characters from s to encode
72 */
encode_rest(const uint8_t* s, std::string& result, size_t num)73 static void encode_rest(const uint8_t* s, std::string& result, size_t num) {
74 uint32_t val = 0;
75
76 switch (num) {
77 case 2:
78 val = (uint32_t)((*s << 16) | (*(s + 1) << 8));
79 break;
80 case 1:
81 val = (uint32_t)((*s << 16));
82 break;
83 default:
84 throw std::invalid_argument("base64::encode_rest num may be 1 or 2");
85 }
86
87 result.push_back((char)code[(val >> 18) & 63]);
88 result.push_back((char)code[(val >> 12) & 63]);
89 if (num == 2) {
90 result.push_back((char)code[(val >> 6) & 63]);
91 } else {
92 result.push_back('=');
93 }
94 result.push_back('=');
95 }
96
97 /**
98 * Encode 3 bytes to 4 output character.
99 *
100 * @param s pointer to the input stream
101 * @param d pointer to the output stream
102 */
encode_triplet(const uint8_t* s, std::string& str)103 static void encode_triplet(const uint8_t* s, std::string& str) {
104 uint32_t val = (uint32_t)((*s << 16) | (*(s + 1) << 8) | (*(s + 2)));
105 str.push_back((char)code[(val >> 18) & 63]);
106 str.push_back((char)code[(val >> 12) & 63]);
107 str.push_back((char)code[(val >> 6) & 63]);
108 str.push_back((char)code[val & 63]);
109 }
110
111 /**
112 * decode 4 input characters to up to two output bytes
113 *
114 * @param s source string
115 * @param d destination
116 * @return the number of characters inserted
117 */
decode_quad(const uint8_t* s, std::vector<uint8_t>& d)118 static int decode_quad(const uint8_t* s, std::vector<uint8_t>& d) {
119 uint32_t value = code2val(s[0]) << 18;
120 value |= code2val(s[1]) << 12;
121
122 int ret = 3;
123
124 if (s[2] == '=') {
125 ret = 1;
126 } else {
127 value |= code2val(s[2]) << 6;
128 if (s[3] == '=') {
129 ret = 2;
130 } else {
131 value |= code2val(s[3]);
132 }
133 }
134
135 d.push_back(uint8_t(value >> 16));
136 if (ret > 1) {
137 d.push_back(uint8_t(value >> 8));
138 if (ret > 2) {
139 d.push_back(uint8_t(value));
140 }
141 }
142
143 return ret;
144 }
145
146 namespace cb {
147 namespace base64 {
148 PLATFORM_PUBLIC_API
encode(const cb::const_byte_buffer blob, bool prettyprint)149 std::string encode(const cb::const_byte_buffer blob, bool prettyprint) {
150 // base64 encoding encodes up to 3 input characters to 4 output
151 // characters in the alphabet above.
152 auto triplets = blob.size() / 3;
153 auto rest = blob.size() % 3;
154 auto chunks = triplets;
155 if (rest != 0) {
156 ++chunks;
157 }
158
159 std::string result;
160 if (prettyprint) {
161 // In pretty-print mode we insert a newline after adding
162 // 16 chunks (four characters).
163 result.reserve(chunks * 4 + chunks / 16);
164 } else {
165 result.reserve(chunks * 4);
166 }
167
168 const uint8_t* in = blob.data();
169
170 chunks = 0;
171 for (size_t ii = 0; ii < triplets; ++ii) {
172 encode_triplet(in, result);
173 in += 3;
174
175 if (prettyprint && (++chunks % 16) == 0) {
176 result.push_back('\n');
177 }
178 }
179
180 if (rest > 0) {
181 encode_rest(in, result, rest);
182 }
183
184 if (prettyprint && result.back() != '\n') {
185 result.push_back('\n');
186 }
187
188 return result;
189 }
190
191 PLATFORM_PUBLIC_API
decode(const cb::const_char_buffer blob)192 std::vector<uint8_t> decode(const cb::const_char_buffer blob) {
193 std::vector<uint8_t> destination;
194
195 if (blob.empty()) {
196 return destination;
197 }
198
199 // To reduce the number of reallocations, start by reserving an
200 // output buffer of 75% of the input size (and add 3 to avoid dealing
201 // with zero)
202 size_t estimate = blob.size() * 0.75;
203 destination.reserve(estimate + 3);
204
205 const uint8_t* in = reinterpret_cast<const uint8_t*>(blob.data());
206 size_t offset = 0;
207 while (offset < blob.size()) {
208 if (std::isspace((int)*in)) {
209 ++offset;
210 ++in;
211 continue;
212 }
213
214 // We need at least 4 bytes
215 if ((offset + 4) > blob.size()) {
216 throw std::invalid_argument("cb::base64::decode invalid input");
217 }
218
219 decode_quad(in, destination);
220 in += 4;
221 offset += 4;
222 }
223
224 return destination;
225 }
226
227 }
228 }
229