1 /* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  *     Copyright 2015 Couchbase, Inc
4  *
5  *   Licensed under the Apache License, Version 2.0 (the "License");
6  *   you may not use this file except in compliance with the License.
7  *   You may obtain a copy of the License at
8  *
9  *       http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *   Unless required by applicable law or agreed to in writing, software
12  *   distributed under the License is distributed on an "AS IS" BASIS,
13  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *   See the License for the specific language governing permissions and
15  *   limitations under the License.
16  */
17 
18 #pragma once
19 
20 #include "memcached.h"
21 
22 #include "connection.h"
23 #include "cookie.h"
24 #include "subdocument_traits.h"
25 #include "xattr/utils.h"
26 
27 #include <cstddef>
28 #include <iomanip>
29 #include <memory>
30 #include <platform/compress.h>
31 #include <platform/sized_buffer.h>
32 
33 #include <unordered_map>
34 
/**
 * The mutation semantics a subdoc command may run with. The value in use
 * is stored in SubdocCmdContext::mutationSemantics (which defaults to
 * Replace).
 */
enum class MutationSemantics : uint8_t {
    Add,
    Replace,
    Set
};
36 
/**
 * Used to describe which xattr keys the xtoc vattr should return.
 */
enum class XtocSemantics : uint8_t {
    None,
    User,
    System,
    All
};
39 
40 /** Subdoc command context. An instance of this exists for the lifetime of
41  *  each subdocument command, and it used to hold information which needs to
42  *  persist across calls to subdoc_executor; for example when one or more
43  *  engine functions return EWOULDBLOCK and hence the executor needs to be
44  *  retried.
45  */
46 class SubdocCmdContext : public CommandContext {
47 public:
48     /**
49      * All subdoc access happens in two phases... First we'll run through
50      * all of the operations on the extended attributes, then we'll run
51      * over all of the ones in the body.
52      */
53     enum class Phase : uint8_t {
54         XATTR,
55         Body
56     };
57 
58     class OperationSpec;
59     typedef std::vector<OperationSpec> Operations;
60 
SubdocCmdContext(Cookie & cookie_,const SubdocCmdTraits traits_)61     SubdocCmdContext(Cookie& cookie_, const SubdocCmdTraits traits_)
62         : cookie(cookie_),
63           connection(cookie_.getConnection()),
64           traits(traits_) {
65     }
66 
67     ENGINE_ERROR_CODE pre_link_document(item_info& info) override;
68 
69     /**
70      * Get the padded value we want to use for values with macro expansion.
71      * Note that the macro name must be evaluated elsewhere as this method
72      * expect the input value to be one of the legal macros.
73      *
74      * @param macro the name of the macro to return the padded value for
75      * @return the buffer we want to pass on to subdoc instead of the macro
76      *         name
77      */
78     cb::const_char_buffer get_padded_macro(cb::const_char_buffer macro);
79 
80     /**
81      * Generate macro padding we may use to substitute a macro with. E.g. We
82      * replace "${Mutation.CAS}" or "${Mutation.seqno}" with the generated
83      * padding. It needs to be wide enough so that we can do an in-place
84      * replacement with the actual CAS or seqno in the pre_link_document
85      * callback.
86      *
87      * We can't really use a hardcoded value (as we would limit the user
88      * for what they could inject in their documents, and we don't want to
89      * suddenly replace user data with a cas value ;).
90      *
91      * This method tries to generate a string, and then scans through the
92      * supplied payload to ensure that it isn't present there before
93      * scanning through all of the values in the xattr modidications to
94      * ensure that it isn't part of any of them either.
95      *
96      * You might think: oh, why don't you just store the pointers to where
97      * in the blob we injected the macro? The problem with that is that
98      * there isn't any restrictions on the order you may specify the
99      * mutations in a multiop, so that you could move the stuff around;
100      * replace it; delete it. That means that you would have to go
101      * through and relocate all of these offsets after each mutation.
102      * Not impossible, but I don't think it would simplify the logic
103      * that much ;-)
104      *
105      * @param payload the JSON value for the xattr to perform macro
106      *                substitution in
107      * @param macro the macro for which we want to generate the padding
108      *
109      * @throws std::logic_error if the macro expansion size is invalid
110      */
111     void generate_macro_padding(cb::const_char_buffer payload,
112                                 cb::xattr::macros::macro macro);
113 
getOperations(const Phase phase)114     Operations& getOperations(const Phase phase) {
115         switch (phase) {
116         case Phase::Body:
117             return operations[0];
118         case Phase::XATTR:
119             return operations[1];
120         }
121         throw std::invalid_argument("SubdocCmdContext::getOperations() invalid phase");
122     }
123 
getOperations()124     Operations& getOperations() {
125         return getOperations(currentPhase);
126     }
127 
getCurrentPhase()128     Phase getCurrentPhase() {
129         return currentPhase;
130     }
131 
setCurrentPhase(Phase phase)132     void setCurrentPhase(Phase phase) {
133         currentPhase = phase;
134     }
135 
136     // Returns the total size of all Operation values (bytes).
137     uint64_t getOperationValueBytesTotal() const;
138 
139     // Cookie this command is associated with.
140     Cookie& cookie;
141 
142     Connection& connection;
143 
144     // The traits for this command.
145     SubdocCmdTraits traits;
146 
147     // The expanded input JSON document. This may either refer to:
148     // a). The raw engine item iovec
149     // b). The 'inflated_doc_buffer' if the input document had to be
150     //     inflated.
151     // c). {intermediate_result} member of this object.
152     // Either way, it should /not/ be cb_free()d.
153     // Note this is *always* in a decompressed form (and hence can safely be
154     // read / manipulated directly) - see get_document_for_searching().
155     // TODO: Remove (b), and just use intermediate result.
156     cb::const_char_buffer in_doc{};
157 
158     // Temporary buffer to hold the inflated content in case of the
159     // document in the engine being compressed
160     cb::compression::Buffer inflated_doc_buffer;
161 
162     // Temporary buffer used to hold the intermediate result document for
163     // multi-path mutations. {in_doc} is then updated to point to this to use
164     // as input for the next multi-path mutation.
165     std::unique_ptr<char[]> temp_doc;
166 
167     // Temporary buffer used to hold the xattrs in use, as a get request
168     // may hold pointers into the repacked xattr buckets
169     std::unique_ptr<char[]> xattr_buffer;
170 
171     // CAS value of the input document. Required to ensure we only store a
172     // new document which was derived from the same original input document.
173     uint64_t in_cas = 0;
174 
175     // Flags of the input document. Required so we can set the same flags to
176     // to the new document, so flags are unchanged by subdoc.
177     uint32_t in_flags = 0;
178 
179     // The datatype for the document currently held in `in_doc`. This
180     // is used to set the new documents datatype.
181     // Note: If the original input was Snappy compressed; it will be
182     // decompressed during fetch (by get_document_for_searching()) - as such
183     // this field will never have the Snappy bit set.
184     protocol_binary_datatype_t in_datatype = PROTOCOL_BINARY_RAW_BYTES;
185 
186     // The state of the document currently held in `in_doc`. This is used
187     // to to set the new documents state.
188     DocumentState in_document_state = DocumentState::Alive;
189 
190     // True if this operation has been successfully executed (via subjson)
191     // and we have valid result.
192     bool executed = false;
193 
194     // [Mutations only] The type of the root element, if flags & FLAG_MKDOC
195     jsonsl_type_t jroot_type = JSONSL_T_ROOT;
196 
197     // [Mutations only] True if the doc does not exist and an insert (rather
198     // than replace) is required.
199     bool needs_new_doc = false;
200 
201     // Overall status of the entire command.
202     // For single-path commands this is simply the same as the first (and only)
203     // opetation, for multi-path it's an aggregate status.
204     protocol_binary_response_status overall_status =
205             PROTOCOL_BINARY_RESPONSE_SUCCESS;
206 
207     // [Mutations only] Mutation sequence number and vBucket UUID. Only set
208     // if the calling connection has the MUTATION_SEQNO feature enabled; to be
209     // included in the response back to the client.
210     uint64_t vbucket_uuid = 0;
211     uint64_t sequence_no = 0;
212 
213     // [Mutations only] Size in bytes of the new item to store into engine.
214     // Held in the context so upon success we can update statistics.
215     size_t out_doc_len = 0;
216 
217     // [Mutations only] New item to store into engine.
218     cb::unique_item_ptr out_doc;
219 
220     // Size in bytes of the response value to send back to the client.
221     size_t response_val_len = 0;
222 
223     // Set to true if one (or more) of the xattr operation wants to do
224     // macro expansion.
225     bool do_macro_expansion = false;
226 
227     // Set to true if we want to operate on deleted documents
228     bool do_allow_deleted_docs = false;
229 
230     // Set to true if we want to delete the document after modifying it
231     bool do_delete_doc = false;
232 
233     // true if there are no system xattrs after the operation. In
234     // reality this means we do a bucket_remove rather than a bucket_update
235     bool no_sys_xattrs = false;
236 
237     /* Specification of a single path operation. Encapsulates both the request
238      * parameters, and (later) the result of the operation.
239      */
240     class OperationSpec {
241     public:
242         // Constructor for lookup operations (no value).
243         OperationSpec(SubdocCmdTraits traits_,
244                       protocol_binary_subdoc_flag flags_,
245                       cb::const_char_buffer path_);
246 
247         // Constructor for operations requiring a value.
248         OperationSpec(SubdocCmdTraits traits_,
249                       protocol_binary_subdoc_flag flags_,
250                       cb::const_char_buffer path_,
251                       cb::const_char_buffer value_);
252 
253         // Move constructor.
254         OperationSpec(OperationSpec&& other);
255 
256         // The traits of this individual Operation.
257         SubdocCmdTraits traits;
258 
259         // The flags set for this individual Operation
260         protocol_binary_subdoc_flag flags;
261 
262         // Path to operate on. Owned by the original request packet.
263         cb::const_char_buffer path;
264 
265         // [For mutations only] Value to apply to document. Owned by the
266         // original request packet.
267         cb::const_char_buffer value;
268 
269         // Status code of the operation.
270         protocol_binary_response_status status;
271 
272         // Result of this operation, to be returned back to the client (for
273         // operations which return a result).
274         Subdoc::Result result;
275     };
276 
277     /**
278      * Get the xattr key being accessed in this context. Only one
279      * xattr key is allowed in each multi op
280      *
281      * @return the key
282      */
get_xattr_key()283     cb::const_char_buffer get_xattr_key() {
284         return xattr_key;
285     }
286 
287     /**
288      * Set the xattr key being accessed in this context. Only one
289      * xattr key is allowed in each multi op
290      *
291      * @param key the key to be accessed
292      */
set_xattr_key(const cb::const_char_buffer & key)293     void set_xattr_key(const cb::const_char_buffer& key) {
294         xattr_key = key;
295     }
296 
297     MutationSemantics mutationSemantics = MutationSemantics::Replace;
298 
299     void setMutationSemantics(mcbp::subdoc::doc_flag docFlags);
300 
301     /**
302      * Get the document containing all of the virtual attributes for
303      * the document. The storage is created the first time the method is called,
304      * and reused for the rest of the lifetime of the context.
305      */
306     cb::const_char_buffer get_document_vattr();
307 
308     /*
309      * Get the xtoc document which contains a list of xattr keys that exist for
310      * the document.
311      */
312     cb::const_char_buffer get_xtoc_vattr();
313 
314     // This is the item info for the item we've fetched from the
315     // database
getInputItemInfo()316     item_info& getInputItemInfo() {
317         return input_item_info;
318     }
319 
320     /**
321      * Initialize all of the internal input variables with a
322      * flat, uncompressed JSON document ready for performing a subjson
323      * operation on it.
324      *
325      * @param client_cas The CAS provided by the client (which should be
326      *                   used for updates to the document
327      *
328      * @return PROTOCOL_BINARY_RESPONSE_SUCCESS for success, otherwise an
329      *         error code which should be returned to the client immediately
330      *         (and stop executing of the command)
331      */
332     protocol_binary_response_status get_document_for_searching(
333             uint64_t client_cas);
334 
335     /**
336      * The result of subdoc_fetch.
337      */
338     cb::unique_item_ptr fetchedItem;
339 
340     XtocSemantics xtocSemantics = XtocSemantics::None;
341 
342 private:
343     // The item info representing the input document
344     item_info input_item_info = {};
345 
346     // The array containing all of the operations requested by the user.
347     // Each element in the array contains the operations which should be
348     // run in each phase. Use `getOperations()` to get the correct entry
349     // in this array as that method contains the logic of where each
350     // phase lives.
351     std::array<Operations, 2> operations;
352 
353     // The phase we're currently operating in
354     Phase currentPhase = Phase::XATTR;
355 
356     template <typename T>
357     std::string macroToString(T macroValue);
358 
359     /**
360      * Check whether or not the SubdocCmdContext contains a given macro
361      * @param macro The macro we are checking for
362      * @return True if the macro exists, False otherwise
363      */
364     bool containsMacro(const cb::const_char_buffer& macro);
365 
366     void substituteMacro(cb::const_char_buffer macroName,
367                          const std::string& macroValue,
368                          cb::char_buffer& value);
369 
370     /**
371      * Returns the value CRC32C of the document processed by the current
372      * subdoc context
373      */
374     uint32_t computeValueCRC32C();
375 
376     // The xattr key being accessed in this command
377     cb::const_char_buffer xattr_key;
378 
379     using MacroPair = std::pair<cb::const_char_buffer, std::string>;
380     std::vector<MacroPair> paddedMacros;
381 
382     std::string document_vattr;
383     std::string xtoc_vattr;
384 }; // class SubdocCmdContext
385