xref: /4.0.0/couchstore/src/views/spatial_modify.c (revision d826ffba)
1/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2#include "config.h"
3#include <assert.h>
4#include <stdlib.h>
5#include <string.h>
6#include <signal.h>
7
8#include "couch_btree.h"
9#include "util.h"
10#include "arena.h"
11#include "node_types.h"
12#include "../util.h"
13#include "spatial.h"
14
15/* NOTE vmx 2014-07-21: spatial_modify.c uses a lot of code from
16 * btree_modify.cc. The difference is the different handling of the reduce.
17 * In spatial views, we use a reduce function to calculate the enclosing
18 * MBB of the parent node, which is then used as key.
19 * In the future there should also be a different flushing/leaf node
20 * partitioning strategy */
21
22static couchstore_error_t flush_spatial_partial(couchfile_modify_result *res,
23                                                size_t mr_quota);
24
25static couchstore_error_t flush_spatial(couchfile_modify_result *res);
26
27/* When flushing the nodes, only fill them up to 2/3 of the threshold.
28 * This way future inserts still have room within the nodes without
29 * forcing a split immediately */
30static couchstore_error_t maybe_flush_spatial(couchfile_modify_result *mr)
31{
32    if(mr->rq->compacting) {
33        /* The compactor can (and should), just write out nodes
34         * of size CHUNK_SIZE as soon as it can, so that it can
35         * free memory it no longer needs. */
36        if (mr->modified && (((mr->node_type == KV_NODE) &&
37            mr->node_len > (mr->rq->kv_chunk_threshold * 2 / 3)) ||
38            ((mr->node_type == KP_NODE) &&
39             mr->node_len > (mr->rq->kp_chunk_threshold * 2 / 3) &&
40             mr->count > 1))) {
41            return flush_spatial(mr);
42        }
43    } else if (mr->modified && mr->count > 3) {
44        /* Don't write out a partial node unless we've collected
45         * at least three items */
46        if ((mr->node_type == KV_NODE) &&
47             mr->node_len > mr->rq->kv_chunk_threshold) {
48            return flush_spatial_partial(
49                mr, (mr->rq->kv_chunk_threshold * 2 / 3));
50        }
51        if ((mr->node_type == KP_NODE) &&
52             mr->node_len > mr->rq->kp_chunk_threshold) {
53            return flush_spatial_partial(
54                mr, (mr->rq->kp_chunk_threshold * 2 / 3));
55        }
56    }
57
58    return COUCHSTORE_SUCCESS;
59}
60
61static nodelist *make_nodelist(arena* a, size_t bufsize)
62{
63    nodelist *r = (nodelist *) arena_alloc(a, sizeof(nodelist) + bufsize);
64    if (r == NULL) {
65        return NULL;
66    }
67    memset(r, 0, sizeof(nodelist));
68    r->data.size = bufsize;
69    r->data.buf = ((char *) r) + (sizeof(nodelist));
70    return r;
71}
72
73static couchfile_modify_result *spatial_make_modres(arena* a,
74                                            couchfile_modify_request *rq)
75{
76    couchfile_modify_result *res = (couchfile_modify_result *) arena_alloc(
77        a, sizeof(couchfile_modify_result));
78    if (res == NULL) {
79        return NULL;
80    }
81    res->rq = rq;
82    res->arena = a;
83    res->arena_transient = NULL;
84    res->values = make_nodelist(a, 0);
85    if (res->values == NULL) {
86        return NULL;
87    }
88    res->values_end = res->values;
89    res->node_len = 0;
90    res->count = 0;
91    res->pointers = make_nodelist(a, 0);
92    if (res->pointers == NULL) {
93        return NULL;
94    }
95    res->pointers_end = res->pointers;
96    res->modified = 0;
97    res->node_type = 0;
98    res->error_state = 0;
99    return res;
100}
101
102couchstore_error_t spatial_push_item(sized_buf *k,
103                                     sized_buf *v,
104                                     couchfile_modify_result *dst)
105{
106    nodelist *itm = NULL;
107    if(dst->arena_transient != NULL)
108    {
109        itm = make_nodelist(dst->arena_transient, 0);
110    } else {
111        assert(dst->arena != NULL);
112        itm = make_nodelist(dst->arena, 0);
113    }
114    if (itm == NULL) {
115        return COUCHSTORE_ERROR_ALLOC_FAIL;
116    }
117    itm->data = *v;
118    itm->key = *k;
119    itm->pointer = NULL;
120    dst->values_end->next = itm;
121    dst->values_end = itm;
122    /* Encoded size (see flush_spatial) */
123    dst->node_len += k->size + v->size + sizeof(raw_kv_length);
124    dst->count++;
125    return maybe_flush_spatial(dst);
126}
127
128static nodelist *encode_pointer(arena* a, node_pointer *ptr)
129{
130    raw_node_pointer *raw;
131    nodelist *pel = make_nodelist(
132        a, sizeof(raw_node_pointer) + ptr->reduce_value.size);
133    if (pel == NULL) {
134        return NULL;
135    }
136    raw = (raw_node_pointer*)pel->data.buf;
137    encode_raw48(ptr->pointer, &raw->pointer);
138    encode_raw48(ptr->subtreesize, &raw->subtreesize);
139    raw->reduce_value_size = encode_raw16((uint16_t)ptr->reduce_value.size);
140    memcpy(raw + 1, ptr->reduce_value.buf, ptr->reduce_value.size);
141    pel->pointer = ptr;
142    pel->key = ptr->key;
143    return pel;
144}
145
146/* Write the current contents of the values list to disk as a node
147 * and add the resulting pointer to the pointers list. */
148static couchstore_error_t flush_spatial(couchfile_modify_result *res)
149{
150    return flush_spatial_partial(res, res->node_len);
151}
152
153/* Write a node using enough items from the values list to create a node
154 * with uncompressed size of at least mr_quota */
155static couchstore_error_t flush_spatial_partial(couchfile_modify_result *res,
156                                                size_t mr_quota)
157{
158    char *dst;
159    couchstore_error_t errcode = COUCHSTORE_SUCCESS;
160    int itmcount = 0;
161    char *nodebuf = NULL;
162    sized_buf writebuf;
163    char reducebuf[MAX_REDUCTION_SIZE];
164    size_t reducesize = 0;
165    uint64_t subtreesize = 0;
166    cs_off_t diskpos;
167    size_t disk_size;
168    nodelist *i, *pel;
169    node_pointer *ptr;
170
171    if (res->values_end == res->values || ! res->modified) {
172        /* Empty */
173        return COUCHSTORE_SUCCESS;
174    }
175
176    /* nodebuf/writebuf is very short-lived and can be large, so use regular
177     * malloc heap for it: */
178    nodebuf = (char *) malloc(res->node_len + 1);
179    if (nodebuf == NULL) {
180        return COUCHSTORE_ERROR_ALLOC_FAIL;
181    }
182
183    writebuf.buf = nodebuf;
184
185    dst = nodebuf;
186    *(dst++) = (char) res->node_type;
187
188    i = res->values->next;
189    /* We don't care that we've reached mr_quota if we haven't written out
190     * at least two items and we're not writing a leaf node. */
191    while (i != NULL &&
192           (mr_quota > 0 || (itmcount < 2 && res->node_type == KP_NODE))) {
193        dst = (char *) write_kv(dst, i->key, i->data);
194        if (i->pointer) {
195            subtreesize += i->pointer->subtreesize;
196        }
197        mr_quota -= i->key.size + i->data.size + sizeof(raw_kv_length);
198        i = i->next;
199        res->count--;
200        itmcount++;
201    }
202
203    writebuf.size = dst - nodebuf;
204
205    errcode = (couchstore_error_t) db_write_buf_compressed(
206        res->rq->file, &writebuf, &diskpos, &disk_size);
207    free(nodebuf);  /* here endeth the nodebuf. */
208    if (errcode != COUCHSTORE_SUCCESS) {
209        return errcode;
210    }
211
212    /* Store the enclosing MBB in the reducebuf */
213    if (res->node_type == KV_NODE && res->rq->reduce) {
214        errcode = res->rq->reduce(
215            reducebuf, &reducesize, res->values->next, itmcount,
216            res->rq->user_reduce_ctx);
217        if (errcode != COUCHSTORE_SUCCESS) {
218            return errcode;
219        }
220        assert(reducesize <= sizeof(reducebuf));
221    }
222
223    if (res->node_type == KP_NODE && res->rq->rereduce) {
224        errcode = res->rq->rereduce(
225            reducebuf, &reducesize, res->values->next, itmcount,
226            res->rq->user_reduce_ctx);
227        if (errcode != COUCHSTORE_SUCCESS) {
228            return errcode;
229        }
230        assert(reducesize <= sizeof(reducebuf));
231    }
232
233    /* `reducesize` one time for the key, one time for the actual reduce */
234    ptr = (node_pointer *) arena_alloc(
235        res->arena, sizeof(node_pointer) + 2 * reducesize);
236    if (ptr == NULL) {
237        return COUCHSTORE_ERROR_ALLOC_FAIL;
238    }
239
240    ptr->key.buf = ((char *)ptr) + sizeof(node_pointer);
241    ptr->reduce_value.buf = ((char *)ptr) + sizeof(node_pointer) + reducesize;
242
243    ptr->key.size = reducesize;
244    ptr->reduce_value.size = reducesize;
245
246    /* Store the enclosing MBB that was calculate in the reduce function
247     * as the key. The reduce also stores it as it is the "Original MBB"
248     * used in the RR*-tree algorithm */
249    memcpy(ptr->key.buf, reducebuf, reducesize);
250    memcpy(ptr->reduce_value.buf, reducebuf, reducesize);
251
252    ptr->subtreesize = subtreesize + disk_size;
253    ptr->pointer = diskpos;
254
255    pel = encode_pointer(res->arena, ptr);
256    if (pel == NULL) {
257        return COUCHSTORE_ERROR_ALLOC_FAIL;
258    }
259
260    res->pointers_end->next = pel;
261    res->pointers_end = pel;
262
263    res->node_len -= (writebuf.size - 1);
264
265    res->values->next = i;
266    if(i == NULL) {
267        res->values_end = res->values;
268    }
269
270    return COUCHSTORE_SUCCESS;
271}
272
273/* Move src pointers list to dst node's values list. */
274static couchstore_error_t spatial_move_pointers(couchfile_modify_result *src,
275                                                couchfile_modify_result *dst)
276{
277    nodelist *ptr, *next;
278
279    couchstore_error_t errcode = COUCHSTORE_SUCCESS;
280    if (src->pointers_end == src->pointers) {
281        return COUCHSTORE_SUCCESS;
282    }
283
284    ptr = src->pointers->next;
285    next = ptr;
286    while (ptr != NULL && errcode == 0) {
287        dst->node_len += ptr->data.size + ptr->key.size +
288                sizeof(raw_kv_length);
289        dst->count++;
290
291        next = ptr->next;
292        ptr->next = NULL;
293
294        dst->values_end->next = ptr;
295        dst->values_end = ptr;
296        ptr = next;
297        error_pass(maybe_flush_spatial(dst));
298    }
299
300cleanup:
301    src->pointers->next = next;
302    src->pointers_end = src->pointers;
303    return errcode;
304}
305
306
307static node_pointer *spatial_finish_root(couchfile_modify_request *rq,
308                                 couchfile_modify_result *root_result,
309                                 couchstore_error_t *errcode)
310{
311    node_pointer *ret_ptr = NULL;
312    couchfile_modify_result *collector = spatial_make_modres(root_result->arena, rq);
313    if (collector == NULL) {
314        *errcode = COUCHSTORE_ERROR_ALLOC_FAIL;
315        return NULL;
316    }
317    collector->modified = 1;
318    collector->node_type = KP_NODE;
319    flush_spatial(root_result);
320    while (1) {
321        if (root_result->pointers_end == root_result->pointers->next) {
322            /* The root result split into exactly one kp_node.
323             * Return the pointer to it. */
324            ret_ptr = root_result->pointers_end->pointer;
325            break;
326        } else {
327            couchfile_modify_result *tmp;
328            /* The root result split into more than one kp_node.
329             * Move the pointer list to the value list and write out the new
330             * node. */
331            *errcode = spatial_move_pointers(root_result, collector);
332
333            if (*errcode < 0) {
334                return NULL;
335            }
336
337            *errcode = flush_spatial(collector);
338            if (*errcode < 0) {
339                return NULL;
340            }
341            /* Swap root_result and collector mr's. */
342            tmp = root_result;
343            root_result = collector;
344            collector = tmp;
345        }
346    }
347
348    return ret_ptr;
349}
350
351
352/* Finished creating a new spatial tree, build pointers and get a root node. */
353node_pointer* complete_new_spatial(couchfile_modify_result* mr,
354                                   couchstore_error_t *errcode)
355{
356    couchfile_modify_result* targ_mr;
357    node_pointer* ret_ptr;
358
359    *errcode = flush_spatial(mr);
360    if(*errcode != COUCHSTORE_SUCCESS) {
361        return NULL;
362    }
363
364    targ_mr = spatial_make_modres(mr->arena, mr->rq);
365    if (targ_mr == NULL) {
366        *errcode = COUCHSTORE_ERROR_ALLOC_FAIL;
367        return NULL;
368    }
369    targ_mr->modified = 1;
370    targ_mr->node_type = KP_NODE;
371
372    *errcode = spatial_move_pointers(mr, targ_mr);
373    if(*errcode != COUCHSTORE_SUCCESS) {
374        return NULL;
375    }
376
377    if(targ_mr->count > 1 || targ_mr->pointers != targ_mr->pointers_end) {
378        ret_ptr = spatial_finish_root(mr->rq, targ_mr, errcode);
379    } else {
380        ret_ptr = targ_mr->values_end->pointer;
381    }
382
383    if (*errcode != COUCHSTORE_SUCCESS || ret_ptr == NULL) {
384        return NULL;
385    }
386
387    ret_ptr = copy_node_pointer(ret_ptr);
388    if (ret_ptr == NULL) {
389        *errcode = COUCHSTORE_ERROR_ALLOC_FAIL;
390    }
391    return ret_ptr;
392}
393