1% Licensed under the Apache License, Version 2.0 (the "License"); you may not
2% use this file except in compliance with the License. You may obtain a copy of
3% the License at
4%
5%   http://www.apache.org/licenses/LICENSE-2.0
6%
7% Unless required by applicable law or agreed to in writing, software
8% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10% License for the specific language governing permissions and limitations under
11% the License.
12
13-module(spatial_view).
14
15% Needed for `dict()` type on Erlang >= 17.0
16-compile(nowarn_deprecated_type).
17
18% For the updater
19-export([write_kvs/3, finish_build/3, get_state/1, view_name/2,
20         update_spatial/3, view_bitmap/1]).
21-export([encode_key_docid/2]).
22-export([convert_primary_index_kvs_to_binary/3]).
23-export([convert_back_index_kvs_to_binary/2]).
24-export([view_insert_doc_query_results/6]).
25% For the group
26-export([design_doc_to_set_view_group/2, view_group_data_size/2,
27         reset_view/1, setup_views/5, set_state/2]).
28% For the utils
29-export([clean_views/2, view_info/1]).
30% For the main module
31-export([get_row_count/1, make_wrapper_fun/2, fold/4, index_extension/0,
32         make_key_options/1, should_filter/1, query_args_view_name/1]).
33-export([stats_ets/1, server_name/1, sig_to_pid_ets/1, name_to_sig_ets/1,
34         pid_to_sig_ets/1]).
35% For the couch_set_view like calls
36-export([get_spatial_view/4]).
37% For couch_db
38-export([validate_ddoc/1]).
39
40
41-include_lib("couch_spatial.hrl").
42-include("couch_db.hrl").
43-include_lib("couch_set_view/include/couch_set_view.hrl").
44-include_lib("vtree/include/vtree.hrl").
45
46% Same as in couch_btree.erl
47-define(POINTER_BITS,   48).
48-define(TREE_SIZE_BITS, 48).
49-define(KEY_BITS,       12).
50-define(MAX_KEY_SIZE,   ((1 bsl ?KEY_BITS) - 1)).
51-define(VALUE_BITS,     28).
52
53% Same as the compactor uses for the ID-btree
54-define(SORTED_CHUNK_SIZE, 1024 * 1024).
55
56% View specific limits (same as in couch_set_view_updater).
57-define(VIEW_SINGLE_VALUE_BITS,     24).
58-define(VIEW_ALL_VALUES_BITS,       ?VALUE_BITS).
59-define(VIEW_GEOMETRY_BITS,         24).
60-define(MAX_VIEW_SINGLE_VALUE_SIZE, ((1 bsl ?VIEW_SINGLE_VALUE_BITS) - 1)).
61-define(MAX_VIEW_ALL_VALUES_SIZE,   ((1 bsl ?VIEW_ALL_VALUES_BITS) - 1)).
62-define(MAX_VIEW_GEOMETRY_SIZE,     ((1 bsl ?VIEW_GEOMETRY_BITS) - 1)).
63
64
65-define(SPATIAL_VIEW_SERVER_NAME_PROD, spatial_view_server_name_prod).
66-define(SPATIAL_VIEW_SERVER_NAME_DEV, spatial_view_server_name_dev).
67-define(SPATIAL_VIEW_STATS_ETS_PROD, spatial_view_stats_prod).
68-define(SPATIAL_VIEW_NAME_TO_SIG_ETS_PROD, spatial_view_name_to_sig_prod).
69-define(SPATIAL_VIEW_SIG_TO_PID_ETS_PROD, spatial_view_sig_to_pid_prod).
70-define(SPATIAL_VIEW_PID_TO_SIG_ETS_PROD, spatial_view_pid_to_sig_prod).
71-define(SPATIAL_VIEW_STATS_ETS_DEV, spatial_view_stats_dev).
72-define(SPATIAL_VIEW_NAME_TO_SIG_ETS_DEV, spatial_view_name_to_sig_dev).
73-define(SPATIAL_VIEW_SIG_TO_PID_ETS_DEV, spatial_view_sig_to_pid_dev).
74-define(SPATIAL_VIEW_PID_TO_SIG_ETS_DEV, spatial_view_pid_to_sig_dev).
75
76
77write_kvs(Group, TmpFiles0, ViewKVs) ->
78    lists:foldl(
79        fun({#set_view{id_num = Id}, KvList}, {AccCount, TmpFiles}) ->
80            TmpFileInfo0 = dict:fetch(Id, TmpFiles),
81            Mbb = calc_mbb(KvList),
82            Mbb2 = merge_mbbs(Mbb, TmpFileInfo0#set_view_tmp_file_info.extra),
83            TmpFileInfo = TmpFileInfo0#set_view_tmp_file_info{extra = Mbb2},
84
85            KvBins = convert_primary_index_kvs_to_binary(KvList, Group, []),
86            ViewRecords = lists:foldr(
87                fun({KeyBin, ValBin}, Acc) ->
88                    KvBin = [<<(byte_size(KeyBin)):16>>, KeyBin, ValBin],
89                    [[<<(iolist_size(KvBin)):32/native>>, KvBin] | Acc]
90                end,
91                [], KvBins),
92            ok = file:write(
93                TmpFileInfo#set_view_tmp_file_info.fd, ViewRecords),
94            TmpFiles2 = dict:store(Id, TmpFileInfo, TmpFiles),
95            {AccCount + length(KvBins), TmpFiles2}
96        end,
97        {0, TmpFiles0}, ViewKVs).
98
99
100% Calculates the enclosing multidimensional bounding box
101-spec calc_mbb([{{[[number()]], binary()}, binary()}]) -> [[number()]] | nil.
102calc_mbb(KvList) ->
103    Less = fun(A, B) -> A < B end,
104    lists:foldl(fun
105        ({{Key, _DocId}, _PartIdValue}, nil) ->
106            Key;
107        ({{Key, _DocId}, _PartIdValue}, Mbb) ->
108            lists:map(fun({[KeyMin, KeyMax], [MbbMin, MbbMax]}) ->
109                [vtree_util:min({KeyMin, MbbMin}, Less),
110                 vtree_util:max({KeyMax, MbbMax}, Less)]
111            end, lists:zip(Key, Mbb))
112    end, nil, KvList).
113
114
115-spec merge_mbbs([[number()]] | nil, [[number()]] | nil) -> [[number()]] | nil.
116merge_mbbs(nil, MbbB) ->
117    MbbB;
118merge_mbbs(MbbA, nil) ->
119    MbbA;
120merge_mbbs(MbbA, MbbB) ->
121    Less = fun(A, B) -> A < B end,
122    lists:map(fun({[MinA, MaxA], [MinB, MaxB]}) ->
123        [vtree_util:min({MinA, MinB}, Less),
124         vtree_util:max({MaxA, MaxB}, Less)]
125    end, lists:zip(MbbA, MbbB)).
126
127
128% Convert the key from a list of 2-tuples to a list of raw doubles
129% NOTE vmx 2014-12-12: The first case could be more efficient, but for now
130% it's good enough. Once the enclosing MBB for the initial index build is
131% no longer needed, this case can go away as `calc_mbb` will be no longer
132% needed in `write_kvs/3`.
133-spec convert_primary_index_kvs_to_binary(
134        [{{binary(), binary()},
135          {partition_id(),
136           {dups, [{binary(), binary()}]} | [{binary(), binary()}]}}],
137        #set_view_group{}, [{binary(), binary()}]) -> [{binary(), binary()}].
138convert_primary_index_kvs_to_binary([], _Group, Acc) ->
139    lists:reverse(Acc);
140convert_primary_index_kvs_to_binary([H | Rest], Group, Acc)->
141    {{Key, DocId}, {PartId, Value}} = H,
142    KeyBin = encode_key_docid(Key, DocId),
143    couch_set_view_util:check_primary_key_size(
144        KeyBin, ?MAX_KEY_SIZE, Key, DocId, Group),
145    ValueBin = convert_values_to_binary(Value, Key, DocId, Group),
146    ValueBin2 = <<PartId:16, ValueBin/binary>>,
147    couch_set_view_util:check_primary_value_size(
148        ValueBin2, ?MAX_VIEW_ALL_VALUES_SIZE, Key, DocId, Group),
149    convert_primary_index_kvs_to_binary(
150        Rest, Group, [{KeyBin, ValueBin2} | Acc]).
151
152
153-spec convert_body_geometry_to_binary({binary(), binary()}, [[number()]], binary(),
154                                      any()) -> binary().
155convert_body_geometry_to_binary({Body, Geom}, Key, DocId, Group) ->
156    couch_set_view_util:check_primary_value_size(
157        Body, ?MAX_VIEW_SINGLE_VALUE_SIZE, Key, DocId, Group),
158    check_primary_geometry_size(
159        Geom, ?MAX_VIEW_GEOMETRY_SIZE, Key, DocId, Group),
160    <<(byte_size(Body)):?VIEW_SINGLE_VALUE_BITS,
161      (byte_size(Geom)):?VIEW_GEOMETRY_BITS,
162      Body/binary, Geom/binary>>.
163
164
165-spec convert_values_to_binary(
166        {dups, [{binary(), binary()}]} | {binary(), binary()},
167        [[number()]], binary(), #set_view_group{}) -> binary().
168convert_values_to_binary({dups, Values}, Key, DocId, Group) ->
169    lists:foldl(fun(V, Acc2) ->
170        Bin = convert_body_geometry_to_binary(V, Key, DocId, Group),
171        <<Acc2/binary, Bin/binary>>
172    end, <<>>, Values);
173convert_values_to_binary(Value, Key, DocId, Group) ->
174    convert_body_geometry_to_binary(Value, Key, DocId, Group).
175
176
177
178-spec encode_key_docid(binary() | [[number()]], binary()) -> binary().
179encode_key_docid(BinMbb, DocId) when is_binary(BinMbb) ->
180    % A double has 8 bytes
181    NumDobules = byte_size(BinMbb) div 8,
182    <<NumDobules:16, BinMbb/binary, DocId/binary>>;
183encode_key_docid(Key, DocId) ->
184    BinKey = encode_key(Key),
185    % Prefix the key with the number of doubles
186    <<(length(Key) * 2):16, BinKey/binary, DocId/binary>>.
187
188-spec encode_key([[number()]]) -> binary().
189encode_key(Key) ->
190    << <<Min:64/native-float, Max:64/native-float>> || [Min, Max] <- Key>>.
191
192
193-spec decode_key_docid(binary()) -> {[number()], binary()}.
194decode_key_docid(<<NumDoubles:16, Rest/binary>>) ->
195    % A double has 8 bytes
196    KeySize = NumDoubles * 8,
197    <<BinKey:KeySize/binary, DocId/binary>> = Rest,
198    Key = vtree_io:decode_mbb(BinKey),
199    {Key, DocId}.
200
201
202% Build the tree out of the sorted files
203-spec finish_build(#set_view_group{}, dict(), string()) ->
204                          {#set_view_group{}, pid()}.
205finish_build(Group, TmpFiles, TmpDir) ->
206    #set_view_group{
207        sig = Sig,
208        id_btree = IdBtree,
209        views = SetViews
210    } = Group,
211    case os:find_executable("couch_view_index_builder") of
212    false ->
213        Cmd = nil,
214        throw(<<"couch_view_index_builder command not found">>);
215    Cmd ->
216        ok
217    end,
218    Options = [exit_status, use_stdio, stderr_to_stdout, {line, 4096}, binary],
219    Port = open_port({spawn_executable, Cmd}, Options),
220
221    % The external process that builds up the tree needs to know the enclosing
222    % bounding box of the data. The easiest way is to temporarily store it as
223    % a vtree that will later on be replaced with the real data.
224    SetViews2 = lists:map(fun(SetView) ->
225        #set_view{
226            id_num = Id,
227            indexer = View
228        } = SetView,
229        #set_view_tmp_file_info{
230            extra = Mbb
231        } = dict:fetch(Id, TmpFiles),
232        Key = case Mbb of
233        nil ->
234            [];
235        _ ->
236            [list_to_tuple(M) || M <- Mbb]
237        end,
238        SetView#set_view{
239            indexer = View#spatial_view{
240                vtree = #vtree{
241                    root = #kp_node{
242                        key = Key
243                    }
244                }
245            }
246        }
247    end, SetViews),
248
249    Group2 = Group#set_view_group{views = SetViews2},
250    couch_set_view_util:send_group_info(Group2, Port),
251    true = port_command(Port, [TmpDir, $\n]),
252    #set_view_tmp_file_info{name = IdFile} = dict:fetch(ids_index, TmpFiles),
253    DestPath = couch_set_view_util:new_sort_file_path(TmpDir, updater),
254    true = port_command(Port, [DestPath, $\n, IdFile, $\n]),
255    lists:foreach(
256        fun(#set_view{id_num = Id}) ->
257            #set_view_tmp_file_info{
258                name = ViewFile
259            } = dict:fetch(Id, TmpFiles),
260            true = port_command(Port, [ViewFile, $\n])
261        end,
262        SetViews2),
263
264    try
265        couch_set_view_updater_helper:index_builder_wait_loop(Port, Group2, [])
266    after
267        catch port_close(Port)
268    end,
269
270    {ok, NewFd} = couch_file:open(DestPath),
271    unlink(NewFd),
272    {ok, HeaderBin, NewHeaderPos} = couch_file:read_header_bin(NewFd),
273    HeaderSig = couch_set_view_util:header_bin_sig(HeaderBin),
274    case HeaderSig == Sig of
275    true ->
276        ok;
277    false ->
278        couch_file:close(NewFd),
279        ok = file2:delete(DestPath),
280        throw({error, <<"Corrupted initial build destination file.\n">>})
281    end,
282    NewHeader = couch_set_view_util:header_bin_to_term(HeaderBin),
283    #set_view_index_header{
284        id_btree_state = NewIdBtreeRoot,
285        view_states = NewViewRoots
286    } = NewHeader,
287    NewIdBtree = couch_btree:set_state(IdBtree#btree{fd = NewFd}, NewIdBtreeRoot),
288    NewSetViews = lists:zipwith(
289        % XXX vmx 2014-02-04: Refactor this function out into one called
290        %     `update_states`
291        fun(#set_view{indexer = View} = V, NewRoot) ->
292            #spatial_view{vtree = Vt} = View,
293            NewVt = set_vtree_state(Vt#vtree{fd = NewFd}, NewRoot),
294            NewView = View#spatial_view{vtree = NewVt},
295            V#set_view{indexer = NewView}
296        end,
297        SetViews2, NewViewRoots),
298
299    NewGroup = Group#set_view_group{
300        id_btree = NewIdBtree,
301        views = NewSetViews,
302        index_header = NewHeader,
303        header_pos = NewHeaderPos
304    },
305    {NewGroup, NewFd}.
306
307
308% In order to build the spatial index bottom-up we need to have supply
309% the enclosing bounding box
310view_info(#spatial_view{vtree = Vt}) ->
311    case Vt#vtree.root of
312    nil ->
313        [<<"0">>, $\n];
314    #kp_node{key = Mbb0, childpointer = Pointer} ->
315        Mbb = case Mbb0 of
316        nil ->
317            Children = vtree_io:read_node(Vt#vtree.fd, Pointer),
318            vtree_util:nodes_mbb(Children, Vt#vtree.less);
319        _ ->
320            Mbb0
321       end,
322       MbbEncoded = vtree_io:encode_mbb(Mbb),
323       Dimension = list_to_binary(integer_to_list(length(Mbb))),
324       [Dimension, $\n, MbbEncoded]
325    end.
326
327% Return the state of a view (which will be stored in the header)
328get_state(View) ->
329    get_vtree_state(View#spatial_view.vtree).
330
331% XXX vmx 2013-01-02: Should perhaps be moved to the vtree itself (and then
332%    renamed to `get_state`
333get_vtree_state(Vt) ->
334    case Vt#vtree.root of
335    nil ->
336        nil;
337    #kp_node{} = Root ->
338        #kp_node{
339            childpointer = Pointer,
340            treesize = Size,
341            mbb_orig = MbbOrig
342        } = Root,
343        BinMbbOrig = vtree_io:encode_mbb(MbbOrig),
344        % The length is the number of doubles, not the dimension, hence use
345        % the length two times the length of MbbOrig
346        NumMbbOrig = length(MbbOrig) * 2,
347        Rest = <<NumMbbOrig:16, BinMbbOrig/binary>>,
348        <<Pointer:?POINTER_BITS, Size:?TREE_SIZE_BITS, Rest/binary>>
349    end.
350
351set_state(View, State) ->
352    Vt = set_vtree_state(View#spatial_view.vtree, State),
353    View#spatial_view{vtree = Vt}.
354
355% XXX vmx 2014-02-04: Should perhaps be moved to the vtree itself (and then
356%    renamed to `set_state`
357set_vtree_state(Vt, Root0) ->
358    Root = case Root0 of
359    nil ->
360        nil;
361    <<Pointer:?POINTER_BITS, Size:?TREE_SIZE_BITS, Rest/binary>> ->
362        <<_NumMbb:16, BinMbb/binary>> = Rest,
363        MbbOrig = vtree_io:decode_mbb(BinMbb),
364        #kp_node{
365            % The root node pointer doesn't have a key
366            key = nil,
367            childpointer = Pointer,
368            treesize = Size,
369            mbb_orig = MbbOrig
370        }
371    end,
372    Vt#vtree{root = Root}.
373
374
375% The spatial index doesn't store the bitmap for the full structure,
376% it only stores the partition IDs in the leaf nodes. Those get
377% filtered out on query time
378view_bitmap(_View) ->
379    0.
380
381view_name(#set_view_group{views = SetViews}, ViewPos) ->
382    View = (lists:nth(ViewPos, SetViews))#set_view.indexer,
383    case View#spatial_view.map_names of
384    [Name | _] ->
385        Name
386    end.
387
388
389% This function does what the C-based native updater does for mapreduce views
390-spec update_spatial([#set_view{}], [string()], non_neg_integer()) ->
391                            [#set_view{}].
392update_spatial(Views, [], _) ->
393    Views;
394update_spatial(Views, LogFiles, MaxBatchSize) ->
395    lists:zipwith(fun(View, LogFile) ->
396        {ok, Fd} = file2:open(LogFile, [read, raw, binary, read_ahead]),
397        try
398            process_log_file(View, Fd, MaxBatchSize, 0, [])
399        after
400            ok = file:close(Fd)
401        end
402    end, Views, LogFiles).
403
404
405-spec process_log_file(#set_view{}, file:io_device(), non_neg_integer(),
406                       non_neg_integer(),
407                       [{insert, binary(), binary()} | {remove, binary()}]) ->
408                              #set_view{}.
409% Don't include the last operation as it made the batch size bigger than the
410% maximum batch size
411process_log_file(View0, LogFd, MaxBatchSize, BatchSize, [H | Ops])
412        when BatchSize > MaxBatchSize ->
413    View = flush_writes(View0, lists:reverse(Ops)),
414    process_log_file(View, LogFd, MaxBatchSize, 0, [H]);
415process_log_file(View, LogFd, MaxBatchSize, BatchSize, Ops) ->
416    case file:read(LogFd, 4) of
417    {ok, <<Size:32/native>>} ->
418        {ok, <<OpCode:8, KeySize:16, Key:KeySize/binary, Value/binary>>} =
419            file:read(LogFd, Size),
420        OpAtom = couch_set_view_updater_helper:code_to_op(OpCode),
421        Op = case OpAtom of
422        insert ->
423            {OpAtom, Key, Value};
424        remove ->
425            {OpAtom, Key}
426        end,
427        process_log_file(View, LogFd, MaxBatchSize, BatchSize + Size - 1 - 2, [Op | Ops]);
428    eof ->
429        flush_writes(View, lists:reverse(Ops));
430    {error, Error} ->
431        throw({file_read_error, Error})
432    end.
433
434% Insert the data into the view file
435-spec flush_writes(#set_view{},
436                   [{insert, binary(), binary()} | {remove, binary()}]) ->
437                          #set_view{}.
438flush_writes(SetView, Ops) ->
439    #set_view{
440        indexer = #spatial_view{
441            vtree = Vt
442        } = View
443    } = SetView,
444    {OpsToAdd, OpsToRemove} = lists:partition(
445        fun({insert, _, _}) -> true;
446           ({remove, _}) -> false
447        end, Ops),
448    KvNodesToRemove = lists:map(fun({remove, KeyDocId}) ->
449        {Key, DocId} = decode_key_docid(KeyDocId),
450        #kv_node{
451            key = Key,
452            docid = DocId
453        }
454    end, OpsToRemove),
455    KvNodesToAdd = lists:map(fun({insert, KeyDocId, Value}) ->
456        {Key, DocId} = decode_key_docid(KeyDocId),
457        <<PartId:16, Values/binary>> = Value,
458        BodyGeoms = vtree_io:decode_dups(Values),
459        {Body, Geom} = case BodyGeoms of
460        [{B, G}] ->
461           {B, G};
462        _ ->
463           {BodyDups, GeomDups} = lists:unzip(BodyGeoms),
464           {{dups, BodyDups}, {dups, GeomDups}}
465        end,
466        #kv_node{
467            key = Key,
468            docid = DocId,
469            body = Body,
470            partition = PartId,
471            geometry = Geom
472        }
473    end, OpsToAdd),
474    Vt2 = vtree_delete:delete(Vt, KvNodesToRemove),
475    Vt3 = vtree_insert:insert(Vt2, KvNodesToAdd),
476    SetView#set_view{
477        indexer = View#spatial_view{
478            vtree = Vt3
479        }
480    }.
481
482
483-spec design_doc_to_set_view_group(binary(), #doc{}) -> #set_view_group{}.
484design_doc_to_set_view_group(SetName, #doc{id = Id, body = {Fields}}) ->
485    {DesignOptions} = couch_util:get_value(<<"options">>, Fields, {[]}),
486    {RawViews} = couch_util:get_value(<<"spatial">>, Fields, {[]}),
487    % add the views to a dictionary object, with the map source as the key
488    DictBySrc =
489    lists:foldl(
490        fun({Name, MapSrc}, DictBySrcAcc) ->
491            View =
492            case dict:find(MapSrc, DictBySrcAcc) of
493                {ok, View0} -> View0;
494                error -> #set_view{def = MapSrc, indexer = #spatial_view{}}
495            end,
496            MapNames = [Name | View#set_view.indexer#spatial_view.map_names],
497            View2 = View#set_view{indexer = #spatial_view{map_names = MapNames}},
498            dict:store(MapSrc, View2, DictBySrcAcc)
499        end, dict:new(), RawViews),
500    % number the views
501    {SetViews, _N} = lists:mapfoldl(
502        fun({_Src, SetView}, N) ->
503            {SetView#set_view{id_num = N}, N + 1}
504        end,
505        0, lists:sort(dict:to_list(DictBySrc))),
506    SetViewGroup = #set_view_group{
507        set_name = SetName,
508        name = Id,
509        views = SetViews,
510        design_options = DesignOptions,
511        mod = ?MODULE,
512        extension = index_extension()
513    },
514    set_view_sig(SetViewGroup).
515
516
517% Make sure the signature changes whenever the definitions of the spatial
518% views change. This way a view gets re-generated. Re-sorting the view
519% definitions within the JSON object won't change the signature.
520-spec set_view_for_sig(#set_view{}) -> [binary()].
521set_view_for_sig(SetView) ->
522    #set_view{
523        def = Def,
524        indexer = #spatial_view{
525            map_names = MapNames
526        }
527    } = SetView,
528    [MapNames, Def].
529
530
531-spec set_view_sig(#set_view_group{}) -> #set_view_group{}.
532set_view_sig(#set_view_group{views = SetViews} = Group) ->
533    % With using an iolist for the signature, it's easier possible to replace
534    % it with a C-based implementation in the future. It's also easier to add
535    % additional optional things without changing the signature of exisisting
536    % views.
537    SetViews2 = [set_view_for_sig(SetView) || SetView <- SetViews],
538    Sig = couch_util:md5(iolist_to_binary(SetViews2)),
539    Group#set_view_group{sig = Sig}.
540
541
542-spec index_extension() -> string().
543index_extension() ->
544    ".spatial".
545
546
547-spec view_group_data_size(#btree{}, [#set_view{}]) -> non_neg_integer().
548view_group_data_size(IdBtree, Views) ->
549    lists:foldl(
550        fun(SetView, Acc) ->
551            Root = ((SetView#set_view.indexer)#spatial_view.vtree)#vtree.root,
552            Acc + vtree_io:treesize(Root)
553        end,
554        couch_btree:size(IdBtree),
555        Views).
556
557
558
559reset_view(View) ->
560    View#spatial_view{vtree = nil}.
561
562
563setup_views(Fd, _BtreeOptions, _Group, ViewStates, Views) ->
564    KvChunkThreshold = couch_config:get(
565        "spatial_views", "vtree_kv_node_threshold", "2000"),
566    KpChunkThreshold = couch_config:get(
567        "spatial_views", "vtree_kp_node_threshold", "2000"),
568    MinFillRate = couch_config:get(
569        "spatial_views", "vtree_min_fill_rate", "0.4"),
570
571    lists:zipwith(fun(VTState, SetView) ->
572        Less = fun(A, B) -> A < B end,
573        Root = case VTState of
574        nil ->
575            nil;
576        <<Pointer:?POINTER_BITS, Size:?TREE_SIZE_BITS, Rest/binary>> ->
577            <<_NumMbb:16, BinMbb/binary>> = Rest,
578            MbbOrig = vtree_io:decode_mbb(BinMbb),
579            #kp_node{
580                % The root node pointer doesn't have a key
581                key = nil,
582                childpointer = Pointer,
583                treesize = Size,
584                mbb_orig = MbbOrig
585            }
586        end,
587        Vtree = #vtree{
588            root = Root,
589            min_fill_rate = list_to_float(MinFillRate),
590            kp_chunk_threshold = list_to_integer(KpChunkThreshold),
591            kv_chunk_threshold = list_to_integer(KvChunkThreshold),
592            less = Less,
593            fd = Fd
594        },
595        Indexer = SetView#set_view.indexer,
596        SetView#set_view{indexer = Indexer#spatial_view{vtree = Vtree}}
597    end,
598    ViewStates, Views).
599
600
601% The vtree currently doesn't support purges, hence this is a no-op
602% XXX vmx 2014-07-30: This is needed to have correct results after a rebalance
603clean_views(SetViews0, CleanupParts) ->
604    SetViews = lists:map(fun(SetView) ->
605        View = SetView#set_view.indexer,
606        Vt0 = View#spatial_view.vtree,
607        % NOTE vmx 2014-08-05: currently the tree manipulation code expects
608        % KV-nodes as input
609        CleanupNodes = [#kv_node{partition = PartId} ||
610            PartId <- CleanupParts],
611        Vt = vtree_cleanup:cleanup(Vt0, CleanupNodes),
612        SetView#set_view{
613            indexer = View#spatial_view{
614                vtree = Vt
615            }
616        }
617    end, SetViews0),
618    {0, SetViews}.
619
620
621-spec get_row_count(#set_view{}) -> non_neg_integer().
622get_row_count(_SetView) ->
623    % XXX vmx 2013-07-04: Implement it properly with reduces. For now
624    %     just always return 0 and hope it doesn't brak anything.
625    0.
626
627
628make_wrapper_fun(Fun, Filter) ->
629    % TODO vmx 2013-07-22: Support de-duplication
630    case Filter of
631    false ->
632        fun(Node, Acc) ->
633            ExpandedNode = expand_dups(Node),
634            fold_fun(Fun, ExpandedNode, Acc)
635        end;
636    {true, _, IncludeBitmask} ->
637        fun(Node, Acc) ->
638            case (1 bsl Node#kv_node.partition) band IncludeBitmask of
639            0 ->
640                {ok, Acc};
641            _ ->
642                ExpandedNode = expand_dups(Node),
643                fold_fun(Fun, ExpandedNode, Acc)
644           end
645        end
646    end.
647
648
649-spec fold_fun(
650        fun(({{[[number()]], binary()}, {partition_id(), binary(), json()}},
651             any()) -> {ok | stop, any()}),
652        [#kv_node{}], any()) -> {ok | stop, any()}.
653fold_fun(_Fun, [], Acc) ->
654    {ok, Acc};
655fold_fun(Fun, [Node | Rest], Acc) ->
656    #kv_node{
657        key = Key0,
658        docid = DocId,
659        body = Body,
660        geometry = Geom,
661        partition = PartId
662    } = Node,
663    case Geom of
664    <<>> ->
665        JsonGeom = nil;
666    _ ->
667        {ok, JsonGeom} = wkb_reader:wkb_to_geojson(Geom)
668    end,
669    % NOTE vmx 2013-07-11: The key needs to be able to be encoded as JSON.
670    %     Think about how to encode the MBB so less conversion is needed.
671    Key = [[Min, Max] || {Min, Max} <- Key0],
672    case Fun({{Key, DocId}, {PartId, Body, JsonGeom}}, Acc) of
673    {ok, Acc2} ->
674        fold_fun(Fun, Rest, Acc2);
675    {stop, Acc2} ->
676        {stop, Acc2}
677    end.
678
679
680fold(View, WrapperFun, Acc, Options) ->
681    #spatial_view{
682       vtree = #vtree{
683           root = Root
684       } = Vt
685    } = View,
686    case Root of
687    nil ->
688        {ok, nil, Acc};
689     % Use the original MBB for comparison as the key is not necessarily
690     % set. The original MBB is good enough for this check as it will have
691     % the same dimesionality.
692    #kp_node{mbb_orig = MbbOrig} ->
693        Range = couch_util:get_value(range, Options, []),
694        Result = case Range of
695        [] ->
696            vtree_search:all(Vt, WrapperFun, Acc);
697        _ when length(Range) =/= length(MbbOrig) ->
698            throw(list_to_binary(io_lib:format(
699                "The query range must have the same dimensionality as "
700                "the index. Your range was `~10000p`, but the index has a "
701                "dimensionality of `~p`.", [Range, length(MbbOrig)])));
702        _ ->
703            vtree_search:search(Vt, [Range], WrapperFun, Acc)
704        end,
705        {ok, nil, Result}
706    end.
707
708
709-spec expand_dups(#kv_node{}) -> [#kv_node{}].
710expand_dups(#kv_node{body = {dups, BodyDups}, geometry = {dups, GeomDups}}
711        = Node) ->
712    [Node#kv_node{body = B, geometry = G} || {B, G}
713        <- lists:zip(BodyDups, GeomDups)];
714expand_dups(Node) ->
715    [Node].
716
717
718% The following functions have their counterpart in couch_set_view
719
720get_spatial_view(SetName, DDoc, ViewName, Req) ->
721    #set_view_group_req{wanted_partitions = WantedPartitions} = Req,
722    try
723        {ok, Group0} = couch_set_view:get_group(
724            spatial_view, SetName, DDoc, Req),
725        {Group, Unindexed} = couch_set_view:modify_bitmasks(
726            Group0, WantedPartitions),
727        case get_spatial_view0(ViewName, Group#set_view_group.views) of
728        {ok, View} ->
729            {ok, View, Group, Unindexed};
730        Else ->
731            Else
732        end
733    catch
734    throw:{error, empty_group} ->
735        {not_found, missing_named_view}
736    end.
737
738get_spatial_view0(_Name, []) ->
739    {not_found, missing_named_view};
740get_spatial_view0(Name, [#set_view{} = View|Rest]) ->
741    MapNames = (View#set_view.indexer)#spatial_view.map_names,
742    case lists:member(Name, MapNames) of
743        true -> {ok, View};
744        false -> get_spatial_view0(Name, Rest)
745    end.
746
747
748% Processes the query paramters to extract the significant values when
749% traversing the vtree
750-spec make_key_options(#spatial_query_args{}) -> [{atom(), term()}].
751make_key_options(ViewQueryArgs) ->
752    [{range, ViewQueryArgs#spatial_query_args.range}].
753
754
755-spec should_filter(#spatial_query_args{}) -> boolean().
756should_filter(ViewQueryArgs) ->
757    ViewQueryArgs#spatial_query_args.filter.
758
759
760stats_ets(prod) ->
761    ?SPATIAL_VIEW_STATS_ETS_PROD;
762stats_ets(dev) ->
763    ?SPATIAL_VIEW_STATS_ETS_DEV.
764
765server_name(prod) ->
766    ?SPATIAL_VIEW_SERVER_NAME_PROD;
767server_name(dev) ->
768    ?SPATIAL_VIEW_SERVER_NAME_DEV.
769
770sig_to_pid_ets(prod) ->
771    ?SPATIAL_VIEW_SIG_TO_PID_ETS_PROD;
772sig_to_pid_ets(dev) ->
773    ?SPATIAL_VIEW_SIG_TO_PID_ETS_DEV.
774
775name_to_sig_ets(prod) ->
776    ?SPATIAL_VIEW_NAME_TO_SIG_ETS_PROD;
777name_to_sig_ets(dev) ->
778    ?SPATIAL_VIEW_NAME_TO_SIG_ETS_DEV.
779
780pid_to_sig_ets(prod) ->
781    ?SPATIAL_VIEW_PID_TO_SIG_ETS_PROD;
782pid_to_sig_ets(dev) ->
783    ?SPATIAL_VIEW_PID_TO_SIG_ETS_DEV.
784
785
786% The key might contain a geometry or points instead of ranges for certain
787% dimensions.
788% In case of a geometry, calculate the bounding box and return it.
789% In case of a point instead of a range, create a collapsed range
790-spec maybe_process_key(binary()) -> {[[number()]], geom() | nil}.
791maybe_process_key(Key0) ->
792    Key = ?JSON_DECODE(Key0),
793    case Key of
794    % Legacy API when only a geometry is given
795    {Geom} ->
796        process_geometry(Geom);
797    [_ | _] ->
798        case Key of
799        [{Geom} | T] ->
800            {Bbox, Geom2} = process_geometry(Geom),
801            Key2 = Bbox ++ T;
802        _ ->
803            Key2 = Key,
804            Geom2 = <<>>
805        end,
806        Key3 = lists:map(
807            fun([]) ->
808                throw({emit_key, <<"A range cannot be an empty array.">>});
809            ([_SingleElementList]) ->
810                throw({emit_key, <<"A range cannot be single element "
811                                   "array.">>});
812            ([Min, Max]) when not (is_number(Min) andalso is_number(Max)) ->
813                throw({emit_key, <<"Ranges must be numbers.">>});
814            ([Min, Max]) when Min > Max ->
815                throw({emit_key, <<"The minimum of a range must be smaller "
816                                   "than the maximum.">>});
817            ([Min, Max]) ->
818                [Min, Max];
819            (SingleValue) when is_tuple(SingleValue)->
820                throw({emit_key, <<"A geometry is only allowed as the first "
821                                   "element in the array.">>});
822            (SingleValue) when not is_number(SingleValue)->
823                throw({emit_key, <<"The values of the key must be numbers or "
824                                   "a GeoJSON geometry.">>});
825            (SingleValue) ->
826                [SingleValue, SingleValue]
827            end,
828        Key2),
829        {Key3, Geom2};
830    _ ->
831        throw({emit_key, <<"The key must be an array of numbers which might"
832                           "have a GeoJSON geometry as first element.">>})
833    end.
834
835
836% Returns an Erlang encoded geometry and the corresponding bounding box
837process_geometry(Geo) ->
838    Bbox = try
839        Type = binary_to_atom(proplists:get_value(<<"type">>, Geo), utf8),
840        case Type of
841        'GeometryCollection' ->
842            Geometries = proplists:get_value(<<"geometries">>, Geo),
843            lists:foldl(fun({Geometry}, CurBbox) ->
844                Type2 = binary_to_atom(
845                    proplists:get_value(<<"type">>, Geometry), utf8),
846                Coords = proplists:get_value(<<"coordinates">>, Geometry),
847                case proplists:get_value(<<"bbox">>, Geo) of
848                undefined ->
849                    extract_bbox(Type2, Coords, CurBbox);
850                [W, S, E, N] ->
851                    [[W, E], [S, N]]
852                end
853            end, nil, Geometries);
854        _ ->
855            Coords = proplists:get_value(<<"coordinates">>, Geo),
856            case proplists:get_value(<<"bbox">>, Geo) of
857            undefined ->
858                extract_bbox(Type, Coords);
859            [W, S, E, N] ->
860                [[W, E], [S, N]]
861            end
862        end
863    catch _:badarg ->
864        throw({emit_key, <<"The supplied geometry must be valid GeoJSON.">>})
865    end,
866    {ok, Geom} = wkb_writer:geojson_to_wkb({Geo}),
867    {Bbox, Geom}.
868
869
870extract_bbox(Type, Coords) ->
871    extract_bbox(Type, Coords, nil).
872
873extract_bbox(Type, Coords, InitBbox) ->
874    case Type of
875    'Point' ->
876        bbox([Coords], InitBbox);
877    'LineString' ->
878        bbox(Coords, InitBbox);
879    'Polygon' ->
880        % holes don't matter for the bounding box
881        bbox(hd(Coords), InitBbox);
882    'MultiPoint' ->
883        bbox(Coords, InitBbox);
884    'MultiLineString' ->
885        lists:foldl(fun(Linestring, CurBbox) ->
886            bbox(Linestring, CurBbox)
887        end, InitBbox, Coords);
888    'MultiPolygon' ->
889        lists:foldl(fun(Polygon, CurBbox) ->
890            bbox(hd(Polygon), CurBbox)
891        end, InitBbox, Coords);
892    InvalidType ->
893        throw({emit_key,
894            <<"The supplied geometry type `",
895              (atom_to_binary(InvalidType, latin1))/binary,
896              "` is not a valid GeoJSON. "
897              "Valid geometry types are (case sensitive): "
898              "Point, LineString, Polygon, MultiPoint, MultiLineString, "
899              "MultiLineString">>})
900    end.
901
902bbox([], Range) ->
903    Range;
904bbox([[X, Y]|Rest], nil) ->
905    bbox(Rest, [[X, X], [Y, Y]]);
906bbox([Coords|Rest], Range) ->
907    Range2 = lists:zipwith(
908        fun(Coord, [Min, Max]) ->
909            [erlang:min(Coord, Min), erlang:max(Coord, Max)]
910        end, Coords, Range),
911    bbox(Rest, Range2).
912
913
914-spec check_primary_geometry_size(binary(), pos_integer(), [number()],
915        binary(), #set_view_group{}) -> ok.
916check_primary_geometry_size(Bin, Max, Key, DocId, Group) when byte_size(Bin) > Max ->
917    #set_view_group{set_name = SetName, name = DDocId, type = Type} = Group,
918    Error = iolist_to_binary(
919        io_lib:format("geometry emitted for key `~s`, document `~s`, is too big"
920                      " (~p bytes)", [Key, DocId, byte_size(Bin)])),
921    ?LOG_MAPREDUCE_ERROR("Bucket `~s`, ~s group `~s`, ~s",
922                         [SetName, Type, DDocId, Error]),
923    throw({error, Error});
924check_primary_geometry_size(_Bin, _Max, _Key, _DocId, _Group) ->
925    ok.
926
927
928-spec convert_back_index_kvs_to_binary(
929        [{binary(), {partition_id(), {binary(), {partition_id(), binary()}}}}],
930        [{binary(), binary()}]) -> [{binary(), binary()}].
931convert_back_index_kvs_to_binary([], Acc)->
932    lists:reverse(Acc);
933convert_back_index_kvs_to_binary(
934        [{DocId, {PartId, ViewIdKeys}} | Rest], Acc) ->
935    ViewIdKeysBinary = lists:foldl(
936        fun({ViewId, Keys}, Acc2) ->
937            KeyListBinary = lists:foldl(
938                fun(Key, AccKeys) ->
939                    Key2 = encode_key(Key),
940                    <<AccKeys/binary, (byte_size(Key2)):16, Key2/binary>>
941                end,
942                <<>>, Keys),
943            NumKeys = length(Keys),
944            case NumKeys >= (1 bsl 16) of
945            true ->
946                ErrorMsg = io_lib:format(
947                    "Too many (~p) keys emitted for "
948                    "document `~s` (maximum allowed is ~p",
949                    [NumKeys, DocId, (1 bsl 16) - 1]),
950                throw({error, iolist_to_binary(ErrorMsg)});
951            false ->
952                ok
953            end,
954            <<Acc2/binary, ViewId:8, NumKeys:16, KeyListBinary/binary>>
955        end,
956        <<>>, ViewIdKeys),
957    KvBin = {<<PartId:16, DocId/binary>>,
958        <<PartId:16, ViewIdKeysBinary/binary>>},
959    convert_back_index_kvs_to_binary(Rest, [KvBin | Acc]).
960
961
962-spec view_insert_doc_query_results(
963        binary(), partition_id(), [set_view_key_value()],
964        [set_view_key_value()], [set_view_key_value()], [set_view_key()]) ->
965                                           {[set_view_key_value()],
966                                            [set_view_key()]}.
967view_insert_doc_query_results(_DocId, _PartitionId, [], [], ViewKVsAcc,
968        ViewIdKeysAcc) ->
969    {lists:reverse(ViewKVsAcc), lists:reverse(ViewIdKeysAcc)};
970view_insert_doc_query_results(DocId, PartitionId, [ResultKVs | RestResults],
971        [{View, KVs} | RestViewKVs], ViewKVsAcc, ViewIdKeysAcc) ->
972    ResultKvGeoms = [{maybe_process_key(Key), Val} || {Key, Val} <- ResultKVs],
973    % Take any identical keys and combine the values
974    {NewKVs, NewViewIdKeysAcc} = lists:foldl(
975        fun({{Key, Geom}, Val}, {[{{Key, PrevDocId} = Kd, PrevVal} | AccRest],
976                AccVid}) when PrevDocId =:= DocId ->
977            Dups = case PrevVal of
978            {PartitionId, {dups, Dups0}} ->
979                [{Val, Geom} | Dups0];
980            {PartitionId, UserPrevVal} ->
981                [{Val, Geom}, UserPrevVal]
982            end,
983            {[{Kd, {PartitionId, {dups, Dups}}} | AccRest], AccVid};
984        ({{Key, Geom}, Val}, {AccKv, AccVid}) ->
985            {[{{Key, DocId}, {PartitionId, {Val, Geom}}} | AccKv],
986                [Key | AccVid]}
987        end,
988        {KVs, []}, lists:sort(ResultKvGeoms)),
989    NewViewKVsAcc = [{View, NewKVs} | ViewKVsAcc],
990    case NewViewIdKeysAcc of
991    [] ->
992        NewViewIdKeysAcc2 = ViewIdKeysAcc;
993    _ ->
994        NewViewIdKeysAcc2 = [
995            {View#set_view.id_num, NewViewIdKeysAcc} | ViewIdKeysAcc]
996    end,
997    view_insert_doc_query_results(
998        DocId, PartitionId, RestResults, RestViewKVs, NewViewKVsAcc,
999        NewViewIdKeysAcc2).
1000
1001
1002-spec query_args_view_name(#spatial_query_args{}) -> binary().
1003query_args_view_name(#spatial_query_args{view_name = ViewName}) ->
1004    ViewName.
1005
1006
1007-spec validate_ddoc(#doc{}) -> ok.
1008validate_ddoc(#doc{body = {Body}}) ->
1009    Spatial = couch_util:get_value(<<"spatial">>, Body, {[]}),
1010    case Spatial of
1011    {Views} when is_list(Views) ->
1012        ok;
1013    _ ->
1014        Views = [],
1015        throw({error, <<"The field `spatial' is not a json object.">>})
1016    end,
1017    HasMapreduce = case couch_util:get_value(<<"views">>, Body, {[]}) of
1018    {[]} ->
1019        false;
1020    _ ->
1021        true
1022    end,
1023    case length(Views) > 0 andalso HasMapreduce of
1024    true ->
1025        throw({error, <<"A design document may only contain mapreduce *or* "
1026            "spatial views">>});
1027    false ->
1028        ok
1029    end,
1030    lists:foreach(
1031        fun({SpatialName, Value}) ->
1032            validate_spatial_definition(SpatialName, Value)
1033        end,
1034        Views).
1035
1036
1037-spec validate_spatial_definition(binary(), binary()) -> ok.
1038validate_spatial_definition(<<"">>, _) ->
1039    throw({error, <<"Spatial view name cannot be an empty string">>});
1040validate_spatial_definition(SpatialName, SpatialDef) when
1041        is_binary(SpatialDef) ->
1042    validate_spatial_name(SpatialName, iolist_to_binary(io_lib:format(
1043        "Spatial view name `~s` cannot have leading or trailing whitespace",
1044        [SpatialName]))),
1045    validate_spatial_function(SpatialName, SpatialDef);
1046validate_spatial_definition(SpatialName, _) ->
1047    ErrorMsg = io_lib:format("Value for spatial view `~s' is not "
1048                             "a string.", [SpatialName]),
1049    throw({error, iolist_to_binary(ErrorMsg)}).
1050
1051
1052% Make sure the view name doesn't contain leading or trailing whitespace
1053% (space, tab, newline or carriage return)
1054-spec validate_spatial_name(binary(), binary()) -> ok.
1055validate_spatial_name(<<" ", _Rest/binary>>, ErrorMsg) ->
1056    throw({error, ErrorMsg});
1057validate_spatial_name(<<"\t", _Rest/binary>>, ErrorMsg) ->
1058    throw({error, ErrorMsg});
1059validate_spatial_name(<<"\n", _Rest/binary>>, ErrorMsg) ->
1060    throw({error, ErrorMsg});
1061validate_spatial_name(<<"\r", _Rest/binary>>, ErrorMsg) ->
1062    throw({error, ErrorMsg});
1063validate_spatial_name(Bin, ErrorMsg) when size(Bin) > 1 ->
1064    Size = size(Bin) - 1 ,
1065    <<_:Size/binary, Trailing/bits>> = Bin,
1066    % Check for trailing whitespace
1067    validate_spatial_name(Trailing, ErrorMsg);
1068validate_spatial_name(_, _) ->
1069    ok.
1070
1071
1072-spec validate_spatial_function(binary(), binary()) -> ok.
1073validate_spatial_function(SpatialName, SpatialDef) ->
1074    case mapreduce:start_map_context(spatial_view, [SpatialDef]) of
1075    {ok, _Ctx} ->
1076        ok;
1077    {error, Reason} ->
1078        ErrorMsg = io_lib:format("Syntax error in the spatial function of"
1079                                 " the spatial view `~s': ~s",
1080                                 [SpatialName, Reason]),
1081        throw({error, iolist_to_binary(ErrorMsg)})
1082    end.
1083