xref: /6.6.0/geocouch/vtree/src/vtree_insert.erl (revision 263b2ce8)
1% Licensed under the Apache License, Version 2.0 (the "License"); you may not
2% use this file except in compliance with the License. You may obtain a copy of
3% the License at
4%
5%   http://www.apache.org/licenses/LICENSE-2.0
6%
7% Unless required by applicable law or agreed to in writing, software
8% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10% License for the specific language governing permissions and limitations under
11% the License.
12
13-module(vtree_insert).
14
15-include("vtree.hrl").
16-include("couch_db.hrl").
17
18-export([insert/2]).
19
20-ifdef(makecheck).
21-compile(nowarn_export_all).
22-compile(export_all).
23-endif.
24
25
26-spec insert(Vt :: #vtree{}, Nodes :: [#kv_node{}]) -> #vtree{}.
27insert(Vt, []) ->
28    Vt;
29insert(#vtree{root=nil}=Vt, Nodes) ->
30    T1 = erlang:monotonic_time(),
31    % If we would do single inserts, the first node that was inserted would
32    % have set the original Mbb `MbbO`
33    MbbO = (hd(Nodes))#kv_node.key,
34    Threshold = Vt#vtree.kv_chunk_threshold,
35
36    case ?ext_size(Nodes) > Threshold of
37        true ->
38            {Nodes2, Rest} = vtree_modify:get_overflowing_subset(
39                               Threshold, Nodes),
40            KpNodes = vtree_modify:write_nodes(Vt, Nodes2, MbbO),
41            Root = vtree_modify:write_new_root(Vt, KpNodes),
42            Vt2 = Vt#vtree{root=Root},
43
44            % NOTE vmx 2012-09-20: The value of `ArbitraryBulkSize` is
45            %     arbitrary, might be worth spending more benchmarking
46            % NOTE vmx 2012-09-20: You can call it premature optimization, but it's
47            %     really worth it. In the future the initial index building should be
48            %     replaces with something better
49            ArbitraryBulkSize = round(math:log(Threshold)+50),
50            Vt3 = insert_in_bulks(Vt2, Rest, ArbitraryBulkSize),
51            ?LOG_DEBUG("Insertion into empty tree took: ~ps~n",
52                      [erlang:convert_time_unit(erlang:monotonic_time() - T1, native, microsecond)/1000000]),
53            ?LOG_DEBUG("Root pos: ~p~n", [(Vt3#vtree.root)#kp_node.childpointer]),
54            Vt3;
55        false ->
56            [Root] = vtree_modify:write_nodes(Vt, Nodes, MbbO),
57            Vt#vtree{root=Root}
58    end;
59insert(Vt, Nodes) ->
60    T1 = erlang:monotonic_time(),
61    Root = Vt#vtree.root,
62    PartitionedNodes = [Nodes],
63    KpNodes = insert_multiple(Vt, PartitionedNodes, [Root]),
64    NewRoot = vtree_modify:write_new_root(Vt, KpNodes),
65    ?LOG_DEBUG("Insertion into existing tree took: ~ps~n",
66               [erlang:convert_time_unit(erlang:monotonic_time() - T1, native, microsecond)/1000000]),
67    Vt#vtree{root=NewRoot}.
68
69
70% NOTE vmx 2013-03-12: It might make sense to change the bulk size from
71%      using the number of nodes, to a byte size based way.
72% Insert the data in a certain chunks
73-spec insert_in_bulks(Vt :: #vtree{}, Nodes :: [#kv_node{}],
74                      BulkSize :: non_neg_integer()) -> #vtree{}.
75insert_in_bulks(Vt, [], _BulkSize) ->
76    Vt;
77insert_in_bulks(Vt, Nodes, BulkSize) when length(Nodes) > BulkSize ->
78    {Insert, Rest} = lists:split(BulkSize, Nodes),
79    Vt2 = insert(Vt, Insert),
80    insert_in_bulks(Vt2, Rest, BulkSize);
81insert_in_bulks(Vt, Nodes, _BulkSize) ->
82    insert(Vt, Nodes).
83
84
85-spec insert_multiple(Vt :: #vtree{}, ToInsert :: [#kv_node{}],
86                      Existing :: [#kp_node{}]) -> [#kp_node{}].
87insert_multiple(Vt, ToInsert, Existing) ->
88    ModifyFuns = {fun insert_nodes/2, fun partition_nodes/3},
89    vtree_modify:modify_multiple(Vt, ModifyFuns, ToInsert, Existing, []).
90
91
92-spec insert_nodes(ToInsert :: [#kv_node{}], Existing :: [#kv_node{}]) ->
93                          [#kv_node{}].
94insert_nodes(ToInsert, Existing) ->
95    ToInsert ++ Existing.
96
97
98% Partitions a list of nodes according to a list of MBBs which are given by
99% KP-nodes.
100-spec partition_nodes(ToPartition :: [#kv_node{}], KpNodes :: [#kp_node{}],
101                      Less :: lessfun()) -> [[#kv_node{}]].
102partition_nodes(ToPartition, KpNodes, Less) ->
103    Partitions0 = [[] || _ <- lists:seq(1, length(KpNodes))],
104    PartitionMbbs = [Node#kp_node.key || Node <- KpNodes],
105    % Choose the partition the new node should be inserted to.
106    % vtree_choode:choose_subtree/3 expects a list of 2-tuples with the MBB
107    % and any value you like. We use the index in the list as second element
108    % in the tuple, so we can insert the new nodes there easily.
109    NodesNumbered = lists:zip(PartitionMbbs,
110                              lists:seq(1, length(PartitionMbbs))),
111    lists:foldl(fun(Node, Partitions) ->
112                       {_, NodeIndex} = vtree_choose:choose_subtree(
113                                          NodesNumbered, Node#kv_node.key,
114                                          Less),
115                       add_to_nth(NodeIndex, Node, Partitions)
116               end, Partitions0, ToPartition).
117
118
119% Add some value to a certain position in a list of lists
120% `N` is the index starting at 1 for the first element
121-spec add_to_nth(N :: pos_integer(), Element :: any(),
122                 ListOfLists :: [list()]) -> [list()].
123add_to_nth(N, Element, ListOfLists) ->
124    {A, [Nth|B]} = lists:split(N-1, ListOfLists),
125    A ++ [[Element|Nth]] ++ B.
126