xref: /6.6.0/geocouch/vtree/src/vtree_modify.erl (revision 263b2ce8)
1% Licensed under the Apache License, Version 2.0 (the "License"); you may not
2% use this file except in compliance with the License. You may obtain a copy of
3% the License at
4%
5%   http://www.apache.org/licenses/LICENSE-2.0
6%
7% Unless required by applicable law or agreed to in writing, software
8% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10% License for the specific language governing permissions and limitations under
11% the License.
12
13% This module implements the modification of the vtree for insertion and
14% deletion. It follows the normal R-tree rules, but takes the "original MBB"
15% into account, which is RR*-tree specific. It calls out to modules for the
16% choosing the correct subtree and splitting the nodes.
17%
18% This insertion/deletion supports bulks, i.e. adding/removing multiple items
19% at the same time. This is faster than subsequent single inserts/deletes.
20% The reason for being faster is, that multiple nodes can be written
21% at once, before their parent node is written.
22%
23% This module is the central piece for the algorithm. The modules vtree_insert
24% and vtree_delete implement the specific actions that are different between
25% insertion and deletion. But as you can see they are very similar, only the
26% function that handle the KP-nodes during traversal and the KV-nodes during
27% modification are different.
28%
29% Here's a quick outline of the way the bulk operations work. First you have a
30% list of nodes that all want to be added/deleted to the tree. You start at
31% the root node:
32% 1. Get all child nodes
33% 2. Loop through all nodes that should be inserted/delete and group/partition
34%    them into chunks. All nodes that would be added to/removed from a certain
35%    node according to the partition function end up in one partition.
36% 3. Now keep traversing the tree in depth-first manner and start with 1.,
37%    until you hit a leaf node. There you do the actual insertion/deletion.
38
39-module(vtree_modify).
40
41-include("vtree.hrl").
42-include("couch_db.hrl").
43
44-export([write_new_root/2, write_nodes/3, modify_multiple/5,
45         get_overflowing_subset/2]).
46
47-ifdef(makecheck).
48-compile(nowarn_export_all).
49-compile(export_all).
50-endif.
51
52
53% Write a new root node for the given nodes. In case there are more than
54% than it can hold, write a new root recursively. Stop when the root is a
55% single node.
56-spec write_new_root(Vt :: #vtree{}, Nodes :: [#kp_node{} | #kv_node{}]) ->
57                            #kp_node{}.
58write_new_root(_Vt, [Root]) ->
59    Root;
60% The `write_nodes/3` call will handle the splitting if needed. It could
61% happen that the byte size of nodes returned by `write_nodes/3` is bigger
62% than the chunk threshold, hence the recursive call.
63write_new_root(Vt, Nodes) ->
64    MbbO = vtree_util:nodes_mbb(Nodes, Vt#vtree.less),
65    WrittenNodes = write_nodes(Vt, Nodes, MbbO),
66    write_new_root(Vt, WrittenNodes).
67
68
69% Add a list of nodes one by one into a list of nodes (think of the latter
70% list as list containing child nodes). The node the new nodes get inserted
71% to, will automatically be split.
72% The result will again be a list of multiple nodes, with a maximum number of
73% nodes that still satisfy the chunk threshold. The total number of elements
74% in the resulting list can be bigger than the node can actually hold, hence
75% you might need to call it recursively.
76-spec insert_into_nodes(Vt :: #vtree{},
77                        NodePartitions :: [[#kv_node{} | #kp_node{}]],
78                        MbbO :: mbb(), ToInsert :: [#kv_node{} | #kp_node{}])
79                       -> [[#kv_node{} | #kp_node{}]].
80insert_into_nodes(_Vt, NodePartitions, _MbbO, []) ->
81    NodePartitions;
82insert_into_nodes(Vt, NodePartitions, MbbO, [ToInsert|Rest]) ->
83    Less = Vt#vtree.less,
84    Mbb = get_key(ToInsert),
85    FillMax = get_chunk_threshold(Vt, ToInsert),
86
87    % Every node partition contains a list of nodes, the maximum number is
88    % given by the chunk threshold
89    % Start with calculating the MBBs of the partitions
90    PartitionMbbs = [vtree_util:nodes_mbb(Nodes, Less) ||
91                        Nodes <- NodePartitions],
92
93    % Choose the partition the new node should be inserted to.
94    % vtree_choode:choose_subtree/3 expects a list of 2-tuples with the MBB
95    % and any value you like. We use the index in the list as second element
96    % in the tuple, so we can insert the new nodes there easily.
97    NodesNumbered = lists:zip(PartitionMbbs,
98                              lists:seq(0, length(PartitionMbbs)-1)),
99    {_, NodeIndex} = vtree_choose:choose_subtree(NodesNumbered, Mbb, Less),
100    {A, [Nth|B]} = lists:split(NodeIndex, NodePartitions),
101    TmpInserted = [ToInsert | Nth],
102    NewNodes = case ?ext_size(TmpInserted) > FillMax of
103                   % Maximum number of nodes reached, hence split it
104                   true ->
105                       {C, D} = split_node(Vt, TmpInserted, MbbO),
106                       A ++ [C, D] ++ B;
107                   % No need to split the node, just insert the new one
108                   false ->
109                       A ++ [TmpInserted] ++ B
110               end,
111    insert_into_nodes(Vt, NewNodes, MbbO, Rest).
112
113
114-spec get_key(Node :: #kv_node{} | #kp_node{}) -> mbb().
115get_key(#kv_node{}=Node) ->
116    Node#kv_node.key;
117get_key(#kp_node{}=Node) ->
118    Node#kp_node.key.
119
120
121-spec get_chunk_threshold(Vt :: #vtree{}, Node :: #kv_node{} | #kp_node{}) ->
122                                 number().
123get_chunk_threshold(Vt, #kv_node{}) ->
124    Vt#vtree.kv_chunk_threshold;
125get_chunk_threshold(Vt, #kp_node{}) ->
126    Vt#vtree.kp_chunk_threshold.
127
128
129% Return a minimal subset of nodes that are just about bigger than `FillMax`
130-spec get_overflowing_subset(FillMax :: number(),
131                             Nodes :: [#kv_node{} | #kp_node{} |
132                                       split_node()]) ->
133                                    {[#kv_node{}], [#kv_node{}]} |
134                                    {[#kp_node{}], [#kp_node{}]} |
135                                    {[split_node()], [split_node()]}.
136get_overflowing_subset(FillMax, Nodes) ->
137    get_overflowing_subset(FillMax, Nodes, []).
138-spec get_overflowing_subset(FillMax :: number(),
139                             Nodes :: [#kv_node{} | #kp_node{} | split_node()],
140                             Acc :: [#kv_node{} | #kp_node{} |
141                                     split_node()]) ->
142                                    {[#kv_node{}], [#kv_node{}]} |
143                                    {[#kp_node{}], [#kp_node{}]} |
144                                    {[split_node()], [split_node()]}.
145get_overflowing_subset(_FillMax, [], Acc) ->
146    {lists:reverse(Acc), []};
147get_overflowing_subset(FillMax, [H|T]=Nodes, Acc) ->
148    case ?ext_size(Acc) < FillMax of
149        true ->
150            get_overflowing_subset(FillMax, T, [H|Acc]);
151        false ->
152            {lists:reverse(Acc), Nodes}
153    end.
154
155
156% `MbbO` is the original MBB when it was create the first time
157% It will return a list of KP-nodes. It might return more than the chunk
158% threshold (but the size of the nodes per node will be limited by the chunk
159% threshold).
160-spec write_nodes(Vt :: #vtree{}, Nodes :: [#kv_node{} | #kp_node{}],
161                  MbbO :: mbb()) -> [#kp_node{}].
162% All nodes were deleted, the current node is empty now
163write_nodes(_Vt, [], _MbbO) ->
164    [];
165write_nodes(Vt, [#kv_node{}|_]=Nodes, MbbO) ->
166    FillMax = Vt#vtree.kv_chunk_threshold,
167    Size = ?ext_size(Nodes),
168    write_nodes(Vt, Nodes, MbbO, FillMax, Size);
169write_nodes(Vt, [#kp_node{}|_]=Nodes, MbbO) ->
170    FillMax = Vt#vtree.kp_chunk_threshold,
171    Size = ?ext_size(Nodes),
172    write_nodes(Vt, Nodes, MbbO, FillMax, Size).
173% Too many nodes for a single split, hence do something smart to get
174% all nodes stored in a good way. First take the first FillMax nodes
175% and split then into two nodes. Then insert all other nodes one by
176% one into one of the two newly created nodes. The decision which node
177% to choose is done by vtree_choose:choose_subtree/3.
178-spec write_nodes(Vt :: #vtree{}, Nodes :: [#kv_node{} | #kp_node{}],
179                  MbbO :: mbb(), FillMax :: number(), Size :: pos_integer()) ->
180                         [#kp_node{}].
181write_nodes(Vt, Nodes, MbbO, FillMax, Size) when Size > FillMax ->
182    {FirstNodes, Rest} = get_overflowing_subset(FillMax, Nodes),
183    NewNodes = insert_into_nodes(Vt, [FirstNodes], MbbO, Rest),
184    write_multiple_nodes(Vt, NewNodes);
185% No split needed
186write_nodes(Vt, Nodes, MbbO, _FillMax, _Size) ->
187    % `write_multiple_nodes/2` isn't used here, as it's the only case,
188    % where we use the supplied MBBO and don't calculate a new one.
189    %write_multiple_nodes(Vt, [Nodes], [MbbO]).
190    #vtree{
191            fd = Fd,
192            less = Less
193          } = Vt,
194    {ok, WrittenNode} = vtree_io:write_node(Fd, Nodes, Less),
195    [WrittenNode#kp_node{mbb_orig = MbbO}].
196
197
198% Write multiple nodes at once. It is only called when a split happened, hence
199% the original MBB (`MbbO`) is reset.
200-spec write_multiple_nodes(Vt :: #vtree{},
201                           NodeList :: [[#kp_node{} | #kv_node{}]]) ->
202                                  [#kp_node{}].
203write_multiple_nodes(Vt, NodeList) ->
204    #vtree{
205            fd = Fd,
206            less = Less
207          } = Vt,
208    lists:map(
209      fun(Nodes) ->
210              {ok, WrittenNode} = vtree_io:write_node(Fd, Nodes, Less),
211              WrittenNode#kp_node{mbb_orig = WrittenNode#kp_node.key}
212      end, NodeList).
213
214
215% Splits a KV- or KP-Node. Needs to be called with a total size of the nodes
216% that can be accommodated by two nodes. It % operates with #kv_node{} and
217% #kp_node{} records and also returns those.
218-spec split_node(Vt :: #vtree{}, Nodes :: [#kv_node{} | #kp_node{}],
219                 MbbO :: mbb()) -> {[#kv_node{}], [#kv_node{}]} |
220                                   {[#kp_node{}], [#kp_node{}]}.
221split_node(Vt, [#kv_node{}|_]=Nodes, MbbO) ->
222    #vtree{
223       kv_chunk_threshold = FillMax,
224       min_fill_rate = MinRate,
225       less = Less
226      } = Vt,
227    SplitNodes = [{Node#kv_node.key, Node} || Node <- Nodes],
228    {SplitNodesA, SplitNodesB} = vtree_split:split_leaf(
229                                   SplitNodes, MbbO, FillMax*MinRate, FillMax,
230                                   Less),
231    {_, NodesA} = lists:unzip(SplitNodesA),
232    {_, NodesB} = lists:unzip(SplitNodesB),
233    {NodesA, NodesB};
234split_node(Vt, [#kp_node{}|_]=Nodes, MbbO) ->
235    #vtree{
236       kp_chunk_threshold = FillMax,
237       min_fill_rate = MinRate,
238       less = Less
239      } = Vt,
240    SplitNodes = [{Node#kp_node.key, Node} || Node <- Nodes],
241    {SplitNodesA, SplitNodesB} = vtree_split:split_inner(
242                                   SplitNodes, MbbO, FillMax*MinRate, FillMax,
243                                   Less),
244    {_, NodesA} = lists:unzip(SplitNodesA),
245    {_, NodesB} = lists:unzip(SplitNodesB),
246    {NodesA, NodesB}.
247
248
249% The following comment explains the insertion only to make it easier to
250% understand, though it works the same for deletions.
251%
252% This function inserts partitioned nodes into the corresponding position in
253% the tree. This means that the number of partitions equals the number of
254% nodes of the current level.
255% The whole algorithm inserts multiple nodes into the tree and tries to
256% minimize the rebuilding of the tree (caused by his append-only nature).
257% The goal is to write parent nodes only when all the inserts in their
258% children already happened. It's done by depth first traversal, where the
259% nodes that get inserted are partitioned according to the currently level.
260% Those partitions are then further divided while moving deeper.
261%
262% Here's an example:
263%
264%     PartitionedNodes = [[kv_node1, kv_node2], [kv_node3], [], [kv_node4]]
265%
266% The `PartitionedNodes` has 4 elements, hence the level it get inserted to
267% needs to have a exactly 4 nodes
268%
269%     CurrentLevel = [kp_node1, kp_node2, kp_node2, kp_node4]
270%
271% The Partitioned nodes get inserted into the `CurrentLevel`, hence
272%
273%     kp_node1 gets nodes kv_node1 and kv_node2 added
274%     kp_node2 gets node kv_node3 added
275%     kp_node3 keeps it's original children
276%     kp_node4 gets node kv_node4 added
277%
278% The KP-nodes are traversed recursively and the KV-Nodes that get added are
279% also partitioned recursively, so that in the end the KP-nodes get added
280% as leaf nodes to the existing KP-nodes.
281-spec modify_multiple(Vt :: #vtree{}, ModifyFuns :: {fun(), fun()},
282                      Modify :: [#kv_node{}], Existing :: [#kp_node{}],
283                      Acc :: [#kp_node{}]) ->
284                             [#kp_node{}].
285modify_multiple(_Vt, _ModifyFuns, [], [], Acc) ->
286    Acc;
287% This partition doesn't contain any nodes to delete, hence use the existing
288% ones and move on
289modify_multiple(Vt, ModifyFuns, [[]|SiblingPartitions],
290                [Existing|ExistingSiblings], Acc) ->
291    modify_multiple(Vt, ModifyFuns, SiblingPartitions, ExistingSiblings,
292                    [Existing|Acc]);
293modify_multiple(Vt, ModifyFuns, [ModifyNodes|SiblingPartitions],
294                [Existing|ExistingSiblings], Acc) ->
295    #vtree{
296            fd = Fd,
297            less = Less
298          } = Vt,
299    #kp_node{
300              childpointer = ChildPointer,
301              mbb_orig = MbbO
302            } = Existing,
303    {KvModifyFun, KpModifyFun} = ModifyFuns,
304    ExistingNodes = vtree_io:read_node(Fd, ChildPointer),
305    NewChildren =
306        case ExistingNodes of
307            [#kv_node{}|_] ->
308                KvModifyFun(ModifyNodes, ExistingNodes);
309            [#kp_node{}|_] ->
310                Partitions = KpModifyFun(ModifyNodes, ExistingNodes, Less),
311                % Moving deeper
312                modify_multiple(Vt, ModifyFuns, Partitions, ExistingNodes, [])
313    end,
314    WrittenNodes = write_nodes(Vt, NewChildren, MbbO),
315    % Moving sideways
316    modify_multiple(Vt, ModifyFuns, SiblingPartitions, ExistingSiblings,
317                    WrittenNodes ++ Acc).
318