1//  Copyright (c) 2016 Couchbase, Inc.
2//  Licensed under the Apache License, Version 2.0 (the "License");
3//  you may not use this file except in compliance with the
4//  License. You may obtain a copy of the License at
5//    http://www.apache.org/licenses/LICENSE-2.0
6//  Unless required by applicable law or agreed to in writing,
7//  software distributed under the License is distributed on an "AS
8//  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9//  express or implied. See the License for the specific language
10//  governing permissions and limitations under the License.
11
12// Package moss stands for "memory-oriented sorted segments", and
13// provides a data structure that manages an ordered Collection of
14// key-val entries, with optional persistence.
15//
16// The design is similar to a simplified LSM tree (log structured
17// merge tree), but is more like a "LSM array", in that a stack of
18// immutable, sorted key-val arrays or "segments" is maintained.  When
19// there's an incoming Batch of key-val mutations (see:
20// ExecuteBatch()), the Batch, which is an array of key-val mutations,
21// is sorted in-place and becomes an immutable "segment".  Then, the
22// segment is atomically pushed onto a stack of segment pointers.  A
23// higher segment in the stack will shadow mutations of the same key
24// from lower segments.
25//
26// Separately, an asynchronous goroutine (the "merger") will
27// continuously merge N sorted segments to keep stack height low.
28//
29// In the best case, a remaining, single, large sorted segment will be
30// efficient in memory usage and efficient for binary search and range
31// iteration.
32//
// Iterations when the stack height is > 1 are implemented using an
// N-way heap merge.
35//
// A Batch and a segment are each actually two arrays: a byte array of
// contiguous key-val entries; and a uint64 array of entry offsets
// and key-val lengths that refer to the previous key-val entries byte
// array.
40//
41// In this design, stacks are treated as immutable via a copy-on-write
42// approach whenever a stack is "modified".  So, readers and writers
43// essentially don't block each other, and taking a Snapshot is also a
44// relatively simple operation of atomically cloning the stack of
45// segment pointers.
46//
47// Of note: mutations are only supported through Batch operations,
48// which acknowledges the common practice of using batching to achieve
49// higher write performance and embraces it.  Additionally, higher
50// performance can be attained by using the batch memory
51// pre-allocation parameters and the Batch.Alloc() API, allowing
52// applications to serialize keys and vals directly into memory
53// maintained by a batch, which can avoid extra memory copying.
54//
55// IMPORTANT: The keys in a Batch must be unique.  That is,
56// myBatch.Set("x", "foo"); myBatch.Set("x", "bar") is not supported.
57// Applications that do not naturally meet this requirement might
58// maintain their own map[key]val data structures to ensure this
59// uniqueness constraint.
60//
61// An optional, asynchronous persistence goroutine (the "persister")
62// can drain mutations to a lower level, ordered key-value storage
63// layer.  An optional, built-in storage layer ("mossStore") is
64// available, that will asynchronously write segments to the end of a
65// file (append only design), with reads performed using mmap(), and
66// with user controllable compaction configuration.  See:
67// OpenStoreCollection().
68//
69// NOTE: the mossStore persistence design does not currently support
70// moving files created on one machine endian'ness type to another
71// machine with a different endian'ness type.
72//
73package moss
74
75import (
76	"errors"
77	"sync"
78	"time"
79
80	"github.com/couchbase/ghistogram"
81)
82
// Sentinel errors exported by the moss package.  Each value is
// created once with errors.New, so callers can compare a returned
// error against these variables.
var (
	// ErrAllocTooLarge is returned when the requested allocation cannot
	// be satisfied by the pre-allocated buffer.
	ErrAllocTooLarge = errors.New("alloc-too-large")

	// ErrAlreadyInitialized is returned when initialization was
	// attempted on an already initialized object.
	ErrAlreadyInitialized = errors.New("already-initialized")

	// ErrCanceled is used when an operation has been canceled.
	ErrCanceled = errors.New("canceled")

	// ErrClosed is returned when the collection is already closed.
	ErrClosed = errors.New("closed")

	// ErrNoSuchCollection is returned when attempting to access or
	// delete an unknown child collection.
	ErrNoSuchCollection = errors.New("no-such-collection")

	// ErrBadCollectionName is returned when the child collection
	// name is invalid, for example "".
	ErrBadCollectionName = errors.New("bad-collection-name")

	// ErrIteratorDone is returned when the iterator has reached the
	// end range of the iterator or the end of the collection.
	ErrIteratorDone = errors.New("iterator-done")

	// ErrMaxTries is returned when a max number of tries or attempts
	// for some operation has been reached.
	ErrMaxTries = errors.New("max-tries")

	// ErrMergeOperatorNil is returned if a merge operation is
	// performed without specifying a MergeOperator in the
	// CollectionOptions.
	ErrMergeOperatorNil = errors.New("merge-operator-nil")

	// ErrMergeOperatorFullMergeFailed is returned when the provided
	// MergeOperator fails during the FullMerge operations.
	ErrMergeOperatorFullMergeFailed = errors.New("merge-operator-full-merge-failed")

	// ErrUnexpected is returned on an unexpected situation.
	ErrUnexpected = errors.New("unexpected")

	// ErrUnimplemented is returned when an unimplemented feature has
	// been used.
	ErrUnimplemented = errors.New("unimplemented")

	// ErrKeyTooLarge is returned when the length of the key exceeds
	// the limit of 2^24.
	ErrKeyTooLarge = errors.New("key-too-large")

	// ErrValueTooLarge is returned when the length of the value
	// exceeds the limit of 2^28.
	ErrValueTooLarge = errors.New("value-too-large")

	// ErrAborted is returned when any operations are aborted.
	ErrAborted = errors.New("operation-aborted")

	// ErrSegmentCorrupted is returned upon any segment corruptions.
	ErrSegmentCorrupted = errors.New("segment-corrupted")
)
139
140// A Collection represents an ordered mapping of key-val entries,
141// where a Collection is snapshot'able and atomically updatable.
142type Collection interface {
143	// Start kicks off required background tasks.
144	Start() error
145
146	// Close synchronously stops background tasks and releases
147	// resources.
148	Close() error
149
150	// Options returns the options currently being used.
151	Options() CollectionOptions
152
153	// Snapshot returns a stable Snapshot of the key-value entries.
154	Snapshot() (Snapshot, error)
155
156	// Get retrieves a value from the collection for a given key
157	// and returns nil if the key is not found.
158	Get(key []byte, readOptions ReadOptions) ([]byte, error)
159
160	// NewBatch returns a new Batch instance with preallocated
161	// resources.  See the Batch.Alloc() method.
162	NewBatch(totalOps, totalKeyValBytes int) (Batch, error)
163
164	// ExecuteBatch atomically incorporates the provided Batch into
165	// the Collection.  The Batch instance should be Close()'ed and
166	// not reused after ExecuteBatch() returns.
167	ExecuteBatch(b Batch, writeOptions WriteOptions) error
168
169	// Stats returns stats for this collection.  Note that stats might
170	// be updated asynchronously.
171	Stats() (*CollectionStats, error)
172
173	// Histograms returns a snapshot of the histograms for this
174	// collection.  Note that histograms might be updated
175	// asynchronously.
176	Histograms() ghistogram.Histograms
177}
178
179// CollectionOptions allows applications to specify config settings.
180type CollectionOptions struct {
181	// MergeOperator is an optional func provided by an application
182	// that wants to use Batch.Merge()'ing.
183	MergeOperator MergeOperator `json:"-"`
184
185	// DeferredSort allows ExecuteBatch() to operate more quickly by
186	// deferring the sorting of an incoming batch until it is needed
187	// by a reader.  The tradeoff is that later read operations can
188	// take longer as the sorting is finally performed.
189	DeferredSort bool
190
191	// MinMergePercentage allows the merger to avoid premature merging
192	// of segments that are too small, where a segment X has to reach
193	// a certain size percentage compared to the next lower segment
194	// before segment X (and all segments above X) will be merged.
195	MinMergePercentage float64
196
197	// MaxPreMergerBatches is the max number of batches that can be
198	// accepted into the collection through ExecuteBatch() and held
199	// for merging but that have not been actually processed by the
200	// merger yet.  When the number of held but unprocessed batches
201	// reaches MaxPreMergerBatches, then ExecuteBatch() will block to
202	// allow the merger to catch up.
203	MaxPreMergerBatches int
204
205	// MergerCancelCheckEvery is the number of ops the merger will
206	// perform before it checks to see if a merger operation was
207	// canceled.
208	MergerCancelCheckEvery int
209
210	// MergerIdleRunTimeoutMS is the idle time in milliseconds after which the
211	// background merger will perform an "idle run" which can trigger
212	// incremental compactions to speed up queries.
213	MergerIdleRunTimeoutMS int64
214
215	// MaxDirtyOps, when greater than zero, is the max number of dirty
216	// (unpersisted) ops allowed before ExecuteBatch() blocks to allow
217	// the persister to catch up.  It only has effect with a non-nil
218	// LowerLevelUpdate.
219	MaxDirtyOps uint64
220
221	// MaxDirtyKeyValBytes, when greater than zero, is the max number
222	// of dirty (unpersisted) key-val bytes allowed before
223	// ExecuteBatch() blocks to allow the persister to catch up.  It
224	// only has effect with a non-nil LowerLevelUpdate.
225	MaxDirtyKeyValBytes uint64
226
227	// CachePersisted allows the collection to cache clean, persisted
228	// key-val's, and is considered when LowerLevelUpdate is used.
229	CachePersisted bool
230
231	// LowerLevelInit is an optional Snapshot implementation that
232	// initializes the lower-level storage of a Collection.  This
233	// might be used, for example, for having a Collection be a
234	// write-back cache in front of a persistent implementation.
235	LowerLevelInit Snapshot `json:"-"`
236
237	// LowerLevelUpdate is an optional func that is invoked when the
238	// lower-level storage should be updated.
239	LowerLevelUpdate LowerLevelUpdate `json:"-"`
240
241	Debug int // Higher means more logging, when Log != nil.
242
243	// Log is a callback invoked when the Collection needs to log a
244	// debug message.  Optional, may be nil.
245	Log func(format string, a ...interface{}) `json:"-"`
246
247	// OnError is an optional callback invoked when the Collection
248	// encounters an error.  This might happen when the background
249	// goroutines of moss encounter errors, such as during segment
250	// merging or optional persistence operations.
251	OnError func(error) `json:"-"`
252
253	// OnEvent is an optional callback invoked on Collection related
254	// processing events.  If the application's callback
255	// implementation blocks, it may pause processing and progress,
256	// depending on the type of callback event kind.
257	OnEvent func(event Event) `json:"-"`
258
259	// ReadOnly means that persisted data and storage files if any,
260	// will remain unchanged.
261	ReadOnly bool
262}
263
264// Event represents the information provided in an OnEvent() callback.
265type Event struct {
266	Kind       EventKind
267	Collection Collection
268	Duration   time.Duration
269}
270
// EventKind represents an event code for OnEvent() callbacks.
type EventKind int

// Event codes delivered in Event.Kind.  They are declared as
// variables (not constants) to stay compatible with existing callers.
var (
	// EventKindCloseStart is fired when a collection.Close() has
	// begun.  The closing might take awhile to complete and an
	// EventKindClose will follow later.
	EventKindCloseStart = EventKind(1)

	// EventKindClose is fired when a collection has been fully
	// closed.
	EventKindClose = EventKind(2)

	// EventKindMergerProgress is fired when the merger has completed
	// a round of merge processing.
	EventKindMergerProgress = EventKind(3)

	// EventKindPersisterProgress is fired when the persister has
	// completed a round of persistence processing.
	EventKindPersisterProgress = EventKind(4)

	// EventKindBatchExecuteStart is fired when a collection is
	// starting to execute a batch.
	EventKindBatchExecuteStart = EventKind(5)

	// EventKindBatchExecute is fired when a collection has finished
	// executing a batch.
	EventKindBatchExecute = EventKind(6)
)
297
298// DefaultCollectionOptions are the default configuration options.
299var DefaultCollectionOptions = CollectionOptions{
300	MergeOperator:          nil,
301	MinMergePercentage:     0.8,
302	MaxPreMergerBatches:    10,
303	MergerCancelCheckEvery: 10000,
304	MergerIdleRunTimeoutMS: 0,
305	Debug: 0,
306	Log:   nil,
307}
308
// BatchOptions are provided to NewChildCollectionBatch().
type BatchOptions struct {
	// TotalOps is an operation count for the batch.
	// NOTE(review): presumably a preallocation hint like the
	// totalOps parameter of Collection.NewBatch() -- confirm.
	TotalOps int

	// TotalKeyValBytes is a key-val byte count for the batch.
	// NOTE(review): presumably a preallocation hint like the
	// totalKeyValBytes parameter of Collection.NewBatch() -- confirm.
	TotalKeyValBytes int
}
314
315// A Batch is a set of mutations that will be incorporated atomically
316// into a Collection.  NOTE: the keys in a Batch must be unique.
317//
318// Concurrent Batch's are allowed, but to avoid races, concurrent
319// Batches should only be used by concurrent goroutines that can
320// ensure the mutation keys are partitioned or non-overlapping between
321// Batch instances.
322type Batch interface {
323	// Close must be invoked to release resources.
324	Close() error
325
326	// Set creates or updates an key-val entry in the Collection.  The
327	// key must be unique (not repeated) within the Batch.  Set()
328	// copies the key and val bytes into the Batch, so the memory
329	// bytes of the key and val may be reused by the caller.
330	Set(key, val []byte) error
331
332	// Del deletes a key-val entry from the Collection.  The key must
333	// be unique (not repeated) within the Batch.  Del copies the key
334	// bytes into the Batch, so the memory bytes of the key may be
335	// reused by the caller.  Del() on a non-existent key results in a
336	// nil error.
337	Del(key []byte) error
338
339	// Merge creates or updates a key-val entry in the Collection via
340	// the MergeOperator defined in the CollectionOptions.  The key
341	// must be unique (not repeated) within the Batch.  Merge() copies
342	// the key and val bytes into the Batch, so the memory bytes of
343	// the key and val may be reused by the caller.
344	Merge(key, val []byte) error
345
346	// ----------------------------------------------------
347
348	// Alloc provides a slice of bytes "owned" by the Batch, to reduce
349	// extra copying of memory.  See the Collection.NewBatch() method.
350	Alloc(numBytes int) ([]byte, error)
351
352	// AllocSet is like Set(), but the caller must provide []byte
353	// parameters that came from Alloc().
354	AllocSet(keyFromAlloc, valFromAlloc []byte) error
355
356	// AllocDel is like Del(), but the caller must provide []byte
357	// parameters that came from Alloc().
358	AllocDel(keyFromAlloc []byte) error
359
360	// AllocMerge is like Merge(), but the caller must provide []byte
361	// parameters that came from Alloc().
362	AllocMerge(keyFromAlloc, valFromAlloc []byte) error
363
364	// NewChildCollectionBatch returns a new Batch instance with preallocated
365	// resources for a specific child collection given its unique name.
366	// The child Batch will be executed atomically along with any
367	// other child batches and with the top-level Batch
368	// when the top-level Batch is executed.
369	// The child collection name should not start with a '.' (period)
370	// as those are reserved for future moss usage.
371	NewChildCollectionBatch(collectionName string, options BatchOptions) (Batch, error)
372
373	// DelChildCollection records a child collection deletion given the name.
374	// It only takes effect when the top-level batch is executed.
375	DelChildCollection(collectionName string) error
376}
377
378// A Snapshot is a stable view of a Collection for readers, isolated
379// from concurrent mutation activity.
380type Snapshot interface {
381	// Close must be invoked to release resources.
382	Close() error
383
384	// Get retrieves a val from the Snapshot, and will return nil val
385	// if the entry does not exist in the Snapshot.
386	Get(key []byte, readOptions ReadOptions) ([]byte, error)
387
388	// StartIterator returns a new Iterator instance on this Snapshot.
389	//
390	// On success, the returned Iterator will be positioned so that
391	// Iterator.Current() will either provide the first entry in the
392	// range or ErrIteratorDone.
393	//
394	// A startKeyInclusive of nil means the logical "bottom-most"
395	// possible key and an endKeyExclusive of nil means the logical
396	// key that's above the "top-most" possible key.
397	StartIterator(startKeyInclusive, endKeyExclusive []byte,
398		iteratorOptions IteratorOptions) (Iterator, error)
399
400	// ChildCollectionNames returns an array of child collection name strings.
401	ChildCollectionNames() ([]string, error)
402
403	// ChildCollectionSnapshot returns a Snapshot on a given child
404	// collection by its name.
405	ChildCollectionSnapshot(childCollectionName string) (Snapshot, error)
406}
407
408// An Iterator allows enumeration of key-val entries.
409type Iterator interface {
410	// Close must be invoked to release resources.
411	Close() error
412
413	// Next moves the Iterator to the next key-val entry and will
414	// return ErrIteratorDone if the Iterator is done.
415	Next() error
416
417	// SeekTo moves the Iterator to the lowest key-val entry whose key
418	// is >= the given seekToKey, and will return ErrIteratorDone if
419	// the Iterator is done.  SeekTo() will respect the
420	// startKeyInclusive/endKeyExclusive bounds, if any, that were
421	// specified with StartIterator().  Seeking to before the
422	// startKeyInclusive will end up on the first key.  Seeking to or
423	// after the endKeyExclusive will result in ErrIteratorDone.
424	SeekTo(seekToKey []byte) error
425
426	// Current returns ErrIteratorDone if the iterator is done.
427	// Otherwise, Current() returns the current key and val, which
428	// should be treated as immutable or read-only.  The key and val
429	// bytes will remain available until the next call to Next() or
430	// Close().
431	Current() (key, val []byte, err error)
432
433	// CurrentEx is a more advanced form of Current() that returns
434	// more metadata for each entry.  It is more useful when used with
435	// IteratorOptions.IncludeDeletions of true.  It returns
436	// ErrIteratorDone if the iterator is done.  Otherwise, the
437	// current EntryEx, key, val are returned, which should be treated
438	// as immutable or read-only.
439	CurrentEx() (entryEx EntryEx, key, val []byte, err error)
440}
441
// WriteOptions are provided to Collection.ExecuteBatch().  The
// struct currently declares no fields.
type WriteOptions struct{}
445
// ReadOptions are provided to Snapshot.Get().
type ReadOptions struct {
	// By default, the value returned during lookups or Get()'s are
	// copied.  Specifying true for NoCopyValue means don't copy the
	// value bytes, where the caller should copy the value themselves
	// if they need the value after the lifetime of the enclosing
	// snapshot.  When true, the caller must treat the value returned
	// by a lookup/Get() as immutable.
	NoCopyValue bool

	// SkipLowerLevel is an advanced flag that specifies that a
	// point lookup should fail on a cache-miss and not attempt to
	// access key-val entries from the optional, chained,
	// lower-level snapshot (disk based).  See
	// CollectionOptions.LowerLevelInit/LowerLevelUpdate.
	SkipLowerLevel bool
}
463
464// IteratorOptions are provided to StartIterator().
465type IteratorOptions struct {
466	// IncludeDeletions is an advanced flag that specifies that an
467	// Iterator should include deletion operations in its enuemration.
468	// See also the Iterator.CurrentEx() method.
469	IncludeDeletions bool
470
471	// SkipLowerLevel is an advanced flag that specifies that an
472	// Iterator should not enumerate key-val entries from the
473	// optional, chained, lower-level iterator.  See
474	// CollectionOptions.LowerLevelInit/LowerLevelUpdate.
475	SkipLowerLevel bool
476
477	// MinSegmentLevel is an advanced parameter that specifies that an
478	// Iterator should skip segments at a level less than
479	// MinSegmentLevel.  MinSegmentLevel is 0-based level, like an
480	// array index.
481	MinSegmentLevel int
482
483	// MaxSegmentHeight is an advanced parameter that specifies that
484	// an Iterator should skip segments at a level >= than
485	// MaxSegmentHeight.  MaxSegmentHeight is 1-based height, like an
486	// array length.
487	MaxSegmentHeight int
488
489	// base is used internally to provide the iterator with a
490	// segmentStack to use instead of a lower-level snapshot.  It's
491	// used so that segment merging consults the stackDirtyBase.
492	base *segmentStack
493}
494
// EntryEx provides extra, advanced information about an entry from
// the Iterator.CurrentEx() method.
type EntryEx struct {
	// Operation is an OperationXxx const.
	Operation uint64
}
501
// Operation codes reported through EntryEx.Operation.  Each code
// occupies the most significant byte of a uint64.
const (
	// OperationSet replaces the value associated with the key.
	OperationSet = uint64(0x0100000000000000)

	// OperationDel removes the value associated with the key.
	OperationDel = uint64(0x0200000000000000)

	// OperationMerge merges the new value with the existing value
	// associated with the key, as described by the configured
	// MergeOperator.
	OperationMerge = uint64(0x0300000000000000)
)
511
// A MergeOperator may be implemented by applications that wish to
// optimize their read-compute-write use cases.  Write-heavy counters,
// for example, could be implemented efficiently by using the
// MergeOperator functionality.
type MergeOperator interface {
	// Name returns an identifier for this merge operator, which might
	// be used for logging / debugging.
	Name() string

	// FullMerge applies the full sequence of operands on top of an
	// existingValue and returns the merged value.  The existingValue
	// may be nil if no value currently exists.  If full merge cannot
	// be done, return (nil, false).
	FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool)

	// PartialMerge partially merges two operands.  If partial merge
	// cannot be done, return (nil, false), which will defer
	// processing until a later FullMerge().
	PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool)
}
532
533// LowerLevelUpdate is the func callback signature used when a
534// Collection wants to update its optional, lower-level storage.
535type LowerLevelUpdate func(higher Snapshot) (lower Snapshot, err error)
536
// CollectionStats fields that are prefixed like CurXxxx are gauges
// (can go up and down), and fields that are prefixed like TotXxxx are
// monotonically increasing counters.
type CollectionStats struct {
	TotOnError uint64

	// Close() progress counters.
	TotCloseBeg           uint64
	TotCloseMergerDone    uint64
	TotClosePersisterDone uint64
	TotCloseLowerLevelBeg uint64
	TotCloseLowerLevelEnd uint64
	TotCloseEnd           uint64

	// Snapshot counters.
	TotSnapshotBeg           uint64
	TotSnapshotEnd           uint64
	TotSnapshotInternalBeg   uint64
	TotSnapshotInternalEnd   uint64
	TotSnapshotInternalClose uint64

	// Get counters.
	TotGet    uint64
	TotGetErr uint64

	// NewBatch counters.
	TotNewBatch                 uint64
	TotNewBatchTotalOps         uint64
	TotNewBatchTotalKeyValBytes uint64

	// ExecuteBatch counters.
	TotExecuteBatchBeg            uint64
	TotExecuteBatchErr            uint64
	TotExecuteBatchEmpty          uint64
	TotExecuteBatchWaitBeg        uint64
	TotExecuteBatchWaitEnd        uint64
	TotExecuteBatchAwakeMergerBeg uint64
	TotExecuteBatchAwakeMergerEnd uint64
	TotExecuteBatchEnd            uint64

	// Merger notification counters.
	TotNotifyMergerBeg uint64
	TotNotifyMergerEnd uint64

	// Merger loop counters.
	TotMergerEnd                  uint64
	TotMergerLoop                 uint64
	TotMergerLoopRepeat           uint64
	TotMergerAll                  uint64
	TotMergerInternalBeg          uint64
	TotMergerInternalErr          uint64
	TotMergerInternalEnd          uint64
	TotMergerInternalSkip         uint64
	TotMergerLowerLevelNotify     uint64
	TotMergerLowerLevelNotifySkip uint64
	TotMergerEmptyDirtyMid        uint64

	// Merger incoming-wait and idle counters.
	TotMergerWaitIncomingBeg  uint64
	TotMergerWaitIncomingStop uint64
	TotMergerWaitIncomingEnd  uint64
	TotMergerWaitIncomingSkip uint64
	TotMergerIdleSleeps       uint64
	TotMergerIdleRuns         uint64

	// Merger outgoing-wait counters.
	TotMergerWaitOutgoingBeg  uint64
	TotMergerWaitOutgoingStop uint64
	TotMergerWaitOutgoingEnd  uint64
	TotMergerWaitOutgoingSkip uint64

	// Persister loop counters.
	TotPersisterLoop       uint64
	TotPersisterLoopRepeat uint64
	TotPersisterWaitBeg    uint64
	TotPersisterWaitEnd    uint64
	TotPersisterEnd        uint64

	// Persister lower-level update counters.
	TotPersisterLowerLevelUpdateBeg uint64
	TotPersisterLowerLevelUpdateErr uint64
	TotPersisterLowerLevelUpdateEnd uint64

	// Gauges for dirty data overall.
	CurDirtyOps      uint64
	CurDirtyBytes    uint64
	CurDirtySegments uint64

	// Gauges for the dirty "top" portion.
	CurDirtyTopOps      uint64
	CurDirtyTopBytes    uint64
	CurDirtyTopSegments uint64

	// Gauges for the dirty "mid" portion.
	CurDirtyMidOps      uint64
	CurDirtyMidBytes    uint64
	CurDirtyMidSegments uint64

	// Gauges for the dirty "base" portion.
	CurDirtyBaseOps      uint64
	CurDirtyBaseBytes    uint64
	CurDirtyBaseSegments uint64

	// Gauges for clean data.
	CurCleanOps      uint64
	CurCleanBytes    uint64
	CurCleanSegments uint64
}
629
630// ------------------------------------------------------------
631
632// NewCollection returns a new, unstarted Collection instance.
633func NewCollection(options CollectionOptions) (
634	Collection, error) {
635	histograms := make(ghistogram.Histograms)
636	histograms["ExecuteBatchBytes"] =
637		ghistogram.NewNamedHistogram("ExecuteBatchBytes", 10, 4, 4)
638	histograms["ExecuteBatchOpsCount"] =
639		ghistogram.NewNamedHistogram("ExecuteBatchOpsCount", 10, 4, 4)
640	histograms["ExecuteBatchUsecs"] =
641		ghistogram.NewNamedHistogram("ExecuteBatchUsecs", 10, 4, 4)
642	histograms["MergerUsecs"] =
643		ghistogram.NewNamedHistogram("MergerUsecs", 10, 4, 4)
644	histograms["MutationKeyBytes"] =
645		ghistogram.NewNamedHistogram("MutationKeyBytes", 10, 4, 4)
646	histograms["MutationValBytes"] =
647		ghistogram.NewNamedHistogram("MutationValBytes", 10, 4, 4)
648
649	c := &collection{
650		options:            &options,
651		stopCh:             make(chan struct{}),
652		pingMergerCh:       make(chan ping, 10),
653		doneMergerCh:       make(chan struct{}),
654		donePersisterCh:    make(chan struct{}),
655		lowerLevelSnapshot: NewSnapshotWrapper(options.LowerLevelInit, nil),
656		stats:              &CollectionStats{},
657		histograms:         histograms,
658	}
659
660	c.stackDirtyTopCond = sync.NewCond(&c.m)
661	c.stackDirtyBaseCond = sync.NewCond(&c.m)
662
663	return c, nil
664}
665