1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This package is copied from the golang source distribution. It will
6// be modified to add parallelism.
7
8// Package sort provides primitives for sorting slices and user-defined
9// collections.
10package sort
11
12import "sort"
13
14func min(a, b int) int {
15	if a < b {
16		return a
17	}
18	return b
19}
20
21// Insertion sort
22func insertionSort(data sort.Interface, a, b int) {
23	for i := a + 1; i < b; i++ {
24		for j := i; j > a && data.Less(j, j-1); j-- {
25			data.Swap(j, j-1)
26		}
27	}
28}
29
30// siftDown implements the heap property on data[lo, hi).
31// first is an offset into the array where the root of the heap lies.
32func siftDown(data sort.Interface, lo, hi, first int) {
33	root := lo
34	for {
35		child := 2*root + 1
36		if child >= hi {
37			break
38		}
39		if child+1 < hi && data.Less(first+child, first+child+1) {
40			child++
41		}
42		if !data.Less(first+root, first+child) {
43			return
44		}
45		data.Swap(first+root, first+child)
46		root = child
47	}
48}
49
50func heapSort(data sort.Interface, a, b int) {
51	first := a
52	lo := 0
53	hi := b - a
54
55	// Build heap with greatest element at top.
56	for i := (hi - 1) / 2; i >= 0; i-- {
57		siftDown(data, i, hi, first)
58	}
59
60	// Pop elements, largest first, into end of data.
61	for i := hi - 1; i >= 0; i-- {
62		data.Swap(first, first+i)
63		siftDown(data, lo, i, first)
64	}
65}
66
67// Quicksort, following Bentley and McIlroy,
68// ``Engineering a Sort Function,'' SP&E November 1993.
69
70// medianOfThree moves the median of the three values data[a], data[b], data[c] into data[a].
71func medianOfThree(data sort.Interface, a, b, c int) {
72	m0 := b
73	m1 := a
74	m2 := c
75	// bubble sort on 3 elements
76	if data.Less(m1, m0) {
77		data.Swap(m1, m0)
78	}
79	if data.Less(m2, m1) {
80		data.Swap(m2, m1)
81	}
82	if data.Less(m1, m0) {
83		data.Swap(m1, m0)
84	}
85	// now data[m0] <= data[m1] <= data[m2]
86}
87
88func swapRange(data sort.Interface, a, b, n int) {
89	for i := 0; i < n; i++ {
90		data.Swap(a+i, b+i)
91	}
92}
93
94func doPivot(data sort.Interface, lo, hi int) (midlo, midhi int) {
95	m := lo + (hi-lo)/2 // Written like this to avoid integer overflow.
96	if hi-lo > 40 {
97		// Tukey's ``Ninther,'' median of three medians of three.
98		s := (hi - lo) / 8
99		medianOfThree(data, lo, lo+s, lo+2*s)
100		medianOfThree(data, m, m-s, m+s)
101		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
102	}
103	medianOfThree(data, lo, m, hi-1)
104
105	// Invariants are:
106	//	data[lo] = pivot (set up by ChoosePivot)
107	//	data[lo <= i < a] = pivot
108	//	data[a <= i < b] < pivot
109	//	data[b <= i < c] is unexamined
110	//	data[c <= i < d] > pivot
111	//	data[d <= i < hi] = pivot
112	//
113	// Once b meets c, can swap the "= pivot" sections
114	// into the middle of the slice.
115	pivot := lo
116	a, b, c, d := lo+1, lo+1, hi, hi
117	for {
118		for b < c {
119			if data.Less(b, pivot) { // data[b] < pivot
120				b++
121			} else if !data.Less(pivot, b) { // data[b] = pivot
122				data.Swap(a, b)
123				a++
124				b++
125			} else {
126				break
127			}
128		}
129		for b < c {
130			if data.Less(pivot, c-1) { // data[c-1] > pivot
131				c--
132			} else if !data.Less(c-1, pivot) { // data[c-1] = pivot
133				data.Swap(c-1, d-1)
134				c--
135				d--
136			} else {
137				break
138			}
139		}
140		if b >= c {
141			break
142		}
143		// data[b] > pivot; data[c-1] < pivot
144		data.Swap(b, c-1)
145		b++
146		c--
147	}
148
149	n := min(b-a, a-lo)
150	swapRange(data, lo, b-n, n)
151
152	n = min(hi-d, d-c)
153	swapRange(data, c, hi-n, n)
154
155	return lo + b - a, hi - (d - c)
156}
157
158type childChannel chan bool
159
160const _PTHRESH = 256
161
162func quickSort(data sort.Interface, a, b, maxDepth int, parent childChannel) {
163	defer notify(parent)
164
165	var children childChannel
166	childCount := 0
167
168	if b-a >= _PTHRESH {
169		children = make(childChannel, 64)
170		defer close(children)
171	}
172
173	for b-a > 7 {
174		if maxDepth == 0 {
175			heapSort(data, a, b)
176			// MB-25900 await children
177			for ; childCount > 0; childCount-- {
178				<-children
179			}
180			return
181		}
182		maxDepth--
183		mlo, mhi := doPivot(data, a, b)
184
185		// Avoiding recursion on the larger subproblem guarantees
186		// a stack depth of at most lg(b-a).
187		if mlo-a < b-mhi {
188			if (mlo < mhi) && (mlo-a >= _PTHRESH) {
189				go quickSort(data, a, mlo, maxDepth, children)
190				childCount++
191			} else {
192				quickSort(data, a, mlo, maxDepth, nil)
193			}
194			a = mhi // i.e., quickSort(data, mhi, b)
195		} else {
196			if (mlo < mhi) && (b-mhi >= _PTHRESH) {
197				go quickSort(data, mhi, b, maxDepth, children)
198				childCount++
199			} else {
200				quickSort(data, mhi, b, maxDepth, nil)
201			}
202			b = mlo // i.e., quickSort(data, a, mlo)
203		}
204	}
205
206	// Await children
207	for ; childCount > 0; childCount-- {
208		<-children
209	}
210
211	if b-a > 1 {
212		insertionSort(data, a, b)
213	}
214}
215
216func notify(parent childChannel) {
217	if parent != nil {
218		parent <- false
219	}
220}
221
222// Sort sorts data.
223// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
224// data.Less and data.Swap. The sort is not guaranteed to be stable.
225func Sort(data sort.Interface) {
226	// Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached.
227	n := data.Len()
228	maxDepth := 0
229	for i := n; i > 0; i >>= 1 {
230		maxDepth++
231	}
232	maxDepth *= 2
233	quickSort(data, 0, n, maxDepth, nil)
234}
235
236type reverse struct {
237	// This embedded sort.Interface permits Reverse to use the methods of
238	// another sort.Interface implementation.
239	sort.Interface
240}
241
242// Less returns the opposite of the embedded implementation's Less method.
243func (r reverse) Less(i, j int) bool {
244	return r.Interface.Less(j, i)
245}
246
247// Reverse returns the reverse order for data.
248func Reverse(data sort.Interface) sort.Interface {
249	return &reverse{data}
250}
251
252// IsSorted reports whether data is sorted.
253func IsSorted(data sort.Interface) bool {
254	n := data.Len()
255	for i := n - 1; i > 0; i-- {
256		if data.Less(i, i-1) {
257			return false
258		}
259	}
260	return true
261}
262
263// Convenience types for common cases
264
265// IntSlice attaches the methods of sort.Interface to []int, sorting in increasing order.
266type IntSlice []int
267
268func (p IntSlice) Len() int           { return len(p) }
269func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
270func (p IntSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
271
272// Sort is a convenience method.
273func (p IntSlice) Sort() { Sort(p) }
274
275// Float64Slice attaches the methods of sort.Interface to []float64, sorting in increasing order.
276type Float64Slice []float64
277
278func (p Float64Slice) Len() int           { return len(p) }
279func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
280func (p Float64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
281
282// isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
283func isNaN(f float64) bool {
284	return f != f
285}
286
287// Sort is a convenience method.
288func (p Float64Slice) Sort() { Sort(p) }
289
290// StringSlice attaches the methods of sort.Interface to []string, sorting in increasing order.
291type StringSlice []string
292
293func (p StringSlice) Len() int           { return len(p) }
294func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
295func (p StringSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
296
297// Sort is a convenience method.
298func (p StringSlice) Sort() { Sort(p) }
299
300// Convenience wrappers for common cases
301
302// Ints sorts a slice of ints in increasing order.
303func Ints(a []int) { Sort(IntSlice(a)) }
304
305// Float64s sorts a slice of float64s in increasing order.
306func Float64s(a []float64) { Sort(Float64Slice(a)) }
307
308// Strings sorts a slice of strings in increasing order.
309func Strings(a []string) { Sort(StringSlice(a)) }
310
311// IntsAreSorted tests whether a slice of ints is sorted in increasing order.
312func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
313
314// Float64sAreSorted tests whether a slice of float64s is sorted in increasing order.
315func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
316
317// StringsAreSorted tests whether a slice of strings is sorted in increasing order.
318func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
319
320// Notes on stable sorting:
321// The used algorithms are simple and provable correct on all input and use
322// only logarithmic additional stack space.  They perform well if compared
323// experimentaly to other stable in-place sorting algorithms.
324//
325// Remarks on other algoritms evaluated:
326//  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
327//    Not faster.
328//  - GCC's __rotate for block rotations: Not faster.
329//  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
330//    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
331//    The given algorithms are in-place, number of Swap and Assignments
332//    grow as n log n but the algorithm is not stable.
333//  - "Fast Stable In-Plcae Sorting with O(n) Data Moves" J.I. Munro and
334//    V. Raman in Algorithmica (1996) 16, 115-160:
335//    This algorithm either needs additional 2n bits or works only if there
336//    are enough different elements available to encode some permutations
337//    which have to be undone later (so not stable an any input).
338//  - All the optimal in-place sorting/merging algorithms I found are either
339//    unstable or rely on enough different elements in each step to encode the
340//    performed block rearrangements. See also "In-Place Merging Algorithms",
341//    Denham Coates-Evely, Department of Computer Science, Kings College,
342//    January 2004 and the reverences in there.
343//  - Often "optimal" algorithms are optimal in the number of assignments
344//    but sort.Interface has only Swap as operation.
345
346// Stable sorts data while keeping the original order of equal elements.
347//
348// It makes one call to data.Len to determine n, O(n*log(n)) calls to
349// data.Less and O(n*log(n)*log(n)) calls to data.Swap.
350func Stable(data sort.Interface) {
351	n := data.Len()
352	blockSize := 20
353	a, b := 0, blockSize
354	for b <= n {
355		insertionSort(data, a, b)
356		a = b
357		b += blockSize
358	}
359	insertionSort(data, a, n)
360
361	for blockSize < n {
362		a, b = 0, 2*blockSize
363		for b <= n {
364			symMerge(data, a, a+blockSize, b)
365			a = b
366			b += 2 * blockSize
367		}
368		symMerge(data, a, a+blockSize, n)
369		blockSize *= 2
370	}
371}
372
373// SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using
374// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
375// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
376// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
377// Computer Science, pages 714-723. Springer, 2004.
378//
379// Let M = m-a and N = b-n. Wolog M < N.
380// The recursion depth is bound by ceil(log(N+M)).
381// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
382// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
383//
384// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
385// rotation algorithm wich uses O(M+N+gcd(M+N)) assignments. The argumentation
386// in the paper carries through for Swap operations, especially as the block
387// swapping rotate uses only O(M+N) Swaps.
388func symMerge(data sort.Interface, a, m, b int) {
389	if a >= m || m >= b {
390		return
391	}
392
393	mid := a + (b-a)/2
394	n := mid + m
395	start := 0
396	if m > mid {
397		start = n - b
398		r, p := mid, n-1
399		for start < r {
400			c := start + (r-start)/2
401			if !data.Less(p-c, c) {
402				start = c + 1
403			} else {
404				r = c
405			}
406		}
407	} else {
408		start = a
409		r, p := m, n-1
410		for start < r {
411			c := start + (r-start)/2
412			if !data.Less(p-c, c) {
413				start = c + 1
414			} else {
415				r = c
416			}
417		}
418	}
419	end := n - start
420	rotate(data, start, m, end)
421	symMerge(data, a, start, mid)
422	symMerge(data, mid, end, b)
423}
424
425// Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
426// Data of the form 'x u v y' is changed to 'x v u y'.
427// Rotate performs at most b-a many calls to data.Swap.
428func rotate(data sort.Interface, a, m, b int) {
429	i := m - a
430	if i == 0 {
431		return
432	}
433	j := b - m
434	if j == 0 {
435		return
436	}
437
438	if i == j {
439		swapRange(data, a, m, i)
440		return
441	}
442
443	p := a + i
444	for i != j {
445		if i > j {
446			swapRange(data, p-i, p, j)
447			i -= j
448		} else {
449			swapRange(data, p-i, p+j-i, i)
450			j -= i
451		}
452	}
453	swapRange(data, p-i, p, i)
454}
455
456/*
457Complexity of Stable Sorting
458
459
460Complexity of block swapping rotation
461
462Each Swap puts one new element into its correct, final position.
463Elements which reach their final position are no longer moved.
464Thus block swapping rotation needs |u|+|v| calls to Swaps.
465This is best possible as each element might need a move.
466
467Pay attention when comparing to other optimal algorithms which
468typically count the number of assignments instead of swaps:
469E.g. the optimal algorithm of Dudzinski and Dydek for in-place
470rotations uses O(u + v + gcd(u,v)) assignments which is
471better than our O(3 * (u+v)) as gcd(u,v) <= u.
472
473
474Stable sorting by SymMerge and BlockSwap rotations
475
476SymMerg complexity for same size input M = N:
477Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
478Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
479
480(The following argument does not fuzz over a missing -1 or
481other stuff which does not impact the final result).
482
483Let n = data.Len(). Assume n = 2^k.
484
485Plain merge sort performs log(n) = k iterations.
486On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
487
488Thus iteration i of merge sort performs:
489Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
490Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
491
492In total k = log(n) iterations are performed; so in total:
493Calls to Less O(log(n) * n)
494Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
495   = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
496
497
498Above results should generalize to arbitrary n = 2^k + p
499and should not be influenced by the initial insertion sort phase:
500Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
501size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
502Merge sort iterations start at i = log(bs). With t = log(bs) constant:
503Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
504   = O(n * log(n))
505Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
506
507*/
508