diff options
Diffstat (limited to 'libgo/go/sort')
-rw-r--r-- | libgo/go/sort/search.go | 110 | ||||
-rw-r--r-- | libgo/go/sort/search_test.go | 137 | ||||
-rw-r--r-- | libgo/go/sort/sort.go | 206 | ||||
-rw-r--r-- | libgo/go/sort/sort_test.go | 267 |
4 files changed, 720 insertions, 0 deletions
diff --git a/libgo/go/sort/search.go b/libgo/go/sort/search.go new file mode 100644 index 000000000..6828e19b6 --- /dev/null +++ b/libgo/go/sort/search.go @@ -0,0 +1,110 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements binary search. + +package sort + +// Search uses binary search to find and return the smallest index i +// in [0, n) at which f(i) is true, assuming that on the range [0, n), +// f(i) == true implies f(i+1) == true. That is, Search requires that +// f is false for some (possibly empty) prefix of the input range [0, n) +// and then true for the (possibly empty) remainder; Search returns +// the first true index. If there is no such index, Search returns n. +// Search calls f(i) only for i in the range [0, n). +// +// A common use of Search is to find the index i for a value x in +// a sorted, indexable data structure like an array or slice. +// In this case, the argument f, typically a closure, captures the value +// to be searched for, and how the data structure is indexed and +// ordered. +// +// For instance, given a slice data sorted in ascending order, +// the call Search(len(data), func(i int) bool { return data[i] >= 23 }) +// returns the smallest index i such that data[i] >= 23. If the caller +// wants to find whether 23 is in the slice, it must test data[i] == 23 +// separately. +// +// Searching data sorted in descending order would use the <= +// operator instead of the >= operator. +// +// To complete the example above, the following code tries to find the value +// x in an integer slice data sorted in ascending order: +// +// x := 23 +// i := sort.Search(len(data), func(i int) bool { return data[i] >= x }) +// if i < len(data) && data[i] == x { +// // x is present at data[i] +// } else { +// // x is not present in data, +// // but i is the index where it would be inserted. +// } +// +// As a more whimsical example, this program guesses your number: +// +// func GuessingGame() { +// var s string +// fmt.Printf("Pick an integer from 0 to 100.\n") +// answer := sort.Search(100, func(i int) bool { +// fmt.Printf("Is your number <= %d? ", i) +// fmt.Scanf("%s", &s) +// return s != "" && s[0] == 'y' +// }) +// fmt.Printf("Your number is %d.\n", answer) +// } +// +func Search(n int, f func(int) bool) int { + // Define f(-1) == false and f(n) == true. + // Invariant: f(i-1) == false, f(j) == true. + i, j := 0, n + for i < j { + h := i + (j-i)/2 // avoid overflow when computing h + // i ≤ h < j + if !f(h) { + i = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + // i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i. + return i +} + + +// Convenience wrappers for common cases. + +// SearchInts searches for x in a sorted slice of ints and returns the index +// as specified by Search. The array must be sorted in ascending order. +// +func SearchInts(a []int, x int) int { + return Search(len(a), func(i int) bool { return a[i] >= x }) +} + + +// SearchFloat64s searches for x in a sorted slice of float64s and returns the index +// as specified by Search. The array must be sorted in ascending order. +// +func SearchFloat64s(a []float64, x float64) int { + return Search(len(a), func(i int) bool { return a[i] >= x }) +} + + +// SearchStrings searches for x in a sorted slice of strings and returns the index +// as specified by Search. The array must be sorted in ascending order. +// +func SearchStrings(a []string, x string) int { + return Search(len(a), func(i int) bool { return a[i] >= x }) +} + + +// Search returns the result of applying SearchInts to the receiver and x. +func (p IntArray) Search(x int) int { return SearchInts(p, x) } + + +// Search returns the result of applying SearchFloat64s to the receiver and x. +func (p Float64Array) Search(x float64) int { return SearchFloat64s(p, x) } + + +// Search returns the result of applying SearchStrings to the receiver and x. +func (p StringArray) Search(x string) int { return SearchStrings(p, x) } diff --git a/libgo/go/sort/search_test.go b/libgo/go/sort/search_test.go new file mode 100644 index 000000000..939f66af3 --- /dev/null +++ b/libgo/go/sort/search_test.go @@ -0,0 +1,137 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort + +import "testing" + + +func f(a []int, x int) func(int) bool { + return func(i int) bool { + return a[i] >= x + } +} + + +var data = []int{0: -10, 1: -5, 2: 0, 3: 1, 4: 2, 5: 3, 6: 5, 7: 7, 8: 11, 9: 100, 10: 100, 11: 100, 12: 1000, 13: 10000} + +var tests = []struct { + name string + n int + f func(int) bool + i int +}{ + {"empty", 0, nil, 0}, + {"1 1", 1, func(i int) bool { return i >= 1 }, 1}, + {"1 true", 1, func(i int) bool { return true }, 0}, + {"1 false", 1, func(i int) bool { return false }, 1}, + {"1e9 991", 1e9, func(i int) bool { return i >= 991 }, 991}, + {"1e9 true", 1e9, func(i int) bool { return true }, 0}, + {"1e9 false", 1e9, func(i int) bool { return false }, 1e9}, + {"data -20", len(data), f(data, -20), 0}, + {"data -10", len(data), f(data, -10), 0}, + {"data -9", len(data), f(data, -9), 1}, + {"data -6", len(data), f(data, -6), 1}, + {"data -5", len(data), f(data, -5), 1}, + {"data 3", len(data), f(data, 3), 5}, + {"data 11", len(data), f(data, 11), 8}, + {"data 99", len(data), f(data, 99), 9}, + {"data 100", len(data), f(data, 100), 9}, + {"data 101", len(data), f(data, 101), 12}, + {"data 10000", len(data), f(data, 10000), 13}, + {"data 10001", len(data), f(data, 10001), 14}, + {"descending a", 7, func(i int) bool { return []int{99, 99, 59, 42, 7, 0, -1, -1}[i] <= 7 }, 4}, + {"descending 7", 1e9, func(i int) bool { return 1e9-i <= 7 }, 1e9 - 7}, + {"overflow", 2e9, func(i int) bool { return false }, 2e9}, +} + + +func TestSearch(t *testing.T) { + for _, e := range tests { + i := Search(e.n, e.f) + if i != e.i { + t.Errorf("%s: expected index %d; got %d", e.name, e.i, i) + } + } +} + + +// log2 computes the binary logarithm of x, rounded up to the next integer. +// (log2(0) == 0, log2(1) == 0, log2(2) == 1, log2(3) == 2, etc.) +// +func log2(x int) int { + n := 0 + for p := 1; p < x; p += p { + // p == 2**n + n++ + } + // p/2 < x <= p == 2**n + return n +} + + +func TestSearchEfficiency(t *testing.T) { + n := 100 + step := 1 + for exp := 2; exp < 10; exp++ { + // n == 10**exp + // step == 10**(exp-2) + max := log2(n) + for x := 0; x < n; x += step { + count := 0 + i := Search(n, func(i int) bool { count++; return i >= x }) + if i != x { + t.Errorf("n = %d: expected index %d; got %d", n, x, i) + } + if count > max { + t.Errorf("n = %d, x = %d: expected <= %d calls; got %d", n, x, max, count) + } + } + n *= 10 + step *= 10 + } +} + + +// Smoke tests for convenience wrappers - not comprehensive. + +var fdata = []float64{0: -3.14, 1: 0, 2: 1, 3: 2, 4: 1000.7} +var sdata = []string{0: "f", 1: "foo", 2: "foobar", 3: "x"} + +var wrappertests = []struct { + name string + result int + i int +}{ + {"SearchInts", SearchInts(data, 11), 8}, + {"SearchFloat64s", SearchFloat64s(fdata, 2.1), 4}, + {"SearchStrings", SearchStrings(sdata, ""), 0}, + {"IntArray.Search", IntArray(data).Search(0), 2}, + {"Float64Array.Search", Float64Array(fdata).Search(2.0), 3}, + {"StringArray.Search", StringArray(sdata).Search("x"), 3}, +} + + +func TestSearchWrappers(t *testing.T) { + for _, e := range wrappertests { + if e.result != e.i { + t.Errorf("%s: expected index %d; got %d", e.name, e.i, e.result) + } + } +} + + +// Abstract exhaustive test: all sizes up to 100, +// all possible return values. If there are any small +// corner cases, this test exercises them. +func TestSearchExhaustive(t *testing.T) { + for size := 0; size <= 100; size++ { + for targ := 0; targ <= size; targ++ { + i := Search(size, func(i int) bool { return i >= targ }) + if i != targ { + t.Errorf("Search(%d, %d) = %d", size, targ, i) + } + } + } +} diff --git a/libgo/go/sort/sort.go b/libgo/go/sort/sort.go new file mode 100644 index 000000000..c7945d21b --- /dev/null +++ b/libgo/go/sort/sort.go @@ -0,0 +1,206 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The sort package provides primitives for sorting arrays +// and user-defined collections. +package sort + +// A type, typically a collection, that satisfies sort.Interface can be +// sorted by the routines in this package. The methods require that the +// elements of the collection be enumerated by an integer index. +type Interface interface { + // Len is the number of elements in the collection. + Len() int + // Less returns whether the element with index i should sort + // before the element with index j. + Less(i, j int) bool + // Swap swaps the elements with indexes i and j. + Swap(i, j int) +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +// Insertion sort +func insertionSort(data Interface, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data.Less(j, j-1); j-- { + data.Swap(j, j-1) + } + } +} + +// Quicksort, following Bentley and McIlroy, +// ``Engineering a Sort Function,'' SP&E November 1993. + +// Move the median of the three values data[a], data[b], data[c] into data[a]. +func medianOfThree(data Interface, a, b, c int) { + m0 := b + m1 := a + m2 := c + // bubble sort on 3 elements + if data.Less(m1, m0) { + data.Swap(m1, m0) + } + if data.Less(m2, m1) { + data.Swap(m2, m1) + } + if data.Less(m1, m0) { + data.Swap(m1, m0) + } + // now data[m0] <= data[m1] <= data[m2] +} + +func swapRange(data Interface, a, b, n int) { + for i := 0; i < n; i++ { + data.Swap(a+i, b+i) + } +} + +func doPivot(data Interface, lo, hi int) (midlo, midhi int) { + m := lo + (hi-lo)/2 // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThree(data, lo, lo+s, lo+2*s) + medianOfThree(data, m, m-s, m+s) + medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThree(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo <= i < a] = pivot + // data[a <= i < b] < pivot + // data[b <= i < c] is unexamined + // data[c <= i < d] > pivot + // data[d <= i < hi] = pivot + // + // Once b meets c, can swap the "= pivot" sections + // into the middle of the array. + pivot := lo + a, b, c, d := lo+1, lo+1, hi, hi + for b < c { + if data.Less(b, pivot) { // data[b] < pivot + b++ + continue + } + if !data.Less(pivot, b) { // data[b] = pivot + data.Swap(a, b) + a++ + b++ + continue + } + if data.Less(pivot, c-1) { // data[c-1] > pivot + c-- + continue + } + if !data.Less(c-1, pivot) { // data[c-1] = pivot + data.Swap(c-1, d-1) + c-- + d-- + continue + } + // data[b] > pivot; data[c-1] < pivot + data.Swap(b, c-1) + b++ + c-- + } + + n := min(b-a, a-lo) + swapRange(data, lo, b-n, n) + + n = min(hi-d, d-c) + swapRange(data, c, hi-n, n) + + return lo + b - a, hi - (d - c) +} + +func quickSort(data Interface, a, b int) { + for b-a > 7 { + mlo, mhi := doPivot(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSort(data, a, mlo) + a = mhi // i.e., quickSort(data, mhi, b) + } else { + quickSort(data, mhi, b) + b = mlo // i.e., quickSort(data, a, mlo) + } + } + if b-a > 1 { + insertionSort(data, a, b) + } +} + +func Sort(data Interface) { quickSort(data, 0, data.Len()) } + + +func IsSorted(data Interface) bool { + n := data.Len() + for i := n - 1; i > 0; i-- { + if data.Less(i, i-1) { + return false + } + } + return true +} + + +// Convenience types for common cases + +// IntArray attaches the methods of Interface to []int, sorting in increasing order. +type IntArray []int + +func (p IntArray) Len() int { return len(p) } +func (p IntArray) Less(i, j int) bool { return p[i] < p[j] } +func (p IntArray) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +// Sort is a convenience method. +func (p IntArray) Sort() { Sort(p) } + + +// Float64Array attaches the methods of Interface to []float64, sorting in increasing order. +type Float64Array []float64 + +func (p Float64Array) Len() int { return len(p) } +func (p Float64Array) Less(i, j int) bool { return p[i] < p[j] } +func (p Float64Array) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +// Sort is a convenience method. +func (p Float64Array) Sort() { Sort(p) } + + +// StringArray attaches the methods of Interface to []string, sorting in increasing order. +type StringArray []string + +func (p StringArray) Len() int { return len(p) } +func (p StringArray) Less(i, j int) bool { return p[i] < p[j] } +func (p StringArray) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +// Sort is a convenience method. +func (p StringArray) Sort() { Sort(p) } + + +// Convenience wrappers for common cases + +// SortInts sorts an array of ints in increasing order. +func SortInts(a []int) { Sort(IntArray(a)) } +// SortFloat64s sorts an array of float64s in increasing order. +func SortFloat64s(a []float64) { Sort(Float64Array(a)) } +// SortStrings sorts an array of strings in increasing order. +func SortStrings(a []string) { Sort(StringArray(a)) } + + +// IntsAreSorted tests whether an array of ints is sorted in increasing order. +func IntsAreSorted(a []int) bool { return IsSorted(IntArray(a)) } +// Float64sAreSorted tests whether an array of float64s is sorted in increasing order. +func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Array(a)) } +// StringsAreSorted tests whether an array of strings is sorted in increasing order. +func StringsAreSorted(a []string) bool { return IsSorted(StringArray(a)) } diff --git a/libgo/go/sort/sort_test.go b/libgo/go/sort/sort_test.go new file mode 100644 index 000000000..1bea8f032 --- /dev/null +++ b/libgo/go/sort/sort_test.go @@ -0,0 +1,267 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sort + +import ( + "fmt" + "rand" + "strconv" + "testing" +) + + +var ints = [...]int{74, 59, 238, -784, 9845, 959, 905, 0, 0, 42, 7586, -5467984, 7586} +var float64s = [...]float64{74.3, 59.0, 238.2, -784.0, 2.3, 9845.768, -959.7485, 905, 7.8, 7.8} +var strings = [...]string{"", "Hello", "foo", "bar", "foo", "f00", "%*&^*&^&", "***"} + +func TestSortIntArray(t *testing.T) { + data := ints + a := IntArray(data[0:]) + Sort(a) + if !IsSorted(a) { + t.Errorf("sorted %v", ints) + t.Errorf(" got %v", data) + } +} + +func TestSortFloat64Array(t *testing.T) { + data := float64s + a := Float64Array(data[0:]) + Sort(a) + if !IsSorted(a) { + t.Errorf("sorted %v", float64s) + t.Errorf(" got %v", data) + } +} + +func TestSortStringArray(t *testing.T) { + data := strings + a := StringArray(data[0:]) + Sort(a) + if !IsSorted(a) { + t.Errorf("sorted %v", strings) + t.Errorf(" got %v", data) + } +} + +func TestSortInts(t *testing.T) { + data := ints + SortInts(data[0:]) + if !IntsAreSorted(data[0:]) { + t.Errorf("sorted %v", ints) + t.Errorf(" got %v", data) + } +} + +func TestSortFloat64s(t *testing.T) { + data := float64s + SortFloat64s(data[0:]) + if !Float64sAreSorted(data[0:]) { + t.Errorf("sorted %v", float64s) + t.Errorf(" got %v", data) + } +} + +func TestSortStrings(t *testing.T) { + data := strings + SortStrings(data[0:]) + if !StringsAreSorted(data[0:]) { + t.Errorf("sorted %v", strings) + t.Errorf(" got %v", data) + } +} + +func TestSortLarge_Random(t *testing.T) { + data := make([]int, 1000000) + for i := 0; i < len(data); i++ { + data[i] = rand.Intn(100) + } + if IntsAreSorted(data) { + t.Fatalf("terrible rand.rand") + } + SortInts(data) + if !IntsAreSorted(data) { + t.Errorf("sort didn't sort - 1M ints") + } +} + +func BenchmarkSortString1K(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]string, 1<<10) + for i := 0; i < len(data); i++ { + data[i] = strconv.Itoa(i ^ 0x2cc) + } + b.StartTimer() + SortStrings(data) + b.StopTimer() + } +} + +func BenchmarkSortInt1K(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]int, 1<<10) + for i := 0; i < len(data); i++ { + data[i] = i ^ 0x2cc + } + b.StartTimer() + SortInts(data) + b.StopTimer() + } +} + +func BenchmarkSortInt64K(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]int, 1<<16) + for i := 0; i < len(data); i++ { + data[i] = i ^ 0xcccc + } + b.StartTimer() + SortInts(data) + b.StopTimer() + } +} + +const ( + _Sawtooth = iota + _Rand + _Stagger + _Plateau + _Shuffle + _NDist +) + +const ( + _Copy = iota + _Reverse + _ReverseFirstHalf + _ReverseSecondHalf + _Sorted + _Dither + _NMode +) + +type testingData struct { + desc string + t *testing.T + data []int + maxswap int // number of swaps allowed + nswap int +} + +func (d *testingData) Len() int { return len(d.data) } +func (d *testingData) Less(i, j int) bool { return d.data[i] < d.data[j] } +func (d *testingData) Swap(i, j int) { + if d.nswap >= d.maxswap { + d.t.Errorf("%s: used %d swaps sorting array of %d", d.desc, d.nswap, len(d.data)) + d.t.FailNow() + } + d.nswap++ + d.data[i], d.data[j] = d.data[j], d.data[i] +} + +func lg(n int) int { + i := 0 + for 1<<uint(i) < n { + i++ + } + return i +} + +func TestBentleyMcIlroy(t *testing.T) { + sizes := []int{100, 1023, 1024, 1025} + dists := []string{"sawtooth", "rand", "stagger", "plateau", "shuffle"} + modes := []string{"copy", "reverse", "reverse1", "reverse2", "sort", "dither"} + var tmp1, tmp2 [1025]int + for ni := 0; ni < len(sizes); ni++ { + n := sizes[ni] + for m := 1; m < 2*n; m *= 2 { + for dist := 0; dist < _NDist; dist++ { + j := 0 + k := 1 + data := tmp1[0:n] + for i := 0; i < n; i++ { + switch dist { + case _Sawtooth: + data[i] = i % m + case _Rand: + data[i] = rand.Intn(m) + case _Stagger: + data[i] = (i*m + i) % n + case _Plateau: + data[i] = min(i, m) + case _Shuffle: + if rand.Intn(m) != 0 { + j += 2 + data[i] = j + } else { + k += 2 + data[i] = k + } + } + } + + mdata := tmp2[0:n] + for mode := 0; mode < _NMode; mode++ { + switch mode { + case _Copy: + for i := 0; i < n; i++ { + mdata[i] = data[i] + } + case _Reverse: + for i := 0; i < n; i++ { + mdata[i] = data[n-i-1] + } + case _ReverseFirstHalf: + for i := 0; i < n/2; i++ { + mdata[i] = data[n/2-i-1] + } + for i := n / 2; i < n; i++ { + mdata[i] = data[i] + } + case _ReverseSecondHalf: + for i := 0; i < n/2; i++ { + mdata[i] = data[i] + } + for i := n / 2; i < n; i++ { + mdata[i] = data[n-(i-n/2)-1] + } + case _Sorted: + for i := 0; i < n; i++ { + mdata[i] = data[i] + } + // SortInts is known to be correct + // because mode Sort runs after mode _Copy. + SortInts(mdata) + case _Dither: + for i := 0; i < n; i++ { + mdata[i] = data[i] + i%5 + } + } + + desc := fmt.Sprintf("n=%d m=%d dist=%s mode=%s", n, m, dists[dist], modes[mode]) + d := &testingData{desc, t, mdata[0:n], n * lg(n) * 12 / 10, 0} + Sort(d) + + // If we were testing C qsort, we'd have to make a copy + // of the array and sort it ourselves and then compare + // x against it, to ensure that qsort was only permuting + // the data, not (for example) overwriting it with zeros. + // + // In go, we don't have to be so paranoid: since the only + // mutating method Sort can call is TestingData.swap, + // it suffices here just to check that the final array is sorted. + if !IntsAreSorted(mdata) { + t.Errorf("%s: ints not sorted", desc) + t.Errorf("\t%v", mdata) + t.FailNow() + } + } + } + } + } +} |