From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001 From: upstream source tree Date: Sun, 15 Mar 2015 20:14:05 -0400 Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository. --- gcc/testsuite/go.test/test/bench/timing.log | 500 ++++++++++++++++++++++++++++ 1 file changed, 500 insertions(+) create mode 100644 gcc/testsuite/go.test/test/bench/timing.log (limited to 'gcc/testsuite/go.test/test/bench/timing.log') diff --git a/gcc/testsuite/go.test/test/bench/timing.log b/gcc/testsuite/go.test/test/bench/timing.log new file mode 100644 index 000000000..e7b0b48c1 --- /dev/null +++ b/gcc/testsuite/go.test/test/bench/timing.log @@ -0,0 +1,500 @@ +All tests on r45 or r70 + +Aug 3 2009 + +First version of fasta. Translation of fasta.c, fetched from + http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4 + +fasta -n 25000000 + gcc -O2 fasta.c 5.98u 0.00s 6.01r + gccgo -O2 fasta.go 8.82u 0.02s 8.85r + 6g fasta.go 13.50u 0.02s 13.53r + 6g -B fasta.go 12.99u 0.02s 13.02r + +Aug 4 2009 +[added timing.sh] + +# myrandom: +# hand-written optimization of integer division +# use int32->float conversion +fasta -n 25000000 + # probably I/O library inefficiencies + gcc -O2 fasta.c 5.99u 0.00s 6.00r + gccgo -O2 fasta.go 8.82u 0.02s 8.85r + gc fasta 10.70u 0.00s 10.77r + gc_B fasta 10.09u 0.03s 10.12r + +reverse-complement < output-of-fasta-25000000 + # we don't know - memory cache behavior? 
+ gcc -O2 reverse-complement.c 2.04u 0.94s 10.54r + gccgo -O2 reverse-complement.go 6.54u 0.63s 7.17r + gc reverse-complement 6.55u 0.70s 7.26r + gc_B reverse-complement 6.32u 0.70s 7.10r + +nbody 50000000 + # math.Sqrt needs to be in assembly; inlining is probably the other 50% + gcc -O2 nbody.c 21.61u 0.01s 24.80r + gccgo -O2 nbody.go 118.55u 0.02s 120.32r + gc nbody 100.84u 0.00s 100.85r + gc_B nbody 103.33u 0.00s 103.39r +[ +hacked Sqrt in assembler + gc nbody 31.97u 0.00s 32.01r +] + +binary-tree 15 # too slow to use 20 + # memory allocation and garbage collection + gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r + gccgo -O2 binary-tree.go 1.69u 0.46s 2.15r + gccgo -O2 binary-tree-freelist.go 8.48u 0.00s 8.48r + gc binary-tree 9.60u 0.01s 9.62r + gc binary-tree-freelist 0.48u 0.01s 0.50r + +August 5, 2009 + +fannkuch 12 + # bounds checking is half the difference + # rest might be registerization + gcc -O2 fannkuch.c 60.09u 0.01s 60.32r + gccgo -O2 fannkuch.go 64.89u 0.00s 64.92r + gc fannkuch 124.59u 0.00s 124.67r + gc_B fannkuch 91.14u 0.00s 91.16r + +regex-dna 100000 + # regexp code is slow on trivial regexp + gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.99r + gc regexp-dna 26.94u 0.18s 28.75r + gc_B regexp-dna 26.51u 0.09s 26.75r + +spectral-norm 5500 + gcc -O2 spectral-norm.c -lm 11.54u 0.00s 11.55r + gccgo -O2 spectral-norm.go 12.20u 0.00s 12.23r + gc spectral-norm 50.23u 0.00s 50.36r + gc_B spectral-norm 49.69u 0.01s 49.83r + gc spectral-norm-parallel 24.47u 0.03s 11.05r # has shift >>1 not div /2 + [using >>1 instead of /2 : gc gives 24.33u 0.00s 24.33r] + +August 6, 2009 + +k-nucleotide 5000000 + # string maps are slower than glib string maps + gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 k-nucleotide.c: 10.72u 0.01s 10.74r + gccgo -O2 k-nucleotide.go 21.64u 0.83s 22.78r + gc k-nucleotide 16.08u 0.06s 16.50r + gc_B k-nucleotide 17.32u 0.02s 17.37r + +mandelbrot 5500 + # floating point code generator should use more 
registers + gcc -O2 mandelbrot.c 56.13u 0.02s 56.17r + gccgo -O2 mandelbrot.go 57.49u 0.01s 57.51r + gc mandelbrot 74.32u 0.00s 74.35r + gc_B mandelbrot 74.28u 0.01s 74.31r + +meteor 16000 + # we don't know + gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r + gccgo -O2 meteor-contest.go 0.12u 0.00s 0.14r + gc meteor-contest 0.24u 0.00s 0.26r + gc_B meteor-contest 0.23u 0.00s 0.24r + +pidigits 10000 + # bignum is slower than gmp + gcc -O2 pidigits.c -lgmp 2.60u 0.00s 2.62r + gc pidigits 77.69u 0.14s 78.18r + gc_B pidigits 74.26u 0.18s 75.41r + gc_B pidigits 68.48u 0.20s 69.31r # special case: no bounds checking in bignum + +August 7 2009 + +# New gc does better division by powers of 2. Significant improvements: + +spectral-norm 5500 + # floating point code generator should use more registers; possibly inline evalA + gcc -O2 spectral-norm.c -lm 11.50u 0.00s 11.50r + gccgo -O2 spectral-norm.go 12.02u 0.00s 12.02r + gc spectral-norm 23.98u 0.00s 24.00r # new time is 0.48 times old time, 52% faster + gc_B spectral-norm 23.71u 0.01s 23.72r # ditto + gc spectral-norm-parallel 24.04u 0.00s 6.26r # /2 put back. 
note: 4x faster (on r70, idle) + +k-nucleotide 1000000 + # string maps are slower than glib string maps + gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.82u 0.04s 10.87r + gccgo -O2 k-nucleotide.go 22.73u 0.89s 23.63r + gc k-nucleotide 15.97u 0.03s 16.04r + gc_B k-nucleotide 15.86u 0.06s 15.93r # 8.5% faster, but probably due to weird cache effects in previous version + +pidigits 10000 + # bignum is slower than gmp + gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.58r + gc pidigits 71.24u 0.04s 71.28r # 8.5% faster + gc_B pidigits 71.25u 0.03s 71.29r # 4% faster + +threadring 50000000 + gcc -O2 threadring.c -lpthread 35.51u 160.21s 199.50r + gccgo -O2 threadring.go 90.33u 459.95s 448.03r + gc threadring 33.11u 0.00s 33.14r + GOMAXPROCS=4 gc threadring 114.48u 226.65s 371.59r + # change wait code to do <-make(chan int) instead of time.Sleep + gc threadring 28.41u 0.01s 29.35r + GOMAXPROCS=4 gc threadring 112.59u 232.83s 384.72r + +chameneos 6000000 + gcc -O2 chameneosredux.c -lpthread 18.14u 276.52s 76.93r + gc chameneosredux 20.19u 0.01s 20.23r + +Aug 10 2009 + +# new 6g with better fp registers, fast div and mod of integers +# complete set of timings listed. significant changes marked *** + +fasta -n 25000000 + # probably I/O library inefficiencies + gcc -O2 fasta.c 5.96u 0.00s 5.97r + gc fasta 10.59u 0.01s 10.61r + gc_B fasta 9.92u 0.02s 9.95r + +reverse-complement < output-of-fasta-25000000 + # we don't know - memory cache behavior? 
+ gcc -O2 reverse-complement.c 1.96u 1.56s 16.23r + gccgo -O2 reverse-complement.go 6.41u 0.62s 7.05r + gc reverse-complement 6.46u 0.70s 7.17r + gc_B reverse-complement 6.22u 0.72s 6.95r + +nbody 50000000 + # math.Sqrt needs to be in assembly; inlining is probably the other 50% + gcc -O2 nbody.c 21.26u 0.01s 21.28r + gccgo -O2 nbody.go 116.68u 0.07s 116.80r + gc nbody 86.64u 0.01s 86.68r # -14% + gc_B nbody 85.72u 0.02s 85.77r # *** -17% + +binary-tree 15 # too slow to use 20 + # memory allocation and garbage collection + gcc -O2 binary-tree.c -lm 0.87u 0.00s 0.87r + gccgo -O2 binary-tree.go 1.61u 0.47s 2.09r + gccgo -O2 binary-tree-freelist.go 0.00u 0.00s 0.01r + gc binary-tree 9.11u 0.01s 9.13r # *** -5% + gc binary-tree-freelist 0.47u 0.01s 0.48r + +fannkuch 12 + # bounds checking is half the difference + # rest might be registerization + gcc -O2 fannkuch.c 59.92u 0.00s 59.94r + gccgo -O2 fannkuch.go 65.54u 0.00s 65.58r + gc fannkuch 123.98u 0.01s 124.04r + gc_B fannkuch 90.75u 0.00s 90.78r + +regex-dna 100000 + # regexp code is slow on trivial regexp + gcc -O2 regex-dna.c -lpcre 0.91u 0.00s 0.92r + gc regex-dna 27.25u 0.02s 27.28r + gc_B regex-dna 29.51u 0.03s 29.55r + +spectral-norm 5500 + # possibly inline evalA + gcc -O2 spectral-norm.c -lm 11.57u 0.00s 11.57r + gccgo -O2 spectral-norm.go 12.07u 0.01s 12.08r + gc spectral-norm 23.99u 0.00s 24.00r + gc_B spectral-norm 23.73u 0.00s 23.75r + +k-nucleotide 1000000 + # string maps are slower than glib string maps + gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.63u 0.02s 10.69r + gccgo -O2 k-nucleotide.go 23.19u 0.91s 24.12r + gc k-nucleotide 16.73u 0.04s 16.78r # *** +5% (but this one seems to vary by more than that) + gc_B k-nucleotide 16.46u 0.04s 16.51r # *** +5% + +mandelbrot 16000 + gcc -O2 mandelbrot.c 56.16u 0.00s 56.16r + gccgo -O2 mandelbrot.go 57.41u 0.01s 57.42r + gc mandelbrot 64.05u 0.02s 64.08r # *** -14% + gc_B mandelbrot 64.10u 0.02s 64.14r # *** -14% + 
+meteor 16000 + # we don't know + gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r + gccgo -O2 meteor-contest.go 0.12u 0.00s 0.12r + gc meteor-contest 0.18u 0.00s 0.20r # *** -25% + gc_B meteor-contest 0.17u 0.00s 0.18r # *** -24% + +pidigits 10000 + # bignum is slower than gmp + gcc -O2 pidigits.c -lgmp 2.57u 0.00s 2.57r + gc pidigits 71.82u 0.04s 71.89r + gc_B pidigits 71.84u 0.08s 71.98r + +threadring 50000000 + gcc -O2 threadring.c -lpthread 30.91u 164.33s 204.57r + gccgo -O2 threadring.go 87.12u 460.04s 447.61r + gc threadring 38.55u 0.00s 38.56r # *** +16% + +chameneos 6000000 + gcc -O2 chameneosredux.c -lpthread 17.93u 323.65s 88.47r + gc chameneosredux 21.72u 0.00s 21.73r + +August 10 2009 + +# In-place versions for some bignum operations. +pidigits 10000 + gcc -O2 pidigits.c -lgmp 2.56u 0.00s 2.57r + gc pidigits 55.22u 0.04s 55.29r # *** -23% + gc_B pidigits 55.49u 0.02s 55.60r # *** -23% + +September 3 2009 + +# New 6g inlines slices, has a few other tweaks. +# Complete rerun. Significant changes marked. + +fasta -n 25000000 + # probably I/O library inefficiencies + gcc -O2 fasta.c 5.96u 0.00s 5.96r + gc fasta 10.63u 0.02s 10.66r + gc_B fasta 9.92u 0.01s 9.94r + +reverse-complement < output-of-fasta-25000000 + # we don't know - memory cache behavior? 
+ gcc -O2 reverse-complement.c 1.92u 0.33s 2.93r + gccgo -O2 reverse-complement.go 6.76u 0.72s 7.58r # +5% + gc reverse-complement 6.59u 0.70s 7.29r # +2% + gc_B reverse-complement 5.57u 0.80s 6.37r # -10% + +nbody 50000000 + # math.Sqrt needs to be in assembly; inlining is probably the other 50% + # also loop alignment appears to be critical + gcc -O2 nbody.c 21.28u 0.00s 21.28r + gccgo -O2 nbody.go 119.21u 0.00s 119.22r # +2% + gc nbody 109.72u 0.00s 109.78r # + 28% ***** + gc_B nbody 85.90u 0.00s 85.91r + +binary-tree 15 # too slow to use 20 + # memory allocation and garbage collection + gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r + gccgo -O2 binary-tree.go 1.88u 0.54s 2.42r # +17% + gccgo -O2 binary-tree-freelist.go 0.01u 0.01s 0.02r + gc binary-tree 8.94u 0.01s 8.96r # -2% + gc binary-tree-freelist 0.47u 0.01s 0.48r + +fannkuch 12 + # bounds checking is half the difference + # rest might be registerization + gcc -O2 fannkuch.c 60.12u 0.00s 60.12r + gccgo -O2 fannkuch.go 92.62u 0.00s 92.66r # +41% *** + gc fannkuch 123.90u 0.00s 123.92r + gc_B fannkuch 89.71u 0.00s 89.74r # -1% + +regex-dna 100000 + # regexp code is slow on trivial regexp + gcc -O2 regex-dna.c -lpcre 0.88u 0.00s 0.88r + gc regex-dna 25.77u 0.01s 25.79r # -5% + gc_B regex-dna 26.05u 0.02s 26.09r # -12% *** + +spectral-norm 5500 + # possibly inline evalA + gcc -O2 spectral-norm.c -lm 11.51u 0.00s 11.51r + gccgo -O2 spectral-norm.go 11.95u 0.00s 11.96r + gc spectral-norm 24.23u 0.00s 24.23r + gc_B spectral-norm 23.83u 0.00s 23.84r + +k-nucleotide 1000000 + # string maps are slower than glib string maps + gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.68u 0.04s 10.72r + gccgo -O2 k-nucleotide.go 23.03u 0.88s 23.92r + gc k-nucleotide 15.79u 0.05s 15.85r # -5% (but this one seems to vary by more than that) + gc_B k-nucleotide 17.88u 0.05s 17.95r # +8% (ditto) + +mandelbrot 16000 + gcc -O2 mandelbrot.c 56.17u 0.02s 56.20r + gccgo -O2 mandelbrot.go 56.74u 0.02s 
56.79r # -1% + gc mandelbrot 63.31u 0.01s 63.35r # -1% + gc_B mandelbrot 63.29u 0.00s 63.31r # -1% + +meteor 16000 + # we don't know + gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r + gccgo -O2 meteor-contest.go 0.11u 0.00s 0.12r + gc meteor-contest 0.18u 0.00s 0.19r + gc_B meteor-contest 0.17u 0.00s 0.18r + +pidigits 10000 + # bignum is slower than gmp + gcc -O2 pidigits.c -lgmp 2.56u 0.00s 2.57r + gc pidigits 55.87u 0.03s 55.91r + gc_B pidigits 55.93u 0.03s 55.99r + +# these tests are compared using real time, since they run multiple processors +# accuracy probably low +threadring 50000000 + gcc -O2 threadring.c -lpthread 26.31u 164.69s 199.92r # -2% + gccgo -O2 threadring.go 87.90u 487.26s 472.81r # +6% + gc threadring 28.89u 0.00s 28.90r # -25% *** + +chameneos 6000000 + gcc -O2 chameneosredux.c -lpthread 16.41u 296.91s 81.17r # -8% + gc chameneosredux 19.97u 0.00s 19.97r # -8% + +Sep 22, 2009 + +# 6g inlines sliceslice in most cases. + +fasta -n 25000000 + # probably I/O library inefficiencies + gc fasta 10.24u 0.00s 10.25r # -4% + gc_B fasta 9.68u 0.01s 9.69r # -3% + +reverse-complement < output-of-fasta-25000000 + # we don't know - memory cache behavior? 
+ gc reverse-complement 6.67u 0.69s 7.37r # +1% + gc_B reverse-complement 6.00u 0.64s 6.65r # +7% + +nbody -n 50000000 + # math.Sqrt needs to be in assembly; inlining is probably the other 50% + # also loop alignment appears to be critical + gc nbody 86.27u 0.00s 86.29r # -21% + gc_B nbody 104.52u 0.00s 104.54r # +22% + +fannkuch 12 + # bounds checking is half the difference + # rest might be registerization + gc fannkuch 128.36u 0.00s 128.37r # +4% + gc_B fannkuch 89.32u 0.00s 89.34r + +regex-dna 100000 + # regexp code is slow on trivial regexp + gc regex-dna 24.82u 0.01s 24.86r # -4% + gc_B regex-dna 24.55u 0.01s 24.57r # -6% + +spectral-norm 5500 + # possibly inline evalA + gc spectral-norm 24.05u 0.00s 24.07r # -1% + gc_B spectral-norm 23.60u 0.00s 23.65r # -1% + +k-nucleotide 1000000 + # string maps are slower than glib string maps + gc k-nucleotide 17.84u 0.04s 17.89r # +13% but mysterious variation continues + gc_B k-nucleotide 15.56u 0.08s 15.65r # -13% (ditto) + +mandelbrot 16000 + gc mandelbrot 64.08u 0.01s 64.11r # +1% + gc_B mandelbrot 64.04u 0.00s 64.05r # +1% + +pidigits 10000 + # bignum is slower than gmp + gc pidigits 58.68u 0.02s 58.72r # +5% + gc_B pidigits 58.86u 0.05s 58.99r # +5% + +# these tests are compared using real time, since they run multiple processors +# accuracy probably low +threadring 50000000 + gc threadring 32.70u 0.02s 32.77r # +13% + +chameneos 6000000 + gc chameneosredux 26.62u 0.00s 26.63r # +13% + +Sep 24, 2009 + +# Sqrt now in assembler for 6g. +nbody -n 50000000 + # remember, at least for 6g, alignment of loops may be important + gcc -O2 nbody.c 21.24u 0.00s 21.25r + gccgo -O2 nbody.go 121.03u 0.00s 121.04r + gc nbody 30.26u 0.00s 30.27r # -65% *** + gc_B nbody 30.20u 0.02s 30.22r # -72% *** + +Nov 13 2009 + +# fix bug in regexp; take performance hit. good regexps will come in time. 
+regex-dna 100000 + gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.94r + gc regex-dna 29.78u 0.03s 29.83r + gc_B regex-dna 32.63u 0.03s 32.74r + +Nov 24 2009 + +# Roger Peppe's rewrite of the benchmark +chameneos 6000000 + gcc -O2 chameneosredux.c -lpthread 18.00u 303.29s 83.64r + gc chameneosredux 12.10u 0.00s 12.10r # 2.22X faster + +Jan 6, 2010 + +# Long-overdue update. All numbers included in this complete run. +# Some programs (e.g. reverse-complement) rewritten for speed. +# Regular expressions much faster in common cases (although still far behind PCRE) +# Bignum stuff improved +# Better (but sometimes slower) locking in channels. + +fasta -n 25000000 + gcc -O2 fasta.c 5.99u 0.01s 6.00r + gc fasta 9.11u 0.00s 9.12r # -11% + gc_B fasta 8.60u 0.00s 8.62r # +12% ?? + +reverse-complement < output-of-fasta-25000000 + gcc -O2 reverse-complement.c 2.00u 0.80s 9.54r + gccgo -O2 reverse-complement.go 4.57u 0.35s 4.94r # 33% faster + gc reverse-complement 2.01u 0.38s 2.40r # 3.3X faster + gc_B reverse-complement 1.88u 0.36s 2.24r # 3.2X faster +GOGC=off + gc reverse-complement 2.01u 0.35s 2.37r + gc_B reverse-complement 1.86u 0.32s 2.19r + +nbody -n 50000000 + gcc -O2 nbody.c 21.28u 0.00s 21.31r + gccgo -O2 nbody.go 80.02u 0.00s 80.05r # 33% faster + gc nbody 30.13u 0.00s 30.13r + gc_B nbody 29.89u 0.01s 29.91r + +binary-tree 15 # too slow to use 20 + gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r + gccgo -O2 binary-tree.go 4.82u 0.41s 5.24r # 2.5X slower + gccgo -O2 binary-tree-freelist.go 0.00u 0.00s 0.00r + gc binary-tree 7.23u 0.01s 7.25r # -19% + gc binary-tree-freelist 0.43u 0.00s 0.44r # -9% + +fannkuch 12 + gcc -O2 fannkuch.c 60.17u 0.00s 60.17r + gccgo -O2 fannkuch.go 78.47u 0.01s 78.49r + gc fannkuch 128.86u 0.00s 128.96r + gc_B fannkuch 90.17u 0.00s 90.21r + +regex-dna 100000 + gcc -O2 regex-dna.c -lpcre 0.90u 0.00s 0.92r + gc regex-dna 9.48u 0.01s 9.50r # 3.1X faster + gc_B regex-dna 9.08u 0.00s 9.10r # 3.6X faster + +spectral-norm 5500 + gcc -O2 spectral-norm.c 
-lm 11.48u 0.00s 11.48r + gccgo -O2 spectral-norm.go 11.68u 0.00s 11.70r + gc spectral-norm 23.98u 0.00s 23.99r + gc_B spectral-norm 23.68u 0.00s 23.69r + +k-nucleotide 1000000 + gcc -O2 k-nucleotide.c 10.85u 0.04s 10.90r + gccgo -O2 k-nucleotide.go 25.26u 0.87s 26.14r + gc k-nucleotide 15.28u 0.06s 15.37r # restored; mysterious variation continues + gc_B k-nucleotide 15.97u 0.03s 16.00r + +mandelbrot 16000 + gcc -O2 mandelbrot.c 56.12u 0.01s 56.15r + gccgo -O2 mandelbrot.go 56.86u 0.01s 56.89r + gc mandelbrot 66.05u 0.00s 66.07r # -3% + gc_B mandelbrot 66.06u 0.00s 66.07r # -3% + +meteor 16000 + gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r + gccgo -O2 meteor-contest.go 0.12u 0.00s 0.12r + gc meteor-contest 0.17u 0.00s 0.17r + gc_B meteor-contest 0.15u 0.00s 0.16r + +pidigits 10000 + gcc -O2 pidigits.c -lgmp 2.57u 0.00s 2.59r + gc pidigits 38.27u 0.02s 38.30r # 1.5X faster + gc_B pidigits 38.27u 0.02s 38.31r # 1.5X faster + +threadring 50000000 + gcc -O2 threadring.c 37.11u 170.59s 212.75r + gccgo -O2 threadring.go 89.67u 447.56s 442.55r # -6.5% + gc threadring 36.08u 0.04s 36.15r # +10% + +chameneos 6000000 + gcc -O2 chameneosredux.c -lpthread 19.02u 331.08s 90.79r + gc chameneosredux 12.54u 0.00s 12.55r + -- cgit v1.2.3