diff options
Diffstat (limited to 'gcc/config/i386/athlon.md')
-rw-r--r-- | gcc/config/i386/athlon.md | 1187 |
1 files changed, 1187 insertions, 0 deletions
diff --git a/gcc/config/i386/athlon.md b/gcc/config/i386/athlon.md new file mode 100644 index 000000000..2896a154d --- /dev/null +++ b/gcc/config/i386/athlon.md @@ -0,0 +1,1187 @@ +;; Copyright (C) 2002, 2003, 2004, 2005, 2006, +;; 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; +;; AMD Athlon Scheduling +;; +;; The Athlon does contain three pipelined FP units, three integer units and +;; three address generation units. +;; +;; The predecode logic is determining boundaries of instructions in the 64 +;; byte cache line. So the cache line straddling problem of K6 might be issue +;; here as well, but it is not noted in the documentation. +;; +;; Three DirectPath instructions decoders and only one VectorPath decoder +;; is available. They can decode three DirectPath instructions or one VectorPath +;; instruction per cycle. +;; Decoded macro instructions are then passed to 72 entry instruction control +;; unit, that passes +;; it to the specialized integer (18 entry) and fp (36 entry) schedulers. +;; +;; The load/store queue unit is not attached to the schedulers but +;; communicates with all the execution units separately instead. + +(define_attr "athlon_decode" "direct,vector,double" + (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,leave") + (const_string "vector") + (and (eq_attr "type" "push") + (match_operand 1 "memory_operand" "")) + (const_string "vector") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load,store") + (eq_attr "mode" "XF"))) + (const_string "vector")] + (const_string "direct"))) + +(define_attr "amdfam10_decode" "direct,vector,double" + (const_string "direct")) +;; +;; decode0 decode1 decode2 +;; \ | / +;; instruction control unit (72 entry scheduler) +;; | | +;; integer scheduler (18) stack map +;; / | | | | \ stack rename +;; ieu0 agu0 ieu1 agu1 ieu2 agu2 scheduler +;; | agu0 | agu1 agu2 register file +;; | \ | | / | | | +;; \ /\ | / fadd fmul fstore +;; \ / \ | / fadd fmul fstore +;; imul load/store (2x) fadd fmul fstore + +(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp") +(define_cpu_unit "athlon-decode0" "athlon") +(define_cpu_unit "athlon-decode1" "athlon") +(define_cpu_unit "athlon-decode2" "athlon") +(define_cpu_unit "athlon-decodev" "athlon") +;; Model the fact that double decoded instruction may take 2 cycles +;; to decode when decoder2 and decoder0 in next cycle +;; is used (this is needed to allow troughput of 1.5 double decoded +;; instructions per cycle). +;; +;; In order to avoid dependence between reservation of decoder +;; and other units, we model decoder as two stage fully pipelined unit +;; and only double decoded instruction may occupy unit in the first cycle. +;; With this scheme however two double instructions can be issued cycle0. +;; +;; Avoid this by using presence set requiring decoder0 to be allocated +;; too. Vector decoded instructions then can't be issued when +;; modeled as consuming decoder0+decoder1+decoder2. +;; We solve that by specialized vector decoder unit and exclusion set. +(presence_set "athlon-decode2" "athlon-decode0") +(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2") +(define_reservation "athlon-vector" "nothing,athlon-decodev") +(define_reservation "athlon-direct0" "nothing,athlon-decode0") +(define_reservation "athlon-direct" "nothing, + (athlon-decode0 | athlon-decode1 + | athlon-decode2)") +;; Double instructions behaves like two direct instructions. +(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0) + | (nothing,(athlon-decode0 + athlon-decode1)) + | (nothing,(athlon-decode1 + athlon-decode2)))") + +;; Agu and ieu unit results in extremely large automatons and +;; in our approximation they are hardly filled in. Only ieu +;; unit can, as issue rate is 3 and agu unit is always used +;; first in the insn reservations. Skip the models. + +;(define_cpu_unit "athlon-ieu0" "athlon_ieu") +;(define_cpu_unit "athlon-ieu1" "athlon_ieu") +;(define_cpu_unit "athlon-ieu2" "athlon_ieu") +;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)") +(define_reservation "athlon-ieu" "nothing") +(define_cpu_unit "athlon-ieu0" "athlon") +;(define_cpu_unit "athlon-agu0" "athlon_agu") +;(define_cpu_unit "athlon-agu1" "athlon_agu") +;(define_cpu_unit "athlon-agu2" "athlon_agu") +;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)") +(define_reservation "athlon-agu" "nothing") + +(define_cpu_unit "athlon-mult" "athlon_mult") + +(define_cpu_unit "athlon-load0" "athlon_load") +(define_cpu_unit "athlon-load1" "athlon_load") +(define_reservation "athlon-load" "athlon-agu, + (athlon-load0 | athlon-load1),nothing") +;; 128bit SSE instructions issue two loads at once +(define_reservation "athlon-load2" "athlon-agu, + (athlon-load0 + athlon-load1),nothing") + +(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)") +;; 128bit SSE instructions issue two stores at once +(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)") + + +;; The FP operations start to execute at stage 12 in the pipeline, while +;; integer operations start to execute at stage 9 for Athlon and 11 for K8 +;; Compensate the difference for Athlon because it results in significantly +;; smaller automata. +(define_reservation "athlon-fpsched" "nothing,nothing,nothing") +;; The floating point loads. +(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)") +(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)") +(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)") +(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)") + + +;; The three fp units are fully pipelined with latency of 3 +(define_cpu_unit "athlon-fadd" "athlon_fp") +(define_cpu_unit "athlon-fmul" "athlon_fp") +(define_cpu_unit "athlon-fstore" "athlon_fp") +(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)") +(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)") + +;; Vector operations usually consume many of pipes. +(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)") + + +;; Jump instructions are executed in the branch unit completely transparent to us +(define_insn_reservation "athlon_branch" 0 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (eq_attr "type" "ibr")) + "athlon-direct,athlon-ieu") +(define_insn_reservation "athlon_call" 0 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "call,callv")) + "athlon-vector,athlon-ieu") +(define_insn_reservation "athlon_call_amdfam10" 0 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "call,callv")) + "athlon-double,athlon-ieu") + +;; Latency of push operation is 3 cycles, but ESP value is available +;; earlier +(define_insn_reservation "athlon_push" 2 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (eq_attr "type" "push")) + "athlon-direct,athlon-agu,athlon-store") +(define_insn_reservation "athlon_pop" 4 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "pop")) + "athlon-vector,athlon-load,athlon-ieu") +(define_insn_reservation "athlon_pop_k8" 3 + (and (eq_attr "cpu" "k8,generic64") + (eq_attr "type" "pop")) + "athlon-double,(athlon-ieu+athlon-load)") +(define_insn_reservation "athlon_pop_amdfam10" 3 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "pop")) + "athlon-direct,(athlon-ieu+athlon-load)") +(define_insn_reservation "athlon_leave" 3 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "leave")) + "athlon-vector,(athlon-ieu+athlon-load)") +(define_insn_reservation "athlon_leave_k8" 3 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (eq_attr "type" "leave")) + "athlon-double,(athlon-ieu+athlon-load)") + +;; Lea executes in AGU unit with 2 cycles latency. +(define_insn_reservation "athlon_lea" 2 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "lea")) + "athlon-direct,athlon-agu,nothing") +;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10 +(define_insn_reservation "athlon_lea_amdfam10" 1 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "lea")) + "athlon-direct,athlon-agu,nothing") + +;; Mul executes in special multiplier unit attached to IEU0 +(define_insn_reservation "athlon_imul" 5 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "imul") + (eq_attr "memory" "none,unknown"))) + "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0") +;; ??? Widening multiply is vector or double. +(define_insn_reservation "athlon_imul_k8_DI" 4 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "imul") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "none,unknown")))) + "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0") +(define_insn_reservation "athlon_imul_k8" 3 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "imul") + (eq_attr "memory" "none,unknown"))) + "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0") +(define_insn_reservation "athlon_imul_amdfam10_HI" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "imul") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "none,unknown")))) + "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0") +(define_insn_reservation "athlon_imul_mem" 8 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "imul") + (eq_attr "memory" "load,both"))) + "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu") +(define_insn_reservation "athlon_imul_mem_k8_DI" 7 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "imul") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "load,both")))) + "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu") +(define_insn_reservation "athlon_imul_mem_k8" 6 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "imul") + (eq_attr "memory" "load,both"))) + "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu") + +;; Idiv cannot execute in parallel with other instructions. Dealing with it +;; as with short latency vector instruction is good approximation avoiding +;; scheduler from trying too hard to can hide it's latency by overlap with +;; other instructions. +;; ??? Experiments show that the idiv can overlap with roughly 6 cycles +;; of the other code +;; Using the same heuristics for amdfam10 as K8 with idiv + +(define_insn_reservation "athlon_idiv" 6 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (and (eq_attr "type" "idiv") + (eq_attr "memory" "none,unknown"))) + "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))") +(define_insn_reservation "athlon_idiv_mem" 9 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (and (eq_attr "type" "idiv") + (eq_attr "memory" "load,both"))) + "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))") +;; The parallelism of string instructions is not documented. Model it same way +;; as idiv to create smaller automata. This probably does not matter much. +;; Using the same heuristics for amdfam10 as K8 with idiv +(define_insn_reservation "athlon_str" 6 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (and (eq_attr "type" "str") + (eq_attr "memory" "load,both,store"))) + "athlon-vector,athlon-load,athlon-ieu0*6") + +(define_insn_reservation "athlon_idirect" 1 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "athlon_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "none,unknown")))) + "athlon-direct,athlon-ieu") +(define_insn_reservation "athlon_idirect_amdfam10" 1 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "none,unknown")))) + "athlon-direct,athlon-ieu") +(define_insn_reservation "athlon_ivector" 2 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "none,unknown")))) + "athlon-vector,athlon-ieu,athlon-ieu") +(define_insn_reservation "athlon_ivector_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "none,unknown")))) + "athlon-vector,athlon-ieu,athlon-ieu") + +(define_insn_reservation "athlon_idirect_loadmov" 3 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (and (eq_attr "type" "imov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-load") + +(define_insn_reservation "athlon_idirect_load" 4 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "athlon_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-load,athlon-ieu") +(define_insn_reservation "athlon_idirect_load_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-load,athlon-ieu") +(define_insn_reservation "athlon_ivector_load" 6 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-load,athlon-ieu,athlon-ieu") +(define_insn_reservation "athlon_ivector_load_amdfam10" 6 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-load,athlon-ieu,athlon-ieu") + +(define_insn_reservation "athlon_idirect_movstore" 1 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (and (eq_attr "type" "imov") + (eq_attr "memory" "store"))) + "athlon-direct,athlon-agu,athlon-store") + +(define_insn_reservation "athlon_idirect_both" 4 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "athlon_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "both")))) + "athlon-direct,athlon-load, + athlon-ieu,athlon-store, + athlon-store") +(define_insn_reservation "athlon_idirect_both_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "both")))) + "athlon-direct,athlon-load, + athlon-ieu,athlon-store, + athlon-store") + +(define_insn_reservation "athlon_ivector_both" 6 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "both")))) + "athlon-vector,athlon-load, + athlon-ieu, + athlon-ieu, + athlon-store") +(define_insn_reservation "athlon_ivector_both_amdfam10" 6 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "both")))) + "athlon-vector,athlon-load, + athlon-ieu, + athlon-ieu, + athlon-store") + +(define_insn_reservation "athlon_idirect_store" 1 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "athlon_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "store")))) + "athlon-direct,(athlon-ieu+athlon-agu), + athlon-store") +(define_insn_reservation "athlon_idirect_store_amdfam10" 1 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "store")))) + "athlon-direct,(athlon-ieu+athlon-agu), + athlon-store") + +(define_insn_reservation "athlon_ivector_store" 2 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "store")))) + "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu, + athlon-store") +(define_insn_reservation "athlon_ivector_store_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "store")))) + "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu, + athlon-store") + +;; Athlon floatin point unit +(define_insn_reservation "athlon_fldxf" 12 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load") + (eq_attr "mode" "XF")))) + "athlon-vector,athlon-fpload2,athlon-fvector*9") +(define_insn_reservation "athlon_fldxf_k8" 13 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load") + (eq_attr "mode" "XF")))) + "athlon-vector,athlon-fpload2k8,athlon-fvector*9") +;; Assume superforwarding to take place so effective latency of fany op is 0. +(define_insn_reservation "athlon_fld" 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-fany") +(define_insn_reservation "athlon_fld_k8" 2 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fstore") + +(define_insn_reservation "athlon_fstxf" 10 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "store,both") + (eq_attr "mode" "XF")))) + "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))") +(define_insn_reservation "athlon_fstxf_k8" 8 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "store,both") + (eq_attr "mode" "XF")))) + "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))") +(define_insn_reservation "athlon_fst" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "store,both"))) + "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") +(define_insn_reservation "athlon_fst_k8" 2 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "store,both"))) + "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") +(define_insn_reservation "athlon_fist" 4 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (eq_attr "type" "fistp,fisttp")) + "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") +(define_insn_reservation "athlon_fmov" 2 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (eq_attr "type" "fmov")) + "athlon-direct,athlon-fpsched,athlon-faddmul") +(define_insn_reservation "athlon_fadd_load" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fop") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-fadd") +(define_insn_reservation "athlon_fadd_load_k8" 6 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "fop") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_fadd" 4 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (eq_attr "type" "fop")) + "athlon-direct,athlon-fpsched,athlon-fadd") +(define_insn_reservation "athlon_fmul_load" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmul") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-fmul") +(define_insn_reservation "athlon_fmul_load_k8" 6 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "fmul") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fmul") +(define_insn_reservation "athlon_fmul" 4 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (eq_attr "type" "fmul")) + "athlon-direct,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_fsgn" 2 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (eq_attr "type" "fsgn")) + "athlon-direct,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_fdiv_load" 24 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-fmul") +(define_insn_reservation "athlon_fdiv_load_k8" 13 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fmul") +(define_insn_reservation "athlon_fdiv" 24 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fdiv")) + "athlon-direct,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_fdiv_k8" 11 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (eq_attr "type" "fdiv")) + "athlon-direct,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_fpspc_load" 103 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (and (eq_attr "type" "fpspc") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fpload,athlon-fvector") +(define_insn_reservation "athlon_fpspc" 100 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (eq_attr "type" "fpspc")) + "athlon-vector,athlon-fpsched,athlon-fvector") +(define_insn_reservation "athlon_fcmov_load" 7 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmov") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fpload,athlon-fvector") +(define_insn_reservation "athlon_fcmov" 7 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fcmov")) + "athlon-vector,athlon-fpsched,athlon-fvector") +(define_insn_reservation "athlon_fcmov_load_k8" 17 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "fcmov") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fploadk8,athlon-fvector") +(define_insn_reservation "athlon_fcmov_k8" 15 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (eq_attr "type" "fcmov")) + "athlon-vector,athlon-fpsched,athlon-fvector") +;; fcomi is vector decoded by uses only one pipe. +(define_insn_reservation "athlon_fcomi_load" 3 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (and (eq_attr "athlon_decode" "vector") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-fpload,athlon-fadd") +(define_insn_reservation "athlon_fcomi_load_k8" 5 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "fcmp") + (and (eq_attr "athlon_decode" "vector") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_fcomi" 3 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (and (eq_attr "athlon_decode" "vector") + (eq_attr "type" "fcmp"))) + "athlon-vector,athlon-fpsched,athlon-fadd") +(define_insn_reservation "athlon_fcom_load" 2 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-fadd") +(define_insn_reservation "athlon_fcom_load_k8" 4 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "fcmp") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_fcom" 2 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (eq_attr "type" "fcmp")) + "athlon-direct,athlon-fpsched,athlon-fadd") +;; Never seen by the scheduler because we still don't do post reg-stack +;; scheduling. +;(define_insn_reservation "athlon_fxch" 2 +; (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") +; (eq_attr "type" "fxch")) +; "athlon-direct,athlon-fpsched,athlon-fany") + +;; Athlon handle MMX operations in the FPU unit with shorter latencies + +(define_insn_reservation "athlon_movlpd_load" 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssemov") + (match_operand:DF 1 "memory_operand" ""))) + "athlon-direct,athlon-fpload,athlon-fany") +(define_insn_reservation "athlon_movlpd_load_k8" 2 + (and (eq_attr "cpu" "k8") + (and (eq_attr "type" "ssemov") + (match_operand:DF 1 "memory_operand" ""))) + "athlon-direct,athlon-fploadk8,athlon-fstore") +(define_insn_reservation "athlon_movsd_load_generic64" 2 + (and (eq_attr "cpu" "generic64") + (and (eq_attr "type" "ssemov") + (match_operand:DF 1 "memory_operand" ""))) + "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)") +(define_insn_reservation "athlon_movaps_load_k8" 2 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "ssemov") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "load")))) + "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore") +(define_insn_reservation "athlon_movaps_load" 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssemov") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)") +(define_insn_reservation "athlon_movss_load" 1 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssemov") + (and (eq_attr "mode" "SF,DI") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-fpload,(athlon-fany*2)") +(define_insn_reservation "athlon_movss_load_k8" 1 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "ssemov") + (and (eq_attr "mode" "SF,DI") + (eq_attr "memory" "load")))) + "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)") +(define_insn_reservation "athlon_mmxsseld" 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "mmxmov,ssemov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-fany") +(define_insn_reservation "athlon_mmxsseld_k8" 2 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "mmxmov,ssemov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fstore") +;; On AMDFAM10 all double, single and integer packed and scalar SSEx data +;; loads generated are direct path, latency of 2 and do not use any FP +;; executions units. No separate entries for movlpx/movhpx loads, which +;; are direct path, latency of 4 and use the FADD/FMUL FP execution units, +;; as they will not be generated. +(define_insn_reservation "athlon_sseld_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssemov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8") +;; On AMDFAM10 MMX data loads generated are direct path, latency of 4 +;; and can use any FP executions units +(define_insn_reservation "athlon_mmxld_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "mmxmov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8, athlon-fany") +(define_insn_reservation "athlon_mmxssest" 3 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "mmxmov,ssemov") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "store,both")))) + "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)") +(define_insn_reservation "athlon_mmxssest_k8" 3 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "mmxmov,ssemov") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "store,both")))) + "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)") +(define_insn_reservation "athlon_mmxssest_short" 2 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "type" "mmxmov,ssemov") + (eq_attr "memory" "store,both"))) + "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") +;; On AMDFAM10 all double, single and integer packed SSEx data stores +;; generated are all double path, latency of 2 and use the FSTORE FP +;; execution unit. No entries separate for movupx/movdqu, which are +;; vector path, latency of 3 and use the FSTORE*2 FP execution unit, +;; as they will not be generated. +(define_insn_reservation "athlon_ssest_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssemov") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "store,both")))) + "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)") +;; On AMDFAM10 all double, single and integer scalar SSEx and MMX +;; data stores generated are all direct path, latency of 2 and use +;; the FSTORE FP execution unit +(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "mmxmov,ssemov") + (eq_attr "memory" "store,both"))) + "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") +(define_insn_reservation "athlon_movaps_k8" 2 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "ssemov") + (eq_attr "mode" "V4SF,V2DF,TI"))) + "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))") +(define_insn_reservation "athlon_movaps" 2 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssemov") + (eq_attr "mode" "V4SF,V2DF,TI"))) + "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)") +(define_insn_reservation "athlon_mmxssemov" 2 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "mmxmov,ssemov")) + "athlon-direct,athlon-fpsched,athlon-faddmul") +(define_insn_reservation "athlon_mmxmul_load" 4 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "type" "mmxmul") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-fmul") +(define_insn_reservation "athlon_mmxmul" 3 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "mmxmul")) + "athlon-direct,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_mmx_load" 3 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "unit" "mmx") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fpload,athlon-faddmul") +(define_insn_reservation "athlon_mmx" 2 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "unit" "mmx")) + "athlon-direct,athlon-fpsched,athlon-faddmul") +;; SSE operations are handled by the i387 unit as well. The latency +;; is same as for i387 operations for scalar operations + +(define_insn_reservation "athlon_sselog_load" 3 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "sselog,sselog1") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fpload2,(athlon-fmul*2)") +(define_insn_reservation "athlon_sselog_load_k8" 5 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "sselog,sselog1") + (eq_attr "memory" "load"))) + "athlon-double,athlon-fpload2k8,(athlon-fmul*2)") +(define_insn_reservation "athlon_sselog_load_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sselog,sselog1") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)") +(define_insn_reservation "athlon_sselog" 3 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "sselog,sselog1")) + "athlon-vector,athlon-fpsched,athlon-fmul*2") +(define_insn_reservation "athlon_sselog_k8" 3 + (and (eq_attr "cpu" "k8,generic64") + (eq_attr "type" "sselog,sselog1")) + "athlon-double,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_sselog_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "sselog,sselog1")) + "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)") + +;; ??? pcmp executes in addmul, probably not worthwhile to bother about that. +(define_insn_reservation "athlon_ssecmp_load" 2 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "mode" "SF,DF,DI") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-fpload,athlon-fadd") +(define_insn_reservation "athlon_ssecmp_load_k8" 4 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "mode" "SF,DF,DI,TI") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_ssecmp" 2 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (and (eq_attr "type" "ssecmp") + (eq_attr "mode" "SF,DF,DI,TI"))) + "athlon-direct,athlon-fpsched,athlon-fadd") +(define_insn_reservation "athlon_ssecmpvector_load" 3 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fpload2,(athlon-fadd*2)") +(define_insn_reservation "athlon_ssecmpvector_load_k8" 5 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "load"))) + "athlon-double,athlon-fpload2k8,(athlon-fadd*2)") +(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_ssecmpvector" 3 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "ssecmp")) + "athlon-vector,athlon-fpsched,(athlon-fadd*2)") +(define_insn_reservation "athlon_ssecmpvector_k8" 3 + (and (eq_attr "cpu" "k8,generic64") + (eq_attr "type" "ssecmp")) + "athlon-double,athlon-fpsched,(athlon-fadd*2)") +(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "ssecmp")) + "athlon-direct,athlon-fpsched,athlon-fadd") +(define_insn_reservation "athlon_ssecomi_load" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fpload,athlon-fadd") +(define_insn_reservation "athlon_ssecomi_load_k8" 6 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_ssecomi" 4 + (and (eq_attr "cpu" "athlon,k8,generic64") + (eq_attr "type" "ssecomi")) + "athlon-vector,athlon-fpsched,athlon-fadd") +(define_insn_reservation "athlon_ssecomi_amdfam10" 3 + (and (eq_attr "cpu" "amdfam10") +;; It seems athlon_ssecomi has a bug in the attr_type, fixed for amdfam10 + (eq_attr "type" "ssecomi")) + "athlon-direct,athlon-fpsched,athlon-fadd") +(define_insn_reservation "athlon_sseadd_load" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "sseadd") + (and (eq_attr "mode" "SF,DF,DI") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-fpload,athlon-fadd") +(define_insn_reservation "athlon_sseadd_load_k8" 6 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "sseadd") + (and (eq_attr "mode" "SF,DF,DI") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_sseadd" 4 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (and (eq_attr "type" "sseadd") + (eq_attr "mode" "SF,DF,DI"))) + "athlon-direct,athlon-fpsched,athlon-fadd") +(define_insn_reservation "athlon_sseaddvector_load" 5 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "sseadd") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fpload2,(athlon-fadd*2)") +(define_insn_reservation "athlon_sseaddvector_load_k8" 7 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "sseadd") + (eq_attr "memory" "load"))) + "athlon-double,athlon-fpload2k8,(athlon-fadd*2)") +(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseadd") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fadd") +(define_insn_reservation "athlon_sseaddvector" 5 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "sseadd")) + "athlon-vector,athlon-fpsched,(athlon-fadd*2)") +(define_insn_reservation "athlon_sseaddvector_k8" 5 + (and (eq_attr "cpu" "k8,generic64") + (eq_attr "type" "sseadd")) + "athlon-double,athlon-fpsched,(athlon-fadd*2)") +(define_insn_reservation "athlon_sseaddvector_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "sseadd")) + "athlon-direct,athlon-fpsched,athlon-fadd") + +;; Conversions behaves very irregularly and the scheduling is critical here. +;; Take each instruction separately. Assume that the mode is always set to the +;; destination one and athlon_decode is set to the K8 versions. + +;; cvtss2sd +(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4 + (and (eq_attr "cpu" "k8,athlon,generic64") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "athlon_decode" "direct") + (and (eq_attr "mode" "DF") + (eq_attr "memory" "load"))))) + "athlon-direct,athlon-fploadk8,athlon-fstore") +(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "DF") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") +(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "athlon_decode" "direct") + (eq_attr "mode" "DF")))) + "athlon-direct,athlon-fpsched,athlon-fstore") +(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "vector") + (eq_attr "mode" "DF")))) + "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)") +;; cvtps2pd. Model same way the other double decoded FP conversions. +(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5 + (and (eq_attr "cpu" "k8,athlon,generic64") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "athlon_decode" "double") + (and (eq_attr "mode" "V2DF,V4SF,TI") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fpload2k8,(athlon-fstore*2)") +(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "direct") + (and (eq_attr "mode" "V2DF,V4SF,TI") + (eq_attr "memory" "load"))))) + "athlon-direct,athlon-fploadk8,athlon-fstore") +(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3 + (and (eq_attr "cpu" "k8,athlon,generic64") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "athlon_decode" "double") + (eq_attr "mode" "V2DF,V4SF,TI")))) + "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore") +(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "direct") + (eq_attr "mode" "V2DF,V4SF,TI")))) + "athlon-direct,athlon-fpsched,athlon-fstore") +;; cvtsi2sd mem,reg is directpath path (cvtsi2sd reg,reg is doublepath) +;; cvtsi2sd has troughput 1 and is executed in store unit with latency of 6 +(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6 + (and (eq_attr "cpu" "athlon,k8") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "athlon_decode" "direct") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load"))))) + "athlon-direct,athlon-fploadk8,athlon-fstore") +(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") +;; cvtsi2ss mem, reg is doublepath +(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "athlon_decode" "double") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load"))))) + "athlon-vector,athlon-fpload,(athlon-fstore*2)") +(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "athlon_decode" "double") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-fstore*2)") +(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") +;; cvtsi2sd reg,reg is double decoded (vector on Athlon) +(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11 + (and (eq_attr "cpu" "k8,athlon,generic64") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "athlon_decode" "double") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "none"))))) + "athlon-double,athlon-fploadk8,athlon-fstore") +(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "none"))))) + "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") +;; cvtsi2ss reg, reg is doublepath +(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "none"))))) + "athlon-vector,athlon-fploadk8,(athlon-fvector*2)") +(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "none"))))) + "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") +;; cvtsd2ss mem,reg is doublepath, troughput unknown, latency 9 +(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9 + (and (eq_attr "cpu" "k8,athlon,generic64") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "athlon_decode" "double") + (and (eq_attr "mode" "SF") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-fstore*3)") +(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "SF") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") +;; cvtsd2ss reg,reg is vectorpath, troughput unknown, latency 12 +(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "mode" "SF") + (eq_attr "memory" "none"))))) + "athlon-vector,athlon-fpsched,(athlon-fvector*3)") +(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "vector") + (and (eq_attr "mode" "SF") + (eq_attr "memory" "none"))))) + "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)") +(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fpload2k8,(athlon-fstore*3)") +(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") +;; cvtpd2ps mem,reg is vectorpath, troughput unknown, latency 10 +;; ??? Why it is fater than cvtsd2ss? +(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "none"))))) + "athlon-vector,athlon-fpsched,athlon-fvector*2") +(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssecvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "V4SF,V2DF,TI") + (eq_attr "memory" "none"))))) + "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)") +;; cvtsd2si mem,reg is doublepath, troughput 1, latency 9 +(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9 + (and (eq_attr "cpu" "athlon,k8,generic64") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "mode" "SI,DI") + (eq_attr "memory" "load"))))) + "athlon-vector,athlon-fploadk8,athlon-fvector") +(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "SI,DI") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)") +;; cvtsd2si reg,reg is doublepath, troughput 1, latency 9 +(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "athlon_decode" "double") + (and (eq_attr "mode" "SI,DI") + (eq_attr "memory" "none"))))) + "athlon-vector,athlon-fpsched,athlon-fvector") +(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "athlon_decode" "double") + (and (eq_attr "mode" "SI,DI") + (eq_attr "memory" "none"))))) + "athlon-double,athlon-fpsched,athlon-fstore") +(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "SI,DI") + (eq_attr "memory" "none"))))) + "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)") +;; cvtpd2dq reg,mem is doublepath, troughput 1, latency 9 on amdfam10 +(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "TI") + (eq_attr "memory" "load"))))) + "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)") +;; cvtpd2dq reg,mem is doublepath, troughput 1, latency 7 on amdfam10 +(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseicvt") + (and (eq_attr "amdfam10_decode" "double") + (and (eq_attr "mode" "TI") + (eq_attr "memory" "none"))))) + "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)") + + +(define_insn_reservation "athlon_ssemul_load" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssemul") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-fpload,athlon-fmul") +(define_insn_reservation "athlon_ssemul_load_k8" 6 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "ssemul") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-fploadk8,athlon-fmul") +(define_insn_reservation "athlon_ssemul" 4 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "SF,DF"))) + "athlon-direct,athlon-fpsched,athlon-fmul") +(define_insn_reservation "athlon_ssemulvector_load" 5 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fpload2,(athlon-fmul*2)") +(define_insn_reservation "athlon_ssemulvector_load_k8" 7 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "load"))) + "athlon-double,athlon-fpload2k8,(athlon-fmul*2)") +(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fmul") +(define_insn_reservation "athlon_ssemulvector" 5 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "ssemul")) + "athlon-vector,athlon-fpsched,(athlon-fmul*2)") +(define_insn_reservation "athlon_ssemulvector_k8" 5 + (and (eq_attr "cpu" "k8,generic64") + (eq_attr "type" "ssemul")) + "athlon-double,athlon-fpsched,(athlon-fmul*2)") +(define_insn_reservation "athlon_ssemulvector_amdfam10" 4 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "ssemul")) + "athlon-direct,athlon-fpsched,athlon-fmul") +;; divsd timings. divss is faster +(define_insn_reservation "athlon_ssediv_load" 20 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-fpload,athlon-fmul*17") +(define_insn_reservation "athlon_ssediv_load_k8" 22 + (and (eq_attr "cpu" "k8,generic64,amdfam10") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-fploadk8,athlon-fmul*17") +(define_insn_reservation "athlon_ssediv" 20 + (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") + (and (eq_attr "type" "ssediv") + (eq_attr "mode" "SF,DF"))) + "athlon-direct,athlon-fpsched,athlon-fmul*17") +(define_insn_reservation "athlon_ssedivvector_load" 39 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-fpload2,athlon-fmul*34") +(define_insn_reservation "athlon_ssedivvector_load_k8" 35 + (and (eq_attr "cpu" "k8,generic64") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load"))) + "athlon-double,athlon-fpload2k8,athlon-fmul*34") +(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fploadk8,athlon-fmul*17") +(define_insn_reservation "athlon_ssedivvector" 39 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "ssediv")) + "athlon-vector,athlon-fmul*34") +(define_insn_reservation "athlon_ssedivvector_k8" 39 + (and (eq_attr "cpu" "k8,generic64") + (eq_attr "type" "ssediv")) + "athlon-double,athlon-fmul*34") +(define_insn_reservation "athlon_ssedivvector_amdfam10" 20 + (and (eq_attr "cpu" "amdfam10") + (eq_attr "type" "ssediv")) + "athlon-direct,athlon-fmul*17") +(define_insn_reservation "athlon_sseins_amdfam10" 5 + (and (eq_attr "cpu" "amdfam10") + (and (eq_attr "type" "sseins") + (eq_attr "mode" "TI"))) + "athlon-vector,athlon-fpsched,athlon-faddmul") |