From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001 From: upstream source tree Date: Sun, 15 Mar 2015 20:14:05 -0400 Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository. --- gcc/config/mips/sr71k.md | 337 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 gcc/config/mips/sr71k.md (limited to 'gcc/config/mips/sr71k.md') diff --git a/gcc/config/mips/sr71k.md b/gcc/config/mips/sr71k.md new file mode 100644 index 000000000..9b2a784b1 --- /dev/null +++ b/gcc/config/mips/sr71k.md @@ -0,0 +1,337 @@ +;; Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . +;; +;; ......................... +;; +;; DFA-based pipeline description for Sandcraft SR3 (MIPS64 based) +;; +;; The SR3 is described as: +;; - nine-stage pipeline, insn buffering with out-of-order issue to +;; multiple function units, with an average dispatch rate of 2 +;; insn.s per cycle (max 6 insns: 2 fpu, 4 cpu). +;; +;; The details on this are scant except for a diagram in +;; Chap. 6 of Rev. 1.0 SR3 Spec. +;; +;; The model employed below is designed to closely approximate the +;; published latencies. Emulation of out-of-order issue and the insn +;; buffering is done via a VLIW dispatch style (with a packing of 6 insns); +;; the function unit reservations restrictions (define_*_set) are +;; contrived to support published timings. +;; +;; Reference: +;; "SR3 Microprocessor Specification, System development information," +;; Revision 1.0, 13 December 2000. +;; +;; +;; Reservation model is based on: +;; 1) Figure 6-1, from the 1.0 specification. +;; 2) Chapter 19, from the 1.0 specification. +;; 3) following questions(Red Hat)/answers(Sandcraft): +;; RH> From Section 19.1 +;; RH> 1) In terms of figure 6-1, are all the instructions in +;; RH> table 19-1 restricted +;; RH> to ALUx? When ALUx is not in use for an instruction in table;; RH> 19-1 is +;; RH> it fully compatible with all insns that issue to ALUy? +;; +;; Yes, all the instructions in Table 19-1 only go to ALUX, and all the +;; instructions that can be issued to ALUY can also be issued to ALUX. +;; +;; +;; RH> From Section 19.2 +;; RH> 2) Explain conditional moves execution path (in terms of +;; RH> figure 6-1) +;; +;; Conditional move of integer registers (based on floating point condition +;; codes or integer register value) go to ALUX or ALUY. +;; +;; RH> 3) Explain floating point store execution path (in terms of +;; RH> figure 6-1) +;; +;; Floating point stores go to Ld/St and go to MOV in the floating point +;; pipeline. +;; +;; Floating point loads go to Ld/St and go to LOAD in the floating point +;; pipeline. +;; +;; RH> 4) Explain branch on floating condition (in terms of figure 6-1);; +;; Branch on floating condition go to BRU. +;; +;; RH> 5) Is the column for single RECIP instruction latency correct? +;; RH> What about for RSQRT single and double? +;; +;; The latency/repeat for RECIP and RSQRT are correct. +;; + +;; +;; Use four automata to isolate long latency operations, and to +;; reduce the complexity of cpu+fpu, reducing space. +;; +(define_automaton "sr71_cpu, sr71_cpu1, sr71_cp1, sr71_cp2, sr71_fextra, sr71_imacc") + +;; feeders for CPU function units and feeders for fpu (CP1 interface) +(define_cpu_unit "sr_iss0,sr_iss1,sr_iss2,sr_iss3,sr_iss4,sr_iss5" "sr71_cpu") + +;; CPU function units +(define_cpu_unit "ipu_bru" "sr71_cpu1") +(define_cpu_unit "ipu_alux" "sr71_cpu1") +(define_cpu_unit "ipu_aluy" "sr71_cpu1") +(define_cpu_unit "ipu_ldst" "sr71_cpu1") +(define_cpu_unit "ipu_macc_iter" "sr71_imacc") + + +;; Floating-point unit (Co-processor interface 1). +(define_cpu_unit "fpu_mov" "sr71_cp1") +(define_cpu_unit "fpu_load" "sr71_cp1") +(define_cpu_unit "fpu_fpu" "sr71_cp2") + +;; fictitous unit to track long float insns with separate automaton +(define_cpu_unit "fpu_iter" "sr71_fextra") + + +;; +;; Define common execution path (reservation) combinations +;; + +;; +(define_reservation "cpu_iss" "sr_iss0|sr_iss1|sr_iss2|sr_iss3") + +;; two cycles are used for instruction using the fpu as it runs +;; at half the clock speed of the cpu. By adding an extra cycle +;; to the issue units, the default/minimum "repeat" dispatch delay is +;; accounted for all insn.s +(define_reservation "cp1_iss" "(sr_iss4*2)|(sr_iss5*2)") + +(define_reservation "serial_dispatch" "sr_iss0+sr_iss1+sr_iss2+sr_iss3+sr_iss4+sr_iss5") + +;; Simulate a 6 insn VLIW dispatch, 1 cycle in dispatch followed by +;; reservation of function unit. +(define_reservation "ri_insns" "cpu_iss,(ipu_alux|ipu_aluy)") +(define_reservation "ri_mem" "cpu_iss,ipu_ldst") +(define_reservation "ri_alux" "cpu_iss,ipu_alux") +(define_reservation "ri_branch" "cpu_iss,ipu_bru") + +(define_reservation "rf_insn" "cp1_iss,fpu_fpu") +(define_reservation "rf_ldmem" "cp1_iss,fpu_load") + +; simultaneous reservation of pseudo-unit keeps cp1 fpu tied +; up until long cycle insn is finished... +(define_reservation "rf_multi1" "rf_insn+fpu_iter") + +;; +;; The ordering of the instruction-execution-path/resource-usage +;; descriptions (also known as reservation RTL) is roughly ordered +;; based on the define attribute RTL for the "type" classification. +;; When modifying, remember that the first test that matches is the +;; reservation used! +;; + + +(define_insn_reservation "ir_sr70_unknown" 1 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "unknown")) + "serial_dispatch") + + +;; Assume prediction fails. +(define_insn_reservation "ir_sr70_branch" 6 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "branch,jump,call")) + "ri_branch") + +(define_insn_reservation "ir_sr70_load" 2 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "load")) + "ri_mem") + +(define_insn_reservation "ir_sr70_store" 1 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "store")) + "ri_mem") + + +;; +;; float loads/stores flow through both cpu and cp1... +;; +(define_insn_reservation "ir_sr70_fload" 9 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "fpload,fpidxload")) + "(cpu_iss+cp1_iss),(ri_mem+rf_ldmem)") + +(define_insn_reservation "ir_sr70_fstore" 1 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "fpstore,fpidxstore")) + "(cpu_iss+cp1_iss),(fpu_mov+ri_mem)") + + +;; This reservation is for conditional move based on integer +;; or floating point CC. +(define_insn_reservation "ir_sr70_condmove" 4 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "condmove")) + "ri_insns") + +;; Try to discriminate move-from-cp1 versus move-to-cp1 as latencies +;; are different. Like float load/store, these insns use multiple +;; resources simultaneously +(define_insn_reservation "ir_sr70_xfer_from" 6 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "mfc")) + "(cpu_iss+cp1_iss),(fpu_mov+ri_mem)") + +(define_insn_reservation "ir_sr70_xfer_to" 9 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "mtc")) + "(cpu_iss+cp1_iss),(ri_mem+rf_ldmem)") + +(define_insn_reservation "ir_sr70_hilo" 1 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "mthilo,mfhilo")) + "ri_insns") + +(define_insn_reservation "ir_sr70_arith" 1 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "arith,shift,signext,slt,clz,const,logical,move,trap")) + "ri_insns") + +;; emulate repeat (dispatch stall) by spending extra cycle(s) in +;; in iter unit +(define_insn_reservation "ir_sr70_imul_si" 4 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "imul,imul3,imadd") + (eq_attr "mode" "SI"))) + "ri_alux,ipu_alux,ipu_macc_iter") + +(define_insn_reservation "ir_sr70_imul_di" 6 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "imul,imul3,imadd") + (eq_attr "mode" "DI"))) + "ri_alux,ipu_alux,(ipu_macc_iter*3)") + +;; Divide algorithm is early out with best latency of 7 pcycles. +;; Use worst case for scheduling purposes. +(define_insn_reservation "ir_sr70_idiv_si" 41 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "SI"))) + "ri_alux,ipu_alux,(ipu_macc_iter*38)") + +(define_insn_reservation "ir_sr70_idiv_di" 73 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "DI"))) + "ri_alux,ipu_alux,(ipu_macc_iter*70)") + +;; extra reservations of fpu_fpu are for repeat latency +(define_insn_reservation "ir_sr70_fadd_sf" 8 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "fadd") + (eq_attr "mode" "SF"))) + "rf_insn,fpu_fpu") + +(define_insn_reservation "ir_sr70_fadd_df" 10 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "fadd") + (eq_attr "mode" "DF"))) + "rf_insn,fpu_fpu") + +;; Latencies for MADD,MSUB, NMADD, NMSUB assume the Multiply is fused +;; with the sub or add. +(define_insn_reservation "ir_sr70_fmul_sf" 8 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "fmul,fmadd") + (eq_attr "mode" "SF"))) + "rf_insn,fpu_fpu") + +;; tie up the fpu unit to emulate the balance for the "repeat +;; rate" of 8 (2 are spent in the iss unit) +(define_insn_reservation "ir_sr70_fmul_df" 16 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "fmul,fmadd") + (eq_attr "mode" "DF"))) + "rf_insn,fpu_fpu*6") + + +;; RECIP insn uses same type attr as div, and for SR3, has same +;; timings for double. However, single RECIP has a latency of +;; 28 -- only way to fix this is to introduce new insn attrs. +;; cycles spent in iter unit are designed to satisfy balance +;; of "repeat" latency after insn uses up rf_multi1 reservation +(define_insn_reservation "ir_sr70_fdiv_sf" 60 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "fdiv,frdiv") + (eq_attr "mode" "SF"))) + "rf_multi1+(fpu_iter*51)") + +(define_insn_reservation "ir_sr70_fdiv_df" 120 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "fdiv,frdiv") + (eq_attr "mode" "DF"))) + "rf_multi1+(fpu_iter*109)") + +(define_insn_reservation "ir_sr70_fabs" 4 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "fabs,fneg,fmove")) + "rf_insn,fpu_fpu") + +(define_insn_reservation "ir_sr70_fcmp" 10 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "fcmp")) + "rf_insn,fpu_fpu") + +;; "fcvt" type attribute covers a number of diff insns, most have the same +;; latency descriptions, a few vary. We use the +;; most common timing (which is also worst case). +(define_insn_reservation "ir_sr70_fcvt" 12 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "fcvt")) + "rf_insn,fpu_fpu*4") + +(define_insn_reservation "ir_sr70_fsqrt_sf" 62 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "fsqrt") + (eq_attr "mode" "SF"))) + "rf_multi1+(fpu_iter*53)") + +(define_insn_reservation "ir_sr70_fsqrt_df" 122 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "fsqrt") + (eq_attr "mode" "DF"))) + "rf_multi1+(fpu_iter*111)") + +(define_insn_reservation "ir_sr70_frsqrt_sf" 48 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "frsqrt") + (eq_attr "mode" "SF"))) + "rf_multi1+(fpu_iter*39)") + +(define_insn_reservation "ir_sr70_frsqrt_df" 240 + (and (eq_attr "cpu" "sr71000") + (and (eq_attr "type" "frsqrt") + (eq_attr "mode" "DF"))) + "rf_multi1+(fpu_iter*229)") + +(define_insn_reservation "ir_sr70_multi" 1 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "multi")) + "serial_dispatch") + +(define_insn_reservation "ir_sr70_nop" 1 + (and (eq_attr "cpu" "sr71000") + (eq_attr "type" "nop")) + "ri_insns") -- cgit v1.2.3