From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001 From: upstream source tree Date: Sun, 15 Mar 2015 20:14:05 -0400 Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository. --- gcc/config/sh/constraints.md | 265 + gcc/config/sh/crt1.asm | 1369 ++++ gcc/config/sh/crti.asm | 125 + gcc/config/sh/crtn.asm | 77 + gcc/config/sh/divcost-analysis | 88 + gcc/config/sh/divtab-sh4-300.c | 77 + gcc/config/sh/divtab-sh4.c | 85 + gcc/config/sh/divtab.c | 200 + gcc/config/sh/elf.h | 90 + gcc/config/sh/embed-elf.h | 36 + gcc/config/sh/lib1funcs-4-300.asm | 936 +++ gcc/config/sh/lib1funcs-Os-4-200.asm | 322 + gcc/config/sh/lib1funcs.asm | 3933 ++++++++++ gcc/config/sh/lib1funcs.h | 76 + gcc/config/sh/libgcc-excl.ver | 8 + gcc/config/sh/libgcc-glibc.ver | 48 + gcc/config/sh/linux-atomic.asm | 223 + gcc/config/sh/linux-unwind.h | 256 + gcc/config/sh/linux.h | 137 + gcc/config/sh/little.h | 21 + gcc/config/sh/netbsd-elf.h | 117 + gcc/config/sh/newlib.h | 25 + gcc/config/sh/predicates.md | 833 ++ gcc/config/sh/rtems.h | 26 + gcc/config/sh/rtemself.h | 26 + gcc/config/sh/sh-c.c | 68 + gcc/config/sh/sh-modes.def | 34 + gcc/config/sh/sh-protos.h | 186 + gcc/config/sh/sh-symbian.h | 42 + gcc/config/sh/sh.c | 12610 ++++++++++++++++++++++++++++++ gcc/config/sh/sh.h | 2511 ++++++ gcc/config/sh/sh.md | 13490 +++++++++++++++++++++++++++++++++ gcc/config/sh/sh.opt | 338 + gcc/config/sh/sh1.md | 85 + gcc/config/sh/sh4-300.md | 287 + gcc/config/sh/sh4.md | 486 ++ gcc/config/sh/sh4a.md | 236 + 
gcc/config/sh/sh64.h | 26 + gcc/config/sh/shmedia.h | 30 + gcc/config/sh/shmedia.md | 94 + gcc/config/sh/sshmedia.h | 78 + gcc/config/sh/superh.h | 107 + gcc/config/sh/superh.opt | 10 + gcc/config/sh/symbian-base.c | 244 + gcc/config/sh/symbian-c.c | 181 + gcc/config/sh/symbian-cxx.c | 662 ++ gcc/config/sh/symbian-post.h | 88 + gcc/config/sh/symbian-pre.h | 40 + gcc/config/sh/t-elf | 10 + gcc/config/sh/t-linux | 8 + gcc/config/sh/t-linux64 | 1 + gcc/config/sh/t-netbsd | 31 + gcc/config/sh/t-netbsd-sh5-64 | 1 + gcc/config/sh/t-rtems | 7 + gcc/config/sh/t-sh | 166 + gcc/config/sh/t-sh64 | 29 + gcc/config/sh/t-superh | 33 + gcc/config/sh/t-symbian | 81 + gcc/config/sh/t-vxworks | 9 + gcc/config/sh/ushmedia.h | 1087 +++ gcc/config/sh/vxworks.h | 69 + 61 files changed, 42864 insertions(+) create mode 100644 gcc/config/sh/constraints.md create mode 100644 gcc/config/sh/crt1.asm create mode 100644 gcc/config/sh/crti.asm create mode 100644 gcc/config/sh/crtn.asm create mode 100644 gcc/config/sh/divcost-analysis create mode 100644 gcc/config/sh/divtab-sh4-300.c create mode 100644 gcc/config/sh/divtab-sh4.c create mode 100644 gcc/config/sh/divtab.c create mode 100644 gcc/config/sh/elf.h create mode 100644 gcc/config/sh/embed-elf.h create mode 100644 gcc/config/sh/lib1funcs-4-300.asm create mode 100644 gcc/config/sh/lib1funcs-Os-4-200.asm create mode 100644 gcc/config/sh/lib1funcs.asm create mode 100644 gcc/config/sh/lib1funcs.h create mode 100644 gcc/config/sh/libgcc-excl.ver create mode 100644 gcc/config/sh/libgcc-glibc.ver create mode 100644 gcc/config/sh/linux-atomic.asm create mode 100644 gcc/config/sh/linux-unwind.h create mode 100644 gcc/config/sh/linux.h create mode 100644 gcc/config/sh/little.h create mode 100644 gcc/config/sh/netbsd-elf.h create mode 100644 gcc/config/sh/newlib.h create mode 100644 gcc/config/sh/predicates.md create mode 100644 gcc/config/sh/rtems.h create mode 100644 gcc/config/sh/rtemself.h create mode 100644 gcc/config/sh/sh-c.c create mode 
100644 gcc/config/sh/sh-modes.def create mode 100644 gcc/config/sh/sh-protos.h create mode 100644 gcc/config/sh/sh-symbian.h create mode 100644 gcc/config/sh/sh.c create mode 100644 gcc/config/sh/sh.h create mode 100644 gcc/config/sh/sh.md create mode 100644 gcc/config/sh/sh.opt create mode 100644 gcc/config/sh/sh1.md create mode 100644 gcc/config/sh/sh4-300.md create mode 100644 gcc/config/sh/sh4.md create mode 100644 gcc/config/sh/sh4a.md create mode 100644 gcc/config/sh/sh64.h create mode 100644 gcc/config/sh/shmedia.h create mode 100644 gcc/config/sh/shmedia.md create mode 100644 gcc/config/sh/sshmedia.h create mode 100644 gcc/config/sh/superh.h create mode 100644 gcc/config/sh/superh.opt create mode 100644 gcc/config/sh/symbian-base.c create mode 100644 gcc/config/sh/symbian-c.c create mode 100644 gcc/config/sh/symbian-cxx.c create mode 100644 gcc/config/sh/symbian-post.h create mode 100644 gcc/config/sh/symbian-pre.h create mode 100644 gcc/config/sh/t-elf create mode 100644 gcc/config/sh/t-linux create mode 100644 gcc/config/sh/t-linux64 create mode 100644 gcc/config/sh/t-netbsd create mode 100644 gcc/config/sh/t-netbsd-sh5-64 create mode 100644 gcc/config/sh/t-rtems create mode 100644 gcc/config/sh/t-sh create mode 100644 gcc/config/sh/t-sh64 create mode 100644 gcc/config/sh/t-superh create mode 100644 gcc/config/sh/t-symbian create mode 100644 gcc/config/sh/t-vxworks create mode 100644 gcc/config/sh/ushmedia.h create mode 100644 gcc/config/sh/vxworks.h (limited to 'gcc/config/sh') diff --git a/gcc/config/sh/constraints.md b/gcc/config/sh/constraints.md new file mode 100644 index 000000000..6b0e5d27c --- /dev/null +++ b/gcc/config/sh/constraints.md @@ -0,0 +1,265 @@ +;; Constraint definitions for Renesas / SuperH SH. +;; Copyright (C) 2007, 2008 Free Software Foundation, Inc. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Overview of uppercase letter constraints: +;; Bxx: miscellaneous constraints +;; Bsc: SCRATCH - for the scratch register in movsi_ie in the +;; fldi0 / fldi1 cases +;; Cxx: Constants other than only CONST_INT +;; Css: signed 16-bit constant, literal or symbolic +;; Csu: unsigned 16-bit constant, literal or symbolic +;; Csy: label or symbol +;; Cpg: non-explicit constants that can be directly loaded into a general +;; purpose register in PIC code. like 's' except we don't allow +;; PIC_ADDR_P +;; IJKLMNOP: CONST_INT constants +;; Ixx: signed xx bit +;; J16: 0xffffffff00000000 | 0x00000000ffffffff +;; Kxx: unsigned xx bit +;; M: 1 +;; N: 0 +;; P27: 1 | 2 | 8 | 16 +;; Pso: 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128 +;; Psz: ~1 | ~2 | ~4 | ~8 | ~16 | ~32 | ~64 | ~128 +;; Q: pc relative load operand +;; Rxx: reserved for exotic register classes.
+;; Sxx: extra memory (storage) constraints +;; Sua: unaligned memory operations +;; W: vector +;; Z: zero in any mode +;; +;; unused CONST_INT constraint letters: LO +;; unused EXTRA_CONSTRAINT letters: D T U Y + +;; Register constraints +(define_register_constraint "a" "ALL_REGS" + "@internal") + +(define_register_constraint "b" "TARGET_REGS" + "Branch target registers.") + +(define_register_constraint "c" "FPSCR_REGS" + "Floating-point status register.") + +(define_register_constraint "d" "DF_REGS" + "Double precision floating-point register.") + +(define_register_constraint "e" "TARGET_FMOVD ? NO_REGS : FP_REGS" + "Floating-point register.") + +(define_register_constraint "f" "FP_REGS" + "Floating-point register.") + +(define_register_constraint "k" "SIBCALL_REGS" + "@internal") + +(define_register_constraint "l" "PR_REGS" + "PR register.") + +(define_register_constraint "t" "T_REGS" + "T register.") + +(define_register_constraint "w" "FP0_REGS" + "Floating-point register 0.") + +(define_register_constraint "x" "MAC_REGS" + "MACH and MACL registers.") + +(define_register_constraint "y" "FPUL_REGS" + "FPUL register.") + +(define_register_constraint "z" "R0_REGS" + "R0 register.") + +;; Integer constraints +(define_constraint "I06" + "A signed 6-bit constant, as used in SHmedia beqi, bnei and xori." + (and (match_code "const_int") + (match_test "ival >= -32 && ival <= 31"))) + +(define_constraint "I08" + "A signed 8-bit constant, as used in add, sub, etc." + (and (match_code "const_int") + (match_test "ival >= -128 && ival <= 127"))) + +(define_constraint "I10" + "A signed 10-bit constant, as used in SHmedia andi, ori." + (and (match_code "const_int") + (match_test "ival >= -512 && ival <= 511"))) + +(define_constraint "I16" + "A signed 16-bit constant, as used in SHmedia movi." + (and (match_code "const_int") + (match_test "ival >= -32768 && ival <= 32767"))) + +(define_constraint "I20" + "A signed 20-bit constant, as used in SH2A movi20."
+ (and (match_code "const_int") + (match_test "ival >= -524288 && ival <= 524287") + (match_test "TARGET_SH2A"))) + +(define_constraint "I28" + "A signed 28-bit constant, as used in SH2A movi20s." + (and (match_code "const_int") + (match_test "ival >= -134217728 && ival <= 134217727") + (match_test "(ival & 255) == 0") + (match_test "TARGET_SH2A"))) +(define_constraint "J16" + "0xffffffff00000000 or 0x00000000ffffffff." + (and (match_code "const_int") + (match_test "CONST_OK_FOR_J16 (ival)"))) + +(define_constraint "K03" + "An unsigned 3-bit constant, as used in SH2A bclr, bset, etc." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 7"))) + +(define_constraint "K08" + "An unsigned 8-bit constant, as used in and, or, etc." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 255"))) + +(define_constraint "K12" + "An unsigned 12-bit constant, as used in SH2A 12-bit displacement addressing." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 4095"))) + +(define_constraint "K16" + "An unsigned 16-bit constant, as used in SHmedia shori." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 65535"))) + +(define_constraint "P27" + "A constant for shift operand 1,2,8 or 16." + (and (match_code "const_int") + (match_test "ival == 1 || ival == 2 || ival == 8 || ival == 16"))) + +(define_constraint "M" + "Integer constant 1." + (and (match_code "const_int") + (match_test "ival == 1"))) + +(define_constraint "N" + "Integer constant 0." + (and (match_code "const_int") + (match_test "ival == 0"))) + +;; Floating-point constraints +(define_constraint "G" + "Double constant 0." + (and (match_code "const_double") + (match_test "fp_zero_operand (op) && fldi_ok ()"))) + +(define_constraint "H" + "Double constant 1." + (and (match_code "const_double") + (match_test "fp_one_operand (op) && fldi_ok ()"))) + +;; Extra constraints +(define_constraint "Q" + "A pc relative load operand."
+ (and (match_code "mem") + (match_test "IS_PC_RELATIVE_LOAD_ADDR_P (XEXP (op, 0))"))) + +(define_constraint "Bsc" + "Constraint for selecting FLDI0 or FLDI1 instruction. If the clobber + operand is not SCRATCH (i.e. REG) then R0 is probably being used, + hence mova is being used, hence do not select this pattern." + (match_code "scratch")) + +(define_constraint "Css" + "A signed 16-bit constant, literal or symbolic." + (and (match_code "const") + (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC") + (match_test "XINT (XEXP (op, 0), 1) == UNSPEC_EXTRACT_S16"))) + +(define_constraint "Csu" + "An unsigned 16-bit constant, literal or symbolic." + (and (match_code "const") + (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC") + (match_test "XINT (XEXP (op, 0), 1) == UNSPEC_EXTRACT_U16"))) + +(define_constraint "Csy" + "A label or a symbol." + (ior (match_test "NON_PIC_REFERENCE_P (op)") + (match_test "PIC_ADDR_P (op)"))) + +(define_constraint "Z" + "A zero in any shape or form." + (match_test "op == CONST0_RTX (GET_MODE (op))")) + +(define_constraint "W" + "Any vector constant we can handle." + (and (match_code "const_vector") + (ior (match_test "sh_rep_vec (op, VOIDmode)") + (match_test "HOST_BITS_PER_WIDE_INT >= 64 + ? sh_const_vec (op, VOIDmode) + : sh_1el_vec (op, VOIDmode)")))) + +(define_constraint "Cpg" + "A non-explicit constant that can be loaded directly into a general + purpose register. This is like 's' except we don't allow + PIC_ADDR_P." + (match_test "IS_NON_EXPLICIT_CONSTANT_P (op)")) + +(define_constraint "Pso" + "Integer constant with a single bit set in its lower 8-bit." + (and (match_code "const_int") + (ior (match_test "ival == 1") + (match_test "ival == 2") + (match_test "ival == 4") + (match_test "ival == 8") + (match_test "ival == 16") + (match_test "ival == 32") + (match_test "ival == 64") + (match_test "ival == 128")))) + +(define_constraint "Psz" + "Integer constant with a single zero bit in the lower 8-bit." 
+ (and (match_code "const_int") + (ior (match_test "~ival == 1") + (match_test "~ival == 2") + (match_test "~ival == 4") + (match_test "~ival == 8") + (match_test "~ival == 16") + (match_test "~ival == 32") + (match_test "~ival == 64") + (match_test "~ival == 128")))) + +(define_memory_constraint "Sr0" + "@internal" + (and (match_test "memory_operand (op, GET_MODE (op))") + (match_test "!refers_to_regno_p (R0_REG, R0_REG + 1, op, (rtx *) 0)"))) + +(define_memory_constraint "Sua" + "@internal" + (and (match_test "memory_operand (op, GET_MODE (op))") + (match_test "GET_CODE (XEXP (op, 0)) != PLUS"))) + +(define_memory_constraint "Sbv" + "A memory reference, as used in SH2A bclr.b, bset.b, etc." + (and (match_test "MEM_P (op) && GET_MODE (op) == QImode") + (match_test "REG_P (XEXP (op, 0))"))) + +(define_memory_constraint "Sbw" + "A memory reference, as used in SH2A bclr.b, bset.b, etc." + (and (match_test "MEM_P (op) && GET_MODE (op) == QImode") + (match_test "GET_CODE (XEXP (op, 0)) == PLUS") + (match_test "REG_P (XEXP (XEXP (op, 0), 0))") + (match_test "satisfies_constraint_K12 (XEXP (XEXP (op, 0), 1))"))) diff --git a/gcc/config/sh/crt1.asm b/gcc/config/sh/crt1.asm new file mode 100644 index 000000000..e2857904f --- /dev/null +++ b/gcc/config/sh/crt1.asm @@ -0,0 +1,1369 @@ +/* Copyright (C) 2000, 2001, 2003, 2004, 2005, 2006, 2009 + Free Software Foundation, Inc. + This file was pretty much copied from newlib. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + +#ifdef MMU_SUPPORT + /* Section used for exception/timer interrupt stack area */ + .section .data.vbr.stack,"aw" + .align 4 + .global __ST_VBR +__ST_VBR: + .zero 1024 * 2 /* ; 2k for VBR handlers */ +/* Label at the highest stack address where the stack grows from */ +__timer_stack: +#endif /* MMU_SUPPORT */ + + /* ;---------------------------------------- + Normal newlib crt1.asm */ + +#ifdef __SH5__ + .section .data,"aw" + .global ___data +___data: + + .section .rodata,"a" + .global ___rodata +___rodata: + +#define ICCR_BASE 0x01600000 +#define OCCR_BASE 0x01e00000 +#define MMUIR_BASE 0x00000000 +#define MMUDR_BASE 0x00800000 + +#define PTE_ENABLED 1 +#define PTE_DISABLED 0 + +#define PTE_SHARED (1 << 1) +#define PTE_NOT_SHARED 0 + +#define PTE_CB_UNCACHEABLE 0 +#define PTE_CB_DEVICE 1 +#define PTE_CB_CACHEABLE_WB 2 +#define PTE_CB_CACHEABLE_WT 3 + +#define PTE_SZ_4KB (0 << 3) +#define PTE_SZ_64KB (1 << 3) +#define PTE_SZ_1MB (2 << 3) +#define PTE_SZ_512MB (3 << 3) + +#define PTE_PRR (1 << 6) +#define PTE_PRX (1 << 7) +#define PTE_PRW (1 << 8) +#define PTE_PRU (1 << 9) + +#define SR_MMU_BIT 31 +#define SR_BL_BIT 28 + +#define ALIGN_4KB (0xfff) +#define ALIGN_1MB (0xfffff) +#define ALIGN_512MB (0x1fffffff) + +#define DYNACON_BASE 0x0f000000 +#define DM_CB_DLINK_BASE 0x0c000000 +#define DM_DB_DLINK_BASE 0x0b000000 + +#define FEMI_AREA_0 0x00000000 +#define FEMI_AREA_1 0x04000000 +#define FEMI_AREA_2 0x05000000 +#define FEMI_AREA_3 0x06000000 +#define FEMI_AREA_4 0x07000000 +#define FEMI_CB 0x08000000 + +#define EMI_BASE 0X80000000 + +#define DMA_BASE 0X0e000000 + 
+#define CPU_BASE 0X0d000000 + +#define PERIPH_BASE 0X09000000 +#define DMAC_BASE 0x0e000000 +#define INTC_BASE 0x0a000000 +#define CPRC_BASE 0x0a010000 +#define TMU_BASE 0x0a020000 +#define SCIF_BASE 0x0a030000 +#define RTC_BASE 0x0a040000 + + + +#define LOAD_CONST32(val, reg) \ + movi ((val) >> 16) & 65535, reg; \ + shori (val) & 65535, reg + +#define LOAD_PTEH_VAL(sym, align, bits, scratch_reg, reg) \ + LOAD_ADDR (sym, reg); \ + LOAD_CONST32 ((align), scratch_reg); \ + andc reg, scratch_reg, reg; \ + LOAD_CONST32 ((bits), scratch_reg); \ + or reg, scratch_reg, reg + +#define LOAD_PTEL_VAL(sym, align, bits, scratch_reg, reg) \ + LOAD_ADDR (sym, reg); \ + LOAD_CONST32 ((align), scratch_reg); \ + andc reg, scratch_reg, reg; \ + LOAD_CONST32 ((bits), scratch_reg); \ + or reg, scratch_reg, reg + +#define SET_PTE(pte_addr_reg, pteh_val_reg, ptel_val_reg) \ + putcfg pte_addr_reg, 0, r63; \ + putcfg pte_addr_reg, 1, ptel_val_reg; \ + putcfg pte_addr_reg, 0, pteh_val_reg + +#if __SH5__ == 64 + .section .text,"ax" +#define LOAD_ADDR(sym, reg) \ + movi (sym >> 48) & 65535, reg; \ + shori (sym >> 32) & 65535, reg; \ + shori (sym >> 16) & 65535, reg; \ + shori sym & 65535, reg +#else + .mode SHmedia + .section .text..SHmedia32,"ax" +#define LOAD_ADDR(sym, reg) \ + movi (sym >> 16) & 65535, reg; \ + shori sym & 65535, reg +#endif + .global start +start: + LOAD_ADDR (_stack, r15) + +#ifdef MMU_SUPPORT + ! Set up the VM using the MMU and caches + + ! .vm_ep is first instruction to execute + ! after VM initialization + pt/l .vm_ep, tr1 + + ! Configure instruction cache (ICCR) + movi 3, r2 + movi 0, r3 + LOAD_ADDR (ICCR_BASE, r1) + putcfg r1, 0, r2 + putcfg r1, 1, r3 + + ! movi 7, r2 ! write through + ! Configure operand cache (OCCR) + LOAD_ADDR (OCCR_BASE, r1) + putcfg r1, 0, r2 + putcfg r1, 1, r3 + + ! 
Disable all PTE translations + LOAD_ADDR (MMUIR_BASE, r1) + LOAD_ADDR (MMUDR_BASE, r2) + movi 64, r3 + pt/l .disable_ptes_loop, tr0 +.disable_ptes_loop: + putcfg r1, 0, r63 + putcfg r2, 0, r63 + addi r1, 16, r1 + addi r2, 16, r2 + addi r3, -1, r3 + bgt r3, r63, tr0 + + LOAD_ADDR (MMUIR_BASE, r1) + + ! FEMI instruction mappings + ! Area 0 - 1Mb cacheable at 0x00000000 + ! Area 1 - None + ! Area 2 - 1Mb cacheable at 0x05000000 + ! - 1Mb cacheable at 0x05100000 + ! Area 3 - None + ! Area 4 - None + + ! Map a 1Mb page for instructions at 0x00000000 + LOAD_PTEH_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1Mb page for instructions at 0x05000000 + addi r1, 16, r1 + LOAD_PTEH_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1Mb page for instructions at 0x05100000 + addi r1, 16, r1 + LOAD_PTEH_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 512M page for instructions at EMI base + addi r1, 16, r1 + LOAD_PTEH_VAL (EMI_BASE, ALIGN_512MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (EMI_BASE, ALIGN_512MB, PTE_CB_CACHEABLE_WB | PTE_SZ_512MB | PTE_PRX | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for instructions at DM_DB_DLINK_BASE + addi r1, 16, r1 + LOAD_PTEH_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRX | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + LOAD_ADDR (MMUDR_BASE, r1) + + ! FEMI data mappings + ! 
Area 0 - 1Mb cacheable at 0x00000000 + ! Area 1 - 1Mb device at 0x04000000 + ! Area 2 - 1Mb cacheable at 0x05000000 + ! - 1Mb cacheable at 0x05100000 + ! Area 3 - None + ! Area 4 - None + ! CB - 1Mb device at 0x08000000 + + ! Map a 1Mb page for data at 0x00000000 + LOAD_PTEH_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1Mb page for data at 0x04000000 + addi r1, 16, r1 + LOAD_PTEH_VAL (FEMI_AREA_1, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_AREA_1, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1Mb page for data at 0x05000000 + addi r1, 16, r1 + LOAD_PTEH_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1Mb page for data at 0x05100000 + addi r1, 16, r1 + LOAD_PTEH_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for registers at 0x08000000 + addi r1, 16, r1 + LOAD_PTEH_VAL (FEMI_CB, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_CB, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 512M page for data at EMI + addi r1, 16, r1 + LOAD_PTEH_VAL (EMI_BASE, ALIGN_512MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (EMI_BASE, ALIGN_512MB, PTE_CB_CACHEABLE_WB | PTE_SZ_512MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! 
Map a 4K page for DYNACON at DYNACON_BASE + addi r1, 16, r1 + LOAD_PTEH_VAL (DYNACON_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DYNACON_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for instructions at DM_DB_DLINK_BASE + addi r1, 16, r1 + LOAD_PTEH_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for data at DM_DB_DLINK_BASE+0x1000 + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_DB_DLINK_BASE+0x1000), ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_DB_DLINK_BASE+0x1000), ALIGN_4KB, PTE_CB_UNCACHEABLE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for stack DM_DB_DLINK_BASE+0x2000 + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_DB_DLINK_BASE+0x2000), ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_DB_DLINK_BASE+0x2000), ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK + ! 0x0c000000 - 0x0c0fffff + addi r1, 16, r1 + LOAD_PTEH_VAL (DM_CB_DLINK_BASE, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DM_CB_DLINK_BASE, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK + ! 0x0c100000 - 0x0c1fffff + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x100000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK + ! 
0x0c200000 - 0x0c2fffff + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x200000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x200000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK + ! 0x0c400000 - 0x0c4fffff + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x400000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x400000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK + ! 0x0c800000 - 0x0c8fffff + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x800000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x800000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for DMA control registers + addi r1, 16, r1 + LOAD_PTEH_VAL (DMA_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DMA_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map lots of 4K pages for peripherals + + ! /* peripheral */ + addi r1, 16, r1 + LOAD_PTEH_VAL (PERIPH_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (PERIPH_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! /* dmac */ + addi r1, 16, r1 + LOAD_PTEH_VAL (DMAC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DMAC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! 
/* intc */ + addi r1, 16, r1 + LOAD_PTEH_VAL (INTC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (INTC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! /* rtc */ + addi r1, 16, r1 + LOAD_PTEH_VAL (RTC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (RTC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! /* tmu */ + addi r1, 16, r1 + LOAD_PTEH_VAL (TMU_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (TMU_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! /* scif */ + addi r1, 16, r1 + LOAD_PTEH_VAL (SCIF_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (SCIF_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! /* cprc */ + addi r1, 16, r1 + LOAD_PTEH_VAL (CPRC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (CPRC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + !
Map CPU WPC registers + addi r1, 16, r1 + LOAD_PTEH_VAL (CPU_BASE, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (CPU_BASE, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + addi r1, 16, r1 + + LOAD_PTEH_VAL ((CPU_BASE+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((CPU_BASE+0x100000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + addi r1, 16, r1 + LOAD_PTEH_VAL ((CPU_BASE+0x200000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((CPU_BASE+0x200000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + addi r1, 16, r1 + LOAD_PTEH_VAL ((CPU_BASE+0x400000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((CPU_BASE+0x400000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Switch over to virtual addressing and enabled cache + getcon sr, r1 + movi 1, r2 + shlli r2, SR_BL_BIT, r2 + or r1, r2, r1 + putcon r1, ssr + getcon sr, r1 + movi 1, r2 + shlli r2, SR_MMU_BIT, r2 + or r1, r2, r1 + putcon r1, ssr + gettr tr1, r1 + putcon r1, spc + synco + rte + + ! VM entry point. From now on, we are in VM mode. +.vm_ep: + + ! Install the trap handler, by seeding vbr with the + ! correct value, and by assigning sr.bl = 0. + + LOAD_ADDR (vbr_start, r1) + putcon r1, vbr + movi ~(1<<28), r1 + getcon sr, r2 + and r1, r2, r2 + putcon r2, sr +#endif /* MMU_SUPPORT */ + + pt/l .Lzero_bss_loop, tr0 + pt/l _init, tr5 + pt/l ___setup_argv_and_call_main, tr6 + pt/l _exit, tr7 + + ! zero out bss + LOAD_ADDR (_edata, r0) + LOAD_ADDR (_end, r1) +.Lzero_bss_loop: + stx.q r0, r63, r63 + addi r0, 8, r0 + bgt/l r1, r0, tr0 + + LOAD_ADDR (___data, r26) + LOAD_ADDR (___rodata, r27) + +#ifdef __SH_FPU_ANY__ + getcon sr, r0 + ! enable the FP unit, by resetting SR.FD + ! 
also zero out SR.FR, SR.SZ and SR.PR, as mandated by the ABI + movi 0, r1 + shori 0xf000, r1 + andc r0, r1, r0 + putcon r0, sr +#if __SH5__ == 32 + pt/l ___set_fpscr, tr0 + movi 0, r4 + blink tr0, r18 +#endif +#endif + + ! arrange for exit to call fini + pt/l _atexit, tr1 + LOAD_ADDR (_fini, r2) + blink tr1, r18 + + ! call init + blink tr5, r18 + + ! call the mainline + blink tr6, r18 + + ! call exit + blink tr7, r18 + ! We should never return from _exit but in case we do we would enter the + ! following tight loop. This avoids executing any data that might follow. +limbo: + pt/l limbo, tr0 + blink tr0, r63 + +#ifdef MMU_SUPPORT + ! All these traps are handled in the same place. + .balign 256 +vbr_start: + pt/l handler, tr0 ! tr0 trashed. + blink tr0, r63 + .balign 256 +vbr_100: + pt/l handler, tr0 ! tr0 trashed. + blink tr0, r63 +vbr_100_end: + .balign 256 +vbr_200: + pt/l handler, tr0 ! tr0 trashed. + blink tr0, r63 + .balign 256 +vbr_300: + pt/l handler, tr0 ! tr0 trashed. + blink tr0, r63 + .balign 256 +vbr_400: ! Should be at vbr+0x400 +handler: + /* If the trap handler is there call it */ + LOAD_ADDR (__superh_trap_handler, r2) + pta chandler,tr2 + beq r2, r63, tr2 /* If zero, ie not present branch around to chandler */ + /* Now call the trap handler with as much of the context unchanged as possible.
+ Move trapping address into R18 to make it look like the trap point */ + getcon spc, r18 + pt/l __superh_trap_handler, tr0 + blink tr0, r7 +chandler: + getcon spc, r62 + getcon expevt, r2 + pt/l _exit, tr0 + blink tr0, r63 + + /* Simulated trap handler */ + .section .text..SHmedia32,"ax" +gcc2_compiled.: + .section .debug_abbrev +.Ldebug_abbrev0: + .section .text..SHmedia32 +.Ltext0: + .section .debug_info +.Ldebug_info0: + .section .debug_line +.Ldebug_line0: + .section .text..SHmedia32,"ax" + .align 5 + .global __superh_trap_handler + .type __superh_trap_handler,@function +__superh_trap_handler: +.LFB1: + ptabs r18, tr0 + addi.l r15, -8, r15 + st.l r15, 4, r14 + addi.l r15, -8, r15 + add.l r15, r63, r14 + st.l r14, 0, r2 + ptabs r7, tr0 + addi.l r14, 8, r14 + add.l r14, r63, r15 + ld.l r15, 4, r14 + addi.l r15, 8, r15 + blink tr0, r63 +.LFE1: +.Lfe1: + .size __superh_trap_handler,.Lfe1-__superh_trap_handler + + .section .text..SHmedia32 +.Letext0: + + .section .debug_info + .ualong 0xa7 + .uaword 0x2 + .ualong .Ldebug_abbrev0 + .byte 0x4 + .byte 0x1 + .ualong .Ldebug_line0 + .ualong .Letext0 + .ualong .Ltext0 + .string "trap_handler.c" + + .string "xxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + .string "GNU C 2.97-sh5-010522" + + .byte 0x1 + .byte 0x2 + .ualong 0x9a + .byte 0x1 + .string "_superh_trap_handler" + + .byte 0x1 + .byte 0x2 + .byte 0x1 + .ualong .LFB1 + .ualong .LFE1 + .byte 0x1 + .byte 0x5e + .byte 0x3 + .string "trap_reason" + + .byte 0x1 + .byte 0x1 + .ualong 0x9a + .byte 0x2 + .byte 0x91 + .byte 0x0 + .byte 0x0 + .byte 0x4 + .string "unsigned int" + + .byte 0x4 + .byte 0x7 + .byte 0x0 + + .section .debug_abbrev + .byte 0x1 + .byte 0x11 + .byte 0x1 + .byte 0x10 + .byte 0x6 + .byte 0x12 + .byte 0x1 + .byte 0x11 + .byte 0x1 + .byte 0x3 + .byte 0x8 + .byte 0x1b + .byte 0x8 + .byte 0x25 + .byte 0x8 + .byte 0x13 + .byte 0xb + .byte 0,0 + .byte 0x2 + .byte 0x2e + .byte 0x1 + .byte 0x1 + .byte 0x13 + .byte 0x3f + .byte 0xc + .byte 0x3 + .byte 0x8 + .byte 0x3a + .byte 
0xb + .byte 0x3b + .byte 0xb + .byte 0x27 + .byte 0xc + .byte 0x11 + .byte 0x1 + .byte 0x12 + .byte 0x1 + .byte 0x40 + .byte 0xa + .byte 0,0 + .byte 0x3 + .byte 0x5 + .byte 0x0 + .byte 0x3 + .byte 0x8 + .byte 0x3a + .byte 0xb + .byte 0x3b + .byte 0xb + .byte 0x49 + .byte 0x13 + .byte 0x2 + .byte 0xa + .byte 0,0 + .byte 0x4 + .byte 0x24 + .byte 0x0 + .byte 0x3 + .byte 0x8 + .byte 0xb + .byte 0xb + .byte 0x3e + .byte 0xb + .byte 0,0 + .byte 0 + + .section .debug_pubnames + .ualong 0x27 + .uaword 0x2 + .ualong .Ldebug_info0 + .ualong 0xab + .ualong 0x5b + .string "_superh_trap_handler" + + .ualong 0x0 + + .section .debug_aranges + .ualong 0x1c + .uaword 0x2 + .ualong .Ldebug_info0 + .byte 0x4 + .byte 0x0 + .uaword 0x0,0 + .ualong .Ltext0 + .ualong .Letext0-.Ltext0 + .ualong 0x0 + .ualong 0x0 + .ident "GCC: (GNU) 2.97-sh5-010522" +#endif /* MMU_SUPPORT */ +#else /* ! __SH5__ */ + + ! make a place to keep any previous value of the vbr register + ! this will only have a value if it has been set by redboot (for example) + .section .bss +old_vbr: + .long 0 +#ifdef PROFILE +profiling_enabled: + .long 0 +#endif + + + .section .text + .global start + .import ___rtos_profiler_start_timer + .weak ___rtos_profiler_start_timer +start: + mov.l stack_k,r15 + +#if defined (__SH3__) || (defined (__SH_FPU_ANY__) && ! defined (__SH2A__)) || defined (__SH4_NOFPU__) +#define VBR_SETUP + ! before zeroing the bss ... + ! if the vbr is already set to vbr_start then the program has been restarted + ! (i.e. it is not the first time the program has been run since reset) + ! reset the vbr to its old value before old_vbr (in bss) is wiped + ! this ensures that the later code does not create a circular vbr chain + stc vbr, r1 + mov.l vbr_start_k, r2 + cmp/eq r1, r2 + bf 0f + ! reset the old vbr value + mov.l old_vbr_k, r1 + mov.l @r1, r2 + ldc r2, vbr +0: +#endif /* VBR_SETUP */ + + ! 
zero out bss + mov.l edata_k,r0 + mov.l end_k,r1 + mov #0,r2 +start_l: + mov.l r2,@r0 + add #4,r0 + cmp/ge r0,r1 + bt start_l + +#if defined (__SH_FPU_ANY__) + mov.l set_fpscr_k, r1 + mov #4,r4 + jsr @r1 + shll16 r4 ! Set DN bit (flush denormal inputs to zero) + lds r3,fpscr ! Switch to default precision +#endif /* defined (__SH_FPU_ANY__) */ + +#ifdef VBR_SETUP + ! save the existing contents of the vbr + ! there will only be a prior value when using something like redboot + ! otherwise it will be zero + stc vbr, r1 + mov.l old_vbr_k, r2 + mov.l r1, @r2 + ! setup vbr + mov.l vbr_start_k, r1 + ldc r1,vbr +#endif /* VBR_SETUP */ + + ! if an rtos is exporting a timer start fn, + ! then pick up an SR which does not enable ints + ! (the rtos will take care of this) + mov.l rtos_start_fn, r0 + mov.l sr_initial_bare, r1 + tst r0, r0 + bt set_sr + + mov.l sr_initial_rtos, r1 + +set_sr: + ! Set status register (sr) + ldc r1, sr + + ! arrange for exit to call fini + mov.l atexit_k,r0 + mov.l fini_k,r4 + jsr @r0 + nop + +#ifdef PROFILE + ! arrange for exit to call _mcleanup (via stop_profiling) + mova stop_profiling,r0 + mov.l atexit_k,r1 + jsr @r1 + mov r0, r4 + + ! Call profiler startup code + mov.l monstartup_k, r0 + mov.l start_k, r4 + mov.l etext_k, r5 + jsr @r0 + nop + + ! enable profiling trap + ! until now any trap 33s will have been ignored + ! This means that all library functions called before this point + ! (directly or indirectly) may have the profiling trap at the start. + ! Therefore, only mcount itself may not have the extra header. + mov.l profiling_enabled_k2, r0 + mov #1, r1 + mov.l r1, @r0 +#endif /* PROFILE */ + + ! call init + mov.l init_k,r0 + jsr @r0 + nop + + ! call the mainline + mov.l main_k,r0 + jsr @r0 + nop + + ! 
call exit + mov r0,r4 + mov.l exit_k,r0 + jsr @r0 + nop + + .balign 4 +#ifdef PROFILE +stop_profiling: + # stop mcount counting + mov.l profiling_enabled_k2, r0 + mov #0, r1 + mov.l r1, @r0 + + # call mcleanup + mov.l mcleanup_k, r0 + jmp @r0 + nop + + .balign 4 +mcleanup_k: + .long __mcleanup +monstartup_k: + .long ___monstartup +profiling_enabled_k2: + .long profiling_enabled +start_k: + .long _start +etext_k: + .long __etext +#endif /* PROFILE */ + + .align 2 +#if defined (__SH_FPU_ANY__) +set_fpscr_k: + .long ___set_fpscr +#endif /* defined (__SH_FPU_ANY__) */ + +stack_k: + .long _stack +edata_k: + .long _edata +end_k: + .long _end +main_k: + .long ___setup_argv_and_call_main +exit_k: + .long _exit +atexit_k: + .long _atexit +init_k: + .long _init +fini_k: + .long _fini +#ifdef VBR_SETUP +old_vbr_k: + .long old_vbr +vbr_start_k: + .long vbr_start +#endif /* VBR_SETUP */ + +sr_initial_rtos: + ! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work. + ! Whether profiling or not, keep interrupts masked, + ! the RTOS will enable these if required. + .long 0x600000f1 + +rtos_start_fn: + .long ___rtos_profiler_start_timer + +#ifdef PROFILE +sr_initial_bare: + ! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work. + ! For bare machine, we need to enable interrupts to get profiling working + .long 0x60000001 +#else + +sr_initial_bare: + ! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work. + ! Keep interrupts disabled - the application will enable as required. + .long 0x600000f1 +#endif + + ! supplied for backward compatibility only, in case of linking + ! code whose main() was compiled with an older version of GCC. + .global ___main +___main: + rts + nop +#ifdef VBR_SETUP +! Exception handlers + .section .text.vbr, "ax" +vbr_start: + + .org 0x100 +vbr_100: +#ifdef PROFILE + ! Note on register usage. + ! we use r0..r3 as scratch in this code. If we are here due to a trapa for profiling + ! 
then this is OK as we are just before executing any function code. + ! The other r4..r7 we save explicitly on the stack + ! Remaining registers are saved by normal ABI conventions and we assert we do not + ! use floating point registers. + mov.l expevt_k1, r1 + mov.l @r1, r1 + mov.l event_mask, r0 + and r0,r1 + mov.l trapcode_k, r2 + cmp/eq r1,r2 + bt 1f + bra handler_100 ! if not a trapa, go to default handler + nop +1: + mov.l trapa_k, r0 + mov.l @r0, r0 + shlr2 r0 ! trapa code is shifted by 2. + cmp/eq #33, r0 + bt 2f + bra handler_100 + nop +2: + + ! If here then it looks like we have trap #33 + ! Now we need to call mcount with the following convention + ! Save and restore r4..r7 + mov.l r4,@-r15 + mov.l r5,@-r15 + mov.l r6,@-r15 + mov.l r7,@-r15 + sts.l pr,@-r15 + + ! r4 is frompc. + ! r5 is selfpc + ! r0 is the branch back address. + ! The code sequence emitted by gcc for the profiling trap is + ! .align 2 + ! trapa #33 + ! .align 2 + ! .long lab Where lab is planted by the compiler. This is the address + ! of a datum that needs to be incremented. + sts pr, r4 ! frompc + stc spc, r5 ! selfpc + mov #2, r2 + not r2, r2 ! pattern to align to 4 + and r2, r5 ! r5 now has aligned address +! add #4, r5 ! r5 now has address of address + mov r5, r2 ! Remember it. +! mov.l @r5, r5 ! r5 has value of label (lab in above example) + add #8, r2 + ldc r2, spc ! our return address avoiding address word + + ! only call mcount if profiling is enabled + mov.l profiling_enabled_k, r0 + mov.l @r0, r0 + cmp/eq #0, r0 + bt 3f + ! call mcount + mov.l mcount_k, r2 + jsr @r2 + nop +3: + lds.l @r15+,pr + mov.l @r15+,r7 + mov.l @r15+,r6 + mov.l @r15+,r5 + mov.l @r15+,r4 + rte + nop + .balign 4 +event_mask: + .long 0xfff +trapcode_k: + .long 0x160 +expevt_k1: + .long 0xff000024 ! Address of expevt +trapa_k: + .long 0xff000020 +mcount_k: + .long __call_mcount +profiling_enabled_k: + .long profiling_enabled +#endif + ! Non profiling case. +handler_100: + mov.l 2f, r0 !
load the old vbr setting (if any) + mov.l @r0, r0 + cmp/eq #0, r0 + bf 1f + ! no previous vbr - jump to own generic handler + bra handler + nop +1: ! there was a previous handler - chain them + add #0x7f, r0 ! 0x7f + add #0x7f, r0 ! 0xfe + add #0x2, r0 ! add 0x100 without corrupting another register + jmp @r0 + nop + .balign 4 +2: + .long old_vbr + + .org 0x400 +vbr_400: ! Should be at vbr+0x400 + mov.l 2f, r0 ! load the old vbr setting (if any) + mov.l @r0, r0 + cmp/eq #0, r0 + ! no previous vbr - jump to own generic handler + bt handler + ! there was a previous handler - chain them + rotcr r0 + rotcr r0 + add #0x7f, r0 ! 0x1fc + add #0x7f, r0 ! 0x3f8 + add #0x02, r0 ! 0x400 + rotcl r0 + rotcl r0 ! Add 0x400 without corrupting another register + jmp @r0 + nop + .balign 4 +2: + .long old_vbr +handler: + /* If the trap handler is there call it */ + mov.l superh_trap_handler_k, r0 + cmp/eq #0, r0 ! True if zero. + bf 3f + bra chandler + nop +3: + ! Here handler available, call it. + /* Now call the trap handler with as much of the context unchanged as possible. + Move trapping address into PR to make it look like the trap point */ + stc spc, r1 + lds r1, pr + mov.l expevt_k, r4 + mov.l @r4, r4 ! r4 is value of expevt, first parameter. + mov r1, r5 ! Remember trapping pc. + mov r1, r6 ! Remember trapping pc. + mov.l chandler_k, r1 + mov.l superh_trap_handler_k, r2 + ! jmp to trap handler to avoid disturbing pr. + jmp @r2 + nop + + .org 0x600 +vbr_600: +#ifdef PROFILE + ! Should be at vbr+0x600 + ! Now we are in the land of interrupts so need to save more state. + ! Save register state + mov.l interrupt_stack_k, r15 ! r15 has been saved to sgr. + mov.l r0,@-r15 + mov.l r1,@-r15 + mov.l r2,@-r15 + mov.l r3,@-r15 + mov.l r4,@-r15 + mov.l r5,@-r15 + mov.l r6,@-r15 + mov.l r7,@-r15 + sts.l pr,@-r15 + sts.l mach,@-r15 + sts.l macl,@-r15 +#if defined(__SH_FPU_ANY__) + ! Save fpul and fpscr, save fr0-fr7 in 64 bit mode + ! 
and set the pervading precision for the timer_handler + mov #0,r0 + sts.l fpul,@-r15 + sts.l fpscr,@-r15 + lds r0,fpscr ! Clear fpscr + fmov fr0,@-r15 + fmov fr1,@-r15 + fmov fr2,@-r15 + fmov fr3,@-r15 + mov.l pervading_precision_k,r0 + fmov fr4,@-r15 + fmov fr5,@-r15 + mov.l @r0,r0 + fmov fr6,@-r15 + fmov fr7,@-r15 + lds r0,fpscr +#endif /* __SH_FPU_ANY__ */ + ! Pass interrupted pc to timer_handler as first parameter (r4). + stc spc, r4 + mov.l timer_handler_k, r0 + jsr @r0 + nop +#if defined(__SH_FPU_ANY__) + mov #0,r0 + lds r0,fpscr ! Clear the fpscr + fmov @r15+,fr7 + fmov @r15+,fr6 + fmov @r15+,fr5 + fmov @r15+,fr4 + fmov @r15+,fr3 + fmov @r15+,fr2 + fmov @r15+,fr1 + fmov @r15+,fr0 + lds.l @r15+,fpscr + lds.l @r15+,fpul +#endif /* __SH_FPU_ANY__ */ + lds.l @r15+,macl + lds.l @r15+,mach + lds.l @r15+,pr + mov.l @r15+,r7 + mov.l @r15+,r6 + mov.l @r15+,r5 + mov.l @r15+,r4 + mov.l @r15+,r3 + mov.l @r15+,r2 + mov.l @r15+,r1 + mov.l @r15+,r0 + stc sgr, r15 ! Restore r15, destroyed by this sequence. + rte + nop +#if defined(__SH_FPU_ANY__) + .balign 4 +pervading_precision_k: +#define CONCAT1(A,B) A##B +#define CONCAT(A,B) CONCAT1(A,B) + .long CONCAT(__USER_LABEL_PREFIX__,__fpscr_values)+4 +#endif +#else + mov.l 2f, r0 ! Load the old vbr setting (if any). + mov.l @r0, r0 + cmp/eq #0, r0 + ! no previous vbr - jump to own handler + bt chandler + ! there was a previous handler - chain them + rotcr r0 + rotcr r0 + add #0x7f, r0 ! 0x1fc + add #0x7f, r0 ! 0x3f8 + add #0x7f, r0 ! 0x5f4 + add #0x03, r0 ! 0x600 + rotcl r0 + rotcl r0 ! Add 0x600 without corrupting another register + jmp @r0 + nop + .balign 4 +2: + .long old_vbr +#endif /* PROFILE code */ +chandler: + mov.l expevt_k, r4 + mov.l @r4, r4 ! r4 is value of expevt hence making this the return code + mov.l handler_exit_k,r0 + jsr @r0 + nop + ! We should never return from _exit but in case we do we would enter the + ! 
the following tight loop +limbo: + bra limbo + nop + .balign 4 +#ifdef PROFILE +interrupt_stack_k: + .long __timer_stack ! The high end of the stack +timer_handler_k: + .long __profil_counter +#endif +expevt_k: + .long 0xff000024 ! Address of expevt +chandler_k: + .long chandler +superh_trap_handler_k: + .long __superh_trap_handler +handler_exit_k: + .long _exit + .align 2 +! Simulated compile of trap handler. + .section .debug_abbrev,"",@progbits +.Ldebug_abbrev0: + .section .debug_info,"",@progbits +.Ldebug_info0: + .section .debug_line,"",@progbits +.Ldebug_line0: + .text +.Ltext0: + .align 5 + .type __superh_trap_handler,@function +__superh_trap_handler: +.LFB1: + mov.l r14,@-r15 +.LCFI0: + add #-4,r15 +.LCFI1: + mov r15,r14 +.LCFI2: + mov.l r4,@r14 + lds r1, pr + add #4,r14 + mov r14,r15 + mov.l @r15+,r14 + rts + nop +.LFE1: +.Lfe1: + .size __superh_trap_handler,.Lfe1-__superh_trap_handler + .section .debug_frame,"",@progbits +.Lframe0: + .ualong .LECIE0-.LSCIE0 +.LSCIE0: + .ualong 0xffffffff + .byte 0x1 + .string "" + .uleb128 0x1 + .sleb128 -4 + .byte 0x11 + .byte 0xc + .uleb128 0xf + .uleb128 0x0 + .align 2 +.LECIE0: +.LSFDE0: + .ualong .LEFDE0-.LASFDE0 +.LASFDE0: + .ualong .Lframe0 + .ualong .LFB1 + .ualong .LFE1-.LFB1 + .byte 0x4 + .ualong .LCFI0-.LFB1 + .byte 0xe + .uleb128 0x4 + .byte 0x4 + .ualong .LCFI1-.LCFI0 + .byte 0xe + .uleb128 0x8 + .byte 0x8e + .uleb128 0x1 + .byte 0x4 + .ualong .LCFI2-.LCFI1 + .byte 0xd + .uleb128 0xe + .align 2 +.LEFDE0: + .text +.Letext0: + .section .debug_info + .ualong 0xb3 + .uaword 0x2 + .ualong .Ldebug_abbrev0 + .byte 0x4 + .uleb128 0x1 + .ualong .Ldebug_line0 + .ualong .Letext0 + .ualong .Ltext0 + .string "trap_handler.c" + .string "xxxxxxxxxxxxxxxxxxxxxxxxxxxx" + .string "GNU C 3.2 20020529 (experimental)" + .byte 0x1 + .uleb128 0x2 + .ualong 0xa6 + .byte 0x1 + .string "_superh_trap_handler" + .byte 0x1 + .byte 0x2 + .byte 0x1 + .ualong .LFB1 + .ualong .LFE1 + .byte 0x1 + .byte 0x5e + .uleb128 0x3 + .string 
"trap_reason" + .byte 0x1 + .byte 0x1 + .ualong 0xa6 + .byte 0x2 + .byte 0x91 + .sleb128 0 + .byte 0x0 + .uleb128 0x4 + .string "unsigned int" + .byte 0x4 + .byte 0x7 + .byte 0x0 + .section .debug_abbrev + .uleb128 0x1 + .uleb128 0x11 + .byte 0x1 + .uleb128 0x10 + .uleb128 0x6 + .uleb128 0x12 + .uleb128 0x1 + .uleb128 0x11 + .uleb128 0x1 + .uleb128 0x3 + .uleb128 0x8 + .uleb128 0x1b + .uleb128 0x8 + .uleb128 0x25 + .uleb128 0x8 + .uleb128 0x13 + .uleb128 0xb + .byte 0x0 + .byte 0x0 + .uleb128 0x2 + .uleb128 0x2e + .byte 0x1 + .uleb128 0x1 + .uleb128 0x13 + .uleb128 0x3f + .uleb128 0xc + .uleb128 0x3 + .uleb128 0x8 + .uleb128 0x3a + .uleb128 0xb + .uleb128 0x3b + .uleb128 0xb + .uleb128 0x27 + .uleb128 0xc + .uleb128 0x11 + .uleb128 0x1 + .uleb128 0x12 + .uleb128 0x1 + .uleb128 0x40 + .uleb128 0xa + .byte 0x0 + .byte 0x0 + .uleb128 0x3 + .uleb128 0x5 + .byte 0x0 + .uleb128 0x3 + .uleb128 0x8 + .uleb128 0x3a + .uleb128 0xb + .uleb128 0x3b + .uleb128 0xb + .uleb128 0x49 + .uleb128 0x13 + .uleb128 0x2 + .uleb128 0xa + .byte 0x0 + .byte 0x0 + .uleb128 0x4 + .uleb128 0x24 + .byte 0x0 + .uleb128 0x3 + .uleb128 0x8 + .uleb128 0xb + .uleb128 0xb + .uleb128 0x3e + .uleb128 0xb + .byte 0x0 + .byte 0x0 + .byte 0x0 + .section .debug_pubnames,"",@progbits + .ualong 0x27 + .uaword 0x2 + .ualong .Ldebug_info0 + .ualong 0xb7 + .ualong 0x67 + .string "_superh_trap_handler" + .ualong 0x0 + .section .debug_aranges,"",@progbits + .ualong 0x1c + .uaword 0x2 + .ualong .Ldebug_info0 + .byte 0x4 + .byte 0x0 + .uaword 0x0 + .uaword 0x0 + .ualong .Ltext0 + .ualong .Letext0-.Ltext0 + .ualong 0x0 + .ualong 0x0 +#endif /* VBR_SETUP */ +#endif /* ! __SH5__ */ diff --git a/gcc/config/sh/crti.asm b/gcc/config/sh/crti.asm new file mode 100644 index 000000000..ef5cd719d --- /dev/null +++ b/gcc/config/sh/crti.asm @@ -0,0 +1,125 @@ +/* Copyright (C) 2000, 2001, 2009 Free Software Foundation, Inc. + This file was adapted from glibc sources. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +/* The code in sections .init and .fini is supposed to be a single + regular function. The function in .init is called directly from + start in crt1.asm. The function in .fini is atexit()ed in crt1.asm + too. + + crti.asm contributes the prologue of a function to these sections, + and crtn.asm supplies the epilogue. STARTFILE_SPEC should list + crti.o before any other object files that might add code to .init + or .fini sections, and ENDFILE_SPEC should list crtn.o after any + such object files. */ + + .section .init +/* The alignment below can't be smaller, otherwise the mova below + breaks. Yes, we might align just the label, but then we'd be + exchanging an alignment here for one there, since the code fragment + below ensures 4-byte alignment on __ELF__. */ +#ifdef __ELF__ + .p2align 2 +#else + .p2align 1 +#endif + .global _init +_init: +#if __SHMEDIA__ + addi r15, -16, r15 + st.q r15, 8, r14 + st.q r15, 0, r18 + add r15, r63, r14 +#elif __SH5__ && !
__SHMEDIA__ + mov r15,r0 + add #-8,r15 + mov.l r14,@-r0 + sts.l pr,@-r0 + mov r15,r14 + nop +#else +#ifdef __ELF__ + mov.l r12,@-r15 + mova 0f,r0 + mov.l 0f,r12 +#endif + mov.l r14,@-r15 +#ifdef __ELF__ + add r0,r12 +#endif + sts.l pr,@-r15 +#ifdef __ELF__ + bra 1f +#endif + mov r15,r14 +#ifdef __ELF__ +0: .long _GLOBAL_OFFSET_TABLE_ +1: +#endif +#endif /* __SHMEDIA__ */ + + .section .fini +/* The alignment below can't be smaller, otherwise the mova below + breaks. Yes, we might align just the label, but then we'd be + exchanging an alignment here for one there, since the code fragment + below ensures 4-byte alignment on __ELF__. */ +#ifdef __ELF__ + .p2align 2 +#else + .p2align 1 +#endif + .global _fini +_fini: +#if __SHMEDIA__ + addi r15, -16, r15 + st.q r15, 8, r14 + st.q r15, 0, r18 + add r15, r63, r14 +#elif __SH5__ && ! __SHMEDIA__ + mov r15,r0 + add #-8,r15 + mov.l r14,@-r0 + sts.l pr,@-r0 + mov r15,r14 + nop +#else +#ifdef __ELF__ + mov.l r12,@-r15 + mova 0f,r0 + mov.l 0f,r12 +#endif + mov.l r14,@-r15 +#ifdef __ELF__ + add r0,r12 +#endif + sts.l pr,@-r15 +#ifdef __ELF__ + bra 1f +#endif + mov r15,r14 +#ifdef __ELF__ +0: .long _GLOBAL_OFFSET_TABLE_ +1: +#endif +#endif /* __SHMEDIA__ */ diff --git a/gcc/config/sh/crtn.asm b/gcc/config/sh/crtn.asm new file mode 100644 index 000000000..670d90f7b --- /dev/null +++ b/gcc/config/sh/crtn.asm @@ -0,0 +1,77 @@ +/* Copyright (C) 2000, 2001, 2009 Free Software Foundation, Inc. + This file was adapted from glibc sources. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* See an explanation about .init and .fini in crti.asm. */ + + .section .init +#if __SHMEDIA__ + add r14, r63, r15 + ld.q r15, 0, r18 + ptabs r18, tr0 + ld.q r15, 8, r14 + addi r15, 16, r15 + blink tr0, r63 +#elif __SH5__ && ! __SHMEDIA__ + mov r14,r15 + lds.l @r14+,pr + mov.l @r14,r14 + rts + add #8,r15 +#else + mov r14,r15 + lds.l @r15+,pr + mov.l @r15+,r14 + rts +#ifdef __ELF__ + mov.l @r15+,r12 +#else + nop +#endif +#endif /* __SHMEDIA__ */ + + .section .fini +#if __SHMEDIA__ + add r14, r63, r15 + ld.q r15, 0, r18 + ptabs r18, tr0 + ld.q r15, 8, r14 + addi r15, 16, r15 + blink tr0, r63 +#elif __SH5__ && !
__SHMEDIA__ + mov r14,r15 + lds.l @r14+,pr + mov.l @r14,r14 + rts + add #8,r15 +#else + mov r14,r15 + lds.l @r15+,pr + mov.l @r15+,r14 + rts +#ifdef __ELF__ + mov.l @r15+,r12 +#else + nop +#endif +#endif /* __SHMEDIA__ */ diff --git a/gcc/config/sh/divcost-analysis b/gcc/config/sh/divcost-analysis new file mode 100644 index 000000000..d55bb6621 --- /dev/null +++ b/gcc/config/sh/divcost-analysis @@ -0,0 +1,88 @@ +Analysis of cycle costs for SH4: + +-> udiv_le128: 5 +-> udiv_ge64k: 6 +-> udiv udiv_25: 10 +-> pos_divisor: 3 +-> pos_result linear: 5 +-> pos_result - -: 5 +-> div_le128: 7 +-> div_ge64k: 9 +sdivsi3 -> udiv_25 13 +udiv25 -> div_ge64k_end: 15 +div_ge64k_end -> rts: 13 +div_le128 -> div_le128_2: 2, r1 latency 3 +udiv_le128 -> div_le128_2: 2, r1 latency 3 +(u)div_le128 -> div_by_1: 9 +(u)div_le128 -> rts: 17 +div_by_1(_neg) -> rts: 4 +div_ge64k -> div_r8: 2 +div_ge64k -> div_ge64k_2: 3 +udiv_ge64k -> udiv_r8: 3 +udiv_ge64k -> div_ge64k_2: 3 + LS +(u)div_ge64k -> div_ge64k_end: 13 +div_r8 -> div_r8_2: 2 +udiv_r8 -> div_r8_2: 2 + LS +(u)div_r8 -> rts: 21 + +-> - + neg_result: 5 +-> + - neg_result: 5 +-> div_le128_neg: 7 +-> div_ge64k_neg: 9 +-> div_r8_neg: 11 +-> <64k div_ge64k_neg_end: 28 +-> >=64k div_ge64k_neg_end: 22 +div_ge64k_neg_end ft -> rts: 14 +div_r8_neg_end -> rts: 4 +div_r8_neg -> div_r8_neg_end: 18 +div_le128_neg -> div_by_1_neg: 4 +div_le128_neg -> rts 18 + + sh4-200 absolute divisor range: + 1 [2..128] [129..64K) [64K..|dividend|/256] >=64K,>|dividend/256| +udiv 18 22 38 32 30 +sdiv pos: 20 24 41 35 32 +sdiv neg: 15 25 42 36 33 + + sh4-300 absolute divisor range: + 8 bit 16 bit 24 bit > 24 bit +udiv 15 35 28 25 +sdiv 14 36 34 31 + + +fp-based: + +unsigned: 42 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site +signed: 33 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site + +call-div1: divisor range: + [1..64K) >= 64K +unsigned: 63 58 +signed: 76 76 + +SFUNC_STATIC call overhead: +mov.l 0f,r1 +bsrf r1 + +SFUNC_GOT call 
overhead - current: +mov.l 0f,r1 +mova 0f,r0 +mov.l 1f,r2 +add r1,r0 +mov.l @(r0,r2),r0 +jmp @r0 +; 3 cycles worse than SFUNC_STATIC + +SFUNC_GOT call overhead - improved assembler: +mov.l 0f,r1 +mova 0f,r0 +mov.l @(r0,r1),r0 +jmp @r0 +; 2 cycles worse than SFUNC_STATIC + + +Copyright (C) 2006, 2007 Free Software Foundation, Inc. + +Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. diff --git a/gcc/config/sh/divtab-sh4-300.c b/gcc/config/sh/divtab-sh4-300.c new file mode 100644 index 000000000..c8a65cfbc --- /dev/null +++ b/gcc/config/sh/divtab-sh4-300.c @@ -0,0 +1,77 @@ +/* Copyright (C) 2004, 2006, 2009 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +/* Calculate division table for ST40-300 integer division + Contributed by Joern Rennecke + joern.rennecke@st.com */ + +#include <stdio.h> +#include <math.h> + +int +main () +{ + int i, j; + double q, r, err, max_err = 0, max_s_err = 0; + + puts("/* This table has been generated by divtab-sh4-300.c.
*/"); + puts ("\t.balign 4"); + for (i = -128; i < 128; i++) + { + int n = 0; + if (i == 0) + { + /* output some dummy number for 1/0. */ + puts ("LOCAL(div_table_clz):\n\t.byte\t0"); + continue; + } + for (j = i < 0 ? -i : i; j < 128; j += j) + n++; + printf ("\t.byte\t%d\n", n - 7); + } + puts("\ +/* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,\n\ + or in bit 33 for powers of two. */\n\ + .balign 4"); + for (i = -128; i < 128; i++) + { + if (i == 0) + { + puts ("LOCAL(div_table_inv):\n\t.long\t0x0"); + continue; + } + j = i < 0 ? -i : i; + while (j < 64) + j += j; + q = 4.*(1<<30)*128/j; + r = ceil (q); + printf ("\t.long\t0x%X\n", (unsigned) (long long) r); /* r >= 2^32: go through long long so the truncation to 32 bits is well defined. */ + err = r - q; + if (err > max_err) + max_err = err; + err = err * j / 128; + if (err > max_s_err) + max_s_err = err; + } + printf ("\t/* maximum error: %f scaled: %f*/\n", max_err, max_s_err); + return 0; +} diff --git a/gcc/config/sh/divtab-sh4.c b/gcc/config/sh/divtab-sh4.c new file mode 100644 index 000000000..758508130 --- /dev/null +++ b/gcc/config/sh/divtab-sh4.c @@ -0,0 +1,85 @@ +/* Copyright (C) 2004, 2009 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>.
*/ + +/* Calculate division table for SH2..4 integer division + Contributed by Joern Rennecke + joern.rennecke@superh.com */ + +#include <stdio.h> +#include <math.h> + +int +main () +{ + int i, j; + double q, r, err, max_err = 0, max_s_err = 0; + + puts("/* This table has been generated by divtab-sh4.c. */"); + puts ("\t.balign 4"); + puts ("LOCAL(div_table_clz):"); + /* output some dummy number for 1/0. */ + printf ("\t.byte\t%d\n", 0); + for (i = 1; i <= 128; i++) + { + int n = 0; + if (i == 128) + puts ("\ +/* Lookup table translating positive divisor to index into table of\n\ + normalized inverse. N.B. the '0' entry is also the last entry of the\n\ + previous table, and causes an unaligned access for division by zero. */\n\ +LOCAL(div_table_ix):"); + for (j = i; j <= 128; j += j) + n++; + printf ("\t.byte\t%d\n", n - 7); + } + for (i = 1; i <= 128; i++) + { + j = i < 0 ? -i : i; + while (j < 128) + j += j; + printf ("\t.byte\t%d\n", j * 2 - 96*4); + } + puts("\ +/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */\n\ + .balign 4\n\ +LOCAL(zero_l):"); + for (i = 64; i < 128; i++) + { + if (i == 96) + puts ("LOCAL(div_table):"); + q = 4.*(1<<30)*128/i; + r = ceil (q); + /* The value for 64 is actually differently scaled than it would + appear from this calculation. The implicit part is %01, not 10. + Still, since the value in the table is 0 either way, this + doesn't matter here. Still, the 1/64 entry is effectively a 1/128 + entry. */ + printf ("\t.long\t0x%X\n", (unsigned) (long long) r); /* r >= 2^32: go through long long so the truncation to 32 bits is well defined. */ + err = r - q; + if (err > max_err) + max_err = err; + err = err * i / 128; + if (err > max_s_err) + max_s_err = err; + } + printf ("\t/* maximum error: %f scaled: %f*/\n", max_err, max_s_err); + return 0; +} diff --git a/gcc/config/sh/divtab.c b/gcc/config/sh/divtab.c new file mode 100644 index 000000000..f8db2f508 --- /dev/null +++ b/gcc/config/sh/divtab.c @@ -0,0 +1,200 @@ +/* Copyright (C) 2003, 2009 Free Software Foundation, Inc.
+ +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +/* Calculate division table for SH5Media integer division + Contributed by Joern Rennecke + joern.rennecke@superh.com */ + +#include <stdio.h> +#include <math.h> + +#define BITS 5 +#define N_ENTRIES (1 << BITS) +#define CUTOFF_BITS 20 + +#define BIAS (-330) + +double max_defect = 0.; +double max_defect_x; + +double min_defect = 1e9; +double min_defect_x; + +double max_defect2 = 0.; +double max_defect2_x; + +double min_defect2 = 0.; +double min_defect2_x; + +double min_defect3 = 01e9; +double min_defect3_x; +int min_defect3_val; + +double max_defect3 = 0.; +double max_defect3_x; +int max_defect3_val; + +static void note_defect3 (int val, double d2, double y2d, double x) /* Records the extreme third-step defects in the {max,min}_defect3* globals; yields no value. */ +{ + int cutoff_val = val >> CUTOFF_BITS; + double cutoff; + double defect; + + if (val < 0) + cutoff_val++; + cutoff = (cutoff_val * (1<<CUTOFF_BITS) - val) * y2d; + defect = cutoff + val * d2; + if (val < 0) + defect = - defect; + if (defect > max_defect3) + { + max_defect3 = defect; + max_defect3_x = x; + max_defect3_val = val; + } + if (defect < min_defect3) + { + min_defect3 = defect; + min_defect3_x = x; + min_defect3_val = val; + } +} + +/* This function assumes 32-bit integers.
*/ +static double +calc_defect (double x, int constant, int factor) +{ + double y0 = (constant - (int) floor ((x * factor * 64.))) / 16384.; + double y1 = 2 * y0 -y0 * y0 * (x + BIAS / (1.*(1LL<<30))); + double y2d0, y2d; + int y2d1; + double d, d2; + + y1 = floor (y1 * (1024 * 1024 * 1024)) / (1024 * 1024 * 1024); + d = y1 - 1 / x; + if (d > max_defect) + { + max_defect = d; + max_defect_x = x; + } + if (d < min_defect) + { + min_defect = d; + min_defect_x = x; + } + y2d0 = floor (y1 * x * (1LL << 60-16)); + y2d1 = (int) (long long) y2d0; + y2d = - floor ((y1 - y0 / (1<<30-14)) * y2d1) / (1LL<<44); + d2 = y1 + y2d - 1/x; + if (d2 > max_defect2) + { + max_defect2 = d2; + max_defect2_x = x; + } + if (d2 < min_defect2) + { + min_defect2 = d2; + min_defect2_x = x; + } + /* zero times anything is trivially zero. */ + note_defect3 ((1 << CUTOFF_BITS) - 1, d2, y2d, x); + note_defect3 (1 << CUTOFF_BITS, d2, y2d, x); + note_defect3 ((1U << 31) - (1 << CUTOFF_BITS), d2, y2d, x); + note_defect3 ((1U << 31) - 1, d2, y2d, x); + note_defect3 (-1, d2, y2d, x); + note_defect3 (-(1 << CUTOFF_BITS), d2, y2d, x); + note_defect3 ((1U << 31) - (1 << CUTOFF_BITS) + 1, d2, y2d, x); + note_defect3 (-(1U << 31), d2, y2d, x); + return d; +} + +int +main () +{ + int i; + unsigned char factors[N_ENTRIES]; + short constants[N_ENTRIES]; + int steps = N_ENTRIES / 2; + double step = 1. / steps; + double eps30 = 1. / (1024 * 1024 * 1024); + + for (i = 0; i < N_ENTRIES; i++) + { + double x_low = (i < steps ? 1. : -3.) + i * step; + double x_high = x_low + step - eps30; + double x_med; + int factor, constant; + double low_defect, med_defect, high_defect, max_defect; + + factor = (1./x_low- 1./x_high) / step * 256. + 0.5; + if (factor == 256) + factor = 255; + factors[i] = factor; + /* Use minimum of error function for x_med. */ + x_med = sqrt (256./factor); + if (x_low < 0) + x_med = - x_med; + low_defect = 1. / x_low + x_low * factor / 256.; + high_defect = 1.
/ x_high + x_high * factor / 256.; + med_defect = 1. / x_med + x_med * factor / 256.; + max_defect + = ((low_defect > high_defect) ^ (x_med < 0)) ? low_defect : high_defect; + constant = (med_defect + max_defect) * 0.5 * 16384. + 0.5; + if (constant < -32768 || constant > 32767) + abort (); + constants[i] = constant; + calc_defect (x_low, constant, factor); + calc_defect (x_med, constant, factor); + calc_defect (x_high, constant, factor); + } + printf ("/* This table has been generated by divtab.c .\n"); + printf ("Defects for bias %d:\n", BIAS); + printf (" Max defect: %e at %e\n", max_defect, max_defect_x); + printf (" Min defect: %e at %e\n", min_defect, min_defect_x); + printf (" Max 2nd step defect: %e at %e\n", max_defect2, max_defect2_x); + printf (" Min 2nd step defect: %e at %e\n", min_defect2, min_defect2_x); + printf (" Max div defect: %e at %d:%e\n", max_defect3, max_defect3_val, max_defect3_x); + printf (" Min div defect: %e at %d:%e\n", min_defect3, min_defect3_val, min_defect3_x); + printf (" Defect at 1: %e\n", + calc_defect (1., constants[0], factors[0])); + printf (" Defect at -2: %e */\n", + calc_defect (-2., constants[steps], factors[steps])); + printf ("\t.section\t.rodata\n"); + printf ("\t.balign 2\n"); + printf ("/* negative division constants */\n"); + for (i = steps; i < 2 * steps; i++) + printf ("\t.word\t%d\n", constants[i]); + printf ("/* negative division factors */\n"); + for (i = steps; i < 2*steps; i++) + printf ("\t.byte\t%d\n", factors[i]); + printf ("\t.skip %d\n", steps); + printf ("\t.global GLOBAL(div_table)\n"); /* .global takes a bare symbol; the colon belongs only on the label below. */ + printf ("GLOBAL(div_table):\n"); + printf ("\t.skip %d\n", steps); + printf ("/* positive division factors */\n"); + for (i = 0; i < steps; i++) + printf ("\t.byte\t%d\n", factors[i]); + printf ("/* positive division constants */\n"); + for (i = 0; i < steps; i++) + printf ("\t.word\t%d\n", constants[i]); + exit (0); +} diff --git a/gcc/config/sh/elf.h b/gcc/config/sh/elf.h new file mode 100644 index
000000000..336743cc8 --- /dev/null +++ b/gcc/config/sh/elf.h @@ -0,0 +1,90 @@ +/* Definitions of target machine for gcc for Renesas / SuperH SH using ELF. + Copyright (C) 1996, 1997, 2000, 2001, 2002, 2004, 2005, 2007, 2010 + Free Software Foundation, Inc. + Contributed by Ian Lance Taylor . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Let sh.c know this is ELF. */ +#undef TARGET_ELF +#define TARGET_ELF 1 + +/* Generate DWARF2 debugging information and make it the default */ +#define DWARF2_DEBUGGING_INFO 1 + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* use a more compact format for line information */ +#define DWARF2_ASM_LINE_DEBUG_INFO 1 + +#undef WCHAR_TYPE +/* #define WCHAR_TYPE (TARGET_SH5 ? "int" : "long int") */ +#define WCHAR_TYPE SH_ELF_WCHAR_TYPE + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + + +/* The prefix to add to user-visible assembler symbols. */ + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +#undef SIZE_TYPE +#define SIZE_TYPE (TARGET_SH5 ? "long unsigned int" : "unsigned int") + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_SH5 ? "long int" : "int") + +/* Pass -ml and -mrelax to the assembler and linker. 
*/ +#undef ASM_SPEC +#define ASM_SPEC SH_ASM_SPEC + +#undef LINK_SPEC +#define LINK_SPEC SH_LINK_SPEC +#undef LINK_EMUL_PREFIX +#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN +#define LINK_EMUL_PREFIX "sh%{!mb:l}elf" +#else +#define LINK_EMUL_PREFIX "sh%{ml:l}elf" +#endif + +#define DBX_REGISTER_NUMBER(REGNO) SH_DBX_REGISTER_NUMBER (REGNO) + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \ + sprintf ((STRING), "*%s%s%ld", LOCAL_LABEL_PREFIX, (PREFIX), (long)(NUM)) + +#define DBX_LINES_FUNCTION_RELATIVE 1 +#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared: crt1.o%s} crti.o%s \ + %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s" + +#undef LIB_SPEC +#define LIB_SPEC "-lc" + +/* ASM_OUTPUT_CASE_LABEL is defined in elfos.h. With it, + a redundant .align was generated. */ +#undef ASM_OUTPUT_CASE_LABEL diff --git a/gcc/config/sh/embed-elf.h b/gcc/config/sh/embed-elf.h new file mode 100644 index 000000000..a9f6d9438 --- /dev/null +++ b/gcc/config/sh/embed-elf.h @@ -0,0 +1,36 @@ +/* Definitions of target machine for GNU compiler for Renesas / SuperH SH + non-Linux embedded targets. + Copyright (C) 2002, 2003, 2007, 2010, 2011 Free Software Foundation, Inc. + Contributed by J"orn Rennecke + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +/* While the speed-optimized implementations of udivsi3_i4i / sdivsi3_i4i + in libgcc are not available for SH2, the space-optimized ones in + libgcc-Os-4-200 are. Thus, when not optimizing for space, link + libgcc-Os-4-200 after libgcc, so that -mdiv=call-table works for -m2. */ +#define LIBGCC_SPEC "%{!shared: \ + %{m4-100*:-lic_invalidate_array_4-100} \ + %{m4-200*:-lic_invalidate_array_4-200} \ + %{m4-300*|m4-340:-lic_invalidate_array_4a %{!Os: -lgcc-4-300}} \ + %{m4a*:-lic_invalidate_array_4a}} \ + %{Os: -lgcc-Os-4-200} \ + -lgcc \ + %{!Os: -lgcc-Os-4-200}" diff --git a/gcc/config/sh/lib1funcs-4-300.asm b/gcc/config/sh/lib1funcs-4-300.asm new file mode 100644 index 000000000..b131877f1 --- /dev/null +++ b/gcc/config/sh/lib1funcs-4-300.asm @@ -0,0 +1,936 @@ +/* Copyright (C) 2004, 2006, 2009 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + +/* libgcc routines for the STMicroelectronics ST40-300 CPU. 
+ Contributed by J"orn Rennecke joern.rennecke@st.com. */ + +#include "lib1funcs.h" + +#if !__SHMEDIA__ +#ifdef L_div_table +#if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) +/* This code used shld, thus is not suitable for SH1 / SH2. */ + +/* Signed / unsigned division without use of FPU, optimized for SH4-300. + Uses a lookup table for divisors in the range -128 .. +127, and + div1 with case distinction for larger divisors in three more ranges. + The code is lumped together with the table to allow the use of mova. */ +#ifdef __LITTLE_ENDIAN__ +#define L_LSB 0 +#define L_LSWMSB 1 +#define L_MSWLSB 2 +#else +#define L_LSB 3 +#define L_LSWMSB 2 +#define L_MSWLSB 1 +#endif + + .global GLOBAL(udivsi3_i4i) + .global GLOBAL(sdivsi3_i4i) + FUNC(GLOBAL(udivsi3_i4i)) + FUNC(GLOBAL(sdivsi3_i4i)) + + .balign 4 +LOCAL(div_ge8m): ! 10 cycles up to here + rotcr r1 ! signed shift must use original sign from r4 + div0s r5,r4 + mov #24,r7 + shld r7,r6 + shad r0,r1 + rotcl r6 + div1 r5,r1 + swap.w r5,r0 ! detect -0x80000000 : 0x800000 + rotcl r6 + swap.w r4,r7 + div1 r5,r1 + swap.b r7,r7 + rotcl r6 + or r7,r0 + div1 r5,r1 + swap.w r0,r7 + rotcl r6 + or r7,r0 + div1 r5,r1 + add #-0x80,r0 + rotcl r6 + extu.w r0,r0 + div1 r5,r1 + neg r0,r0 + rotcl r6 + swap.w r0,r0 + div1 r5,r1 + mov.l @r15+,r7 + and r6,r0 + rotcl r6 + div1 r5,r1 + shll2 r0 + rotcl r6 + exts.b r0,r0 + div1 r5,r1 + swap.w r0,r0 + exts.w r0,r1 + exts.b r6,r0 + mov.l @r15+,r6 + rotcl r0 + rts + sub r1,r0 + ! 31 cycles up to here + + .balign 4 +LOCAL(udiv_ge64k): ! 3 cycles up to here + mov r4,r0 + shlr8 r0 + div0u + cmp/hi r0,r5 + bt LOCAL(udiv_r8) + mov.l r5,@-r15 + shll8 r5 + ! 7 cycles up to here + .rept 8 + div1 r5,r0 + .endr + extu.b r4,r1 ! 15 cycles up to here + extu.b r0,r6 + xor r1,r0 + xor r6,r0 + swap.b r6,r6 + .rept 8 + div1 r5,r0 + .endr ! 
25 cycles up to here + extu.b r0,r0 + mov.l @r15+,r5 + or r6,r0 + mov.l @r15+,r6 + rts + rotcl r0 ! 28 cycles up to here + + .balign 4 +LOCAL(udiv_r8): ! 6 cycles up to here + mov.l r4,@-r15 + shll16 r4 + shll8 r4 + ! + shll r4 + mov r0,r1 + div1 r5,r1 + mov r4,r0 + rotcl r0 + mov.l @r15+,r4 + div1 r5,r1 + ! 12 cycles up to here + .rept 6 + rotcl r0; div1 r5,r1 + .endr + mov.l @r15+,r6 ! 24 cycles up to here + rts + rotcl r0 + + .balign 4 +LOCAL(div_ge32k): ! 6 cycles up to here + mov.l r7,@-r15 + swap.w r5,r6 + exts.b r6,r7 + exts.w r6,r6 + cmp/eq r6,r7 + extu.b r1,r6 + bf/s LOCAL(div_ge8m) + cmp/hi r1,r4 ! copy sign bit of r4 into T + rotcr r1 ! signed shift must use original sign from r4 + div0s r5,r4 + shad r0,r1 + shll8 r5 + div1 r5,r1 + mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00) + div1 r5,r1 + shlr8 r7 + div1 r5,r1 + swap.w r4,r0 + div1 r5,r1 + swap.b r0,r0 + div1 r5,r1 + or r0,r7 + div1 r5,r1 + add #-80,r7 + div1 r5,r1 + swap.w r7,r0 + div1 r5,r1 + or r0,r7 + extu.b r1,r0 + xor r6,r1 + xor r0,r1 + exts.b r0,r0 + div1 r5,r1 + extu.w r7,r7 + div1 r5,r1 + neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000 + div1 r5,r1 + and r0,r7 + div1 r5,r1 + swap.w r7,r7 ! 26 cycles up to here. + div1 r5,r1 + shll8 r0 + div1 r5,r1 + exts.w r7,r7 + div1 r5,r1 + add r0,r0 + div1 r5,r1 + sub r7,r0 + extu.b r1,r1 + mov.l @r15+,r7 + rotcl r1 + mov.l @r15+,r6 + add r1,r0 + mov #-8,r1 + rts + shad r1,r5 ! 34 cycles up to here + + .balign 4 +GLOBAL(udivsi3_i4i): + mov.l r6,@-r15 + extu.w r5,r6 + cmp/eq r5,r6 + mov #0x7f,r0 + bf LOCAL(udiv_ge64k) + cmp/hi r0,r5 + bf LOCAL(udiv_le128) + mov r4,r1 + shlr8 r1 + div0u + shlr r1 + shll16 r6 + div1 r6,r1 + extu.b r4,r0 ! 7 cycles up to here + .rept 8 + div1 r6,r1 + .endr ! 15 cycles up to here + xor r1,r0 ! xor dividend with result lsb + .rept 6 + div1 r6,r1 + .endr + mov.l r7,@-r15 ! 21 cycles up to here + div1 r6,r1 + extu.b r0,r7 + div1 r6,r1 + shll8 r7 + extu.w r1,r0 + xor r7,r1 ! 
replace lsb of result with lsb of dividend + div1 r6,r1 + mov #0,r7 + div1 r6,r1 + ! + div1 r6,r1 + bra LOCAL(div_end) + div1 r6,r1 ! 28 cycles up to here + + /* This is link-compatible with a GLOBAL(sdivsi3) call, + but we effectively clobber only r1, macl and mach */ + /* Because negative quotients are calculated as one's complements, + -0x80000000 divided by the smallest positive number of a number + range (0x80, 0x8000, 0x800000) causes saturation in the one's + complement representation, and we have to suppress the + one's -> two's complement adjustment. Since positive numbers + don't get such an adjustment, it's OK to also compute one's -> two's + complement adjustment suppression for a dividend of 0. */ + .balign 4 +GLOBAL(sdivsi3_i4i): + mov.l r6,@-r15 + exts.b r5,r6 + cmp/eq r5,r6 + mov #-1,r1 + bt/s LOCAL(div_le128) + cmp/pz r4 + addc r4,r1 + exts.w r5,r6 + cmp/eq r5,r6 + mov #-7,r0 + bf/s LOCAL(div_ge32k) + cmp/hi r1,r4 ! copy sign bit of r4 into T + rotcr r1 + shll16 r6 ! 7 cycles up to here + shad r0,r1 + div0s r5,r4 + div1 r6,r1 + mov.l r7,@-r15 + div1 r6,r1 + mov r4,r0 ! re-compute adjusted dividend + div1 r6,r1 + mov #-31,r7 + div1 r6,r1 + shad r7,r0 + div1 r6,r1 + add r4,r0 ! adjusted dividend + div1 r6,r1 + mov.l r8,@-r15 + div1 r6,r1 + swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80 + div1 r6,r1 + swap.b r8,r8 + xor r1,r0 ! xor dividend with result lsb + div1 r6,r1 + div1 r6,r1 + or r5,r8 + div1 r6,r1 + add #-0x80,r8 ! r8 is 0 iff there is a match + div1 r6,r1 + swap.w r8,r7 ! or upper 16 bits... + div1 r6,r1 + or r7,r8 !...into lower 16 bits + div1 r6,r1 + extu.w r8,r8 + div1 r6,r1 + extu.b r0,r7 + div1 r6,r1 + shll8 r7 + exts.w r1,r0 + xor r7,r1 ! replace lsb of result with lsb of dividend + div1 r6,r1 + neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm. + div1 r6,r1 + and r0,r8 + div1 r6,r1 + swap.w r8,r7 + div1 r6,r1 + mov.l @r15+,r8 ! 
58 insns, 29 cycles up to here +LOCAL(div_end): + div1 r6,r1 + shll8 r0 + div1 r6,r1 + exts.w r7,r7 + div1 r6,r1 + add r0,r0 + div1 r6,r1 + sub r7,r0 + extu.b r1,r1 + mov.l @r15+,r7 + rotcl r1 + mov.l @r15+,r6 + rts + add r1,r0 + + .balign 4 +LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict) + mova LOCAL(div_table_inv),r0 + shll2 r6 + mov.l @(r0,r6),r1 + mova LOCAL(div_table_clz),r0 + lds r4,mach + ! + ! + ! + tst r1,r1 + ! + bt 0f + dmulu.l r1,r4 +0: mov.b @(r0,r5),r1 + clrt + ! + ! + sts mach,r0 + addc r4,r0 + rotcr r0 + mov.l @r15+,r6 + rts + shld r1,r0 + + .balign 4 +LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict) + mova LOCAL(div_table_inv),r0 + shll2 r6 + mov.l @(r0,r6),r1 + mova LOCAL(div_table_clz),r0 + neg r4,r6 + bf 0f + mov r4,r6 +0: lds r6,mach + tst r1,r1 + bt 0f + dmulu.l r1,r6 +0: div0s r4,r5 + mov.b @(r0,r5),r1 + bt/s LOCAL(le128_neg) + clrt + ! + sts mach,r0 + addc r6,r0 + rotcr r0 + mov.l @r15+,r6 + rts + shld r1,r0 + +/* Could trap divide by zero for the cost of one cycle more mispredict penalty: +... + dmulu.l r1,r6 +0: div0s r4,r5 + bt/s LOCAL(le128_neg) + tst r5,r5 + bt LOCAL(div_by_zero) + mov.b @(r0,r5),r1 + sts mach,r0 + addc r6,r0 +... +LOCAL(div_by_zero): + trapa # + .balign 4 +LOCAL(le128_neg): + bt LOCAL(div_by_zero) + mov.b @(r0,r5),r1 + sts mach,r0 + addc r6,r0 +... */ + + .balign 4 +LOCAL(le128_neg): + sts mach,r0 + addc r6,r0 + rotcr r0 + mov.l @r15+,r6 + shad r1,r0 + rts + neg r0,r0 + ENDFUNC(GLOBAL(udivsi3_i4i)) + ENDFUNC(GLOBAL(sdivsi3_i4i)) + +/* This table has been generated by divtab-sh4.c. 
*/ + .balign 4 + .byte -7 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -1 + .byte -1 + .byte 0 +LOCAL(div_table_clz): + .byte 0 + .byte 0 + .byte -1 + .byte -1 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + 
.byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 +/* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32, + or in bit 33 for powers of two. */ + .balign 4 + .long 0x0 + .long 0x2040811 + .long 0x4104105 + .long 0x624DD30 + .long 0x8421085 + .long 0xA6810A7 + .long 0xC9714FC + .long 0xECF56BF + .long 0x11111112 + .long 0x135C8114 + .long 0x15B1E5F8 + .long 0x18118119 + .long 0x1A7B9612 + .long 0x1CF06ADB + .long 0x1F7047DD + .long 0x21FB7813 + .long 0x24924925 + .long 0x27350B89 + .long 0x29E4129F + .long 0x2C9FB4D9 + .long 0x2F684BDB + .long 0x323E34A3 + .long 0x3521CFB3 + .long 0x38138139 + .long 0x3B13B13C + .long 0x3E22CBCF + .long 0x41414142 + .long 0x446F8657 + .long 0x47AE147B + .long 0x4AFD6A06 + .long 0x4E5E0A73 + .long 0x51D07EAF + .long 0x55555556 + .long 0x58ED2309 + .long 0x5C9882BA + .long 0x60581606 + .long 0x642C8591 + .long 0x68168169 + .long 0x6C16C16D + .long 0x702E05C1 + .long 0x745D1746 + .long 0x78A4C818 + .long 0x7D05F418 + .long 0x81818182 + .long 0x86186187 + .long 0x8ACB90F7 + .long 0x8F9C18FA + .long 0x948B0FCE + .long 0x9999999A + .long 0x9EC8E952 + .long 0xA41A41A5 + .long 0xA98EF607 + .long 0xAF286BCB + .long 0xB4E81B4F + .long 
0xBACF914D + .long 0xC0E07039 + .long 0xC71C71C8 + .long 0xCD856891 + .long 0xD41D41D5 + .long 0xDAE6076C + .long 0xE1E1E1E2 + .long 0xE9131AC0 + .long 0xF07C1F08 + .long 0xF81F81F9 + .long 0x0 + .long 0x4104105 + .long 0x8421085 + .long 0xC9714FC + .long 0x11111112 + .long 0x15B1E5F8 + .long 0x1A7B9612 + .long 0x1F7047DD + .long 0x24924925 + .long 0x29E4129F + .long 0x2F684BDB + .long 0x3521CFB3 + .long 0x3B13B13C + .long 0x41414142 + .long 0x47AE147B + .long 0x4E5E0A73 + .long 0x55555556 + .long 0x5C9882BA + .long 0x642C8591 + .long 0x6C16C16D + .long 0x745D1746 + .long 0x7D05F418 + .long 0x86186187 + .long 0x8F9C18FA + .long 0x9999999A + .long 0xA41A41A5 + .long 0xAF286BCB + .long 0xBACF914D + .long 0xC71C71C8 + .long 0xD41D41D5 + .long 0xE1E1E1E2 + .long 0xF07C1F08 + .long 0x0 + .long 0x8421085 + .long 0x11111112 + .long 0x1A7B9612 + .long 0x24924925 + .long 0x2F684BDB + .long 0x3B13B13C + .long 0x47AE147B + .long 0x55555556 + .long 0x642C8591 + .long 0x745D1746 + .long 0x86186187 + .long 0x9999999A + .long 0xAF286BCB + .long 0xC71C71C8 + .long 0xE1E1E1E2 + .long 0x0 + .long 0x11111112 + .long 0x24924925 + .long 0x3B13B13C + .long 0x55555556 + .long 0x745D1746 + .long 0x9999999A + .long 0xC71C71C8 + .long 0x0 + .long 0x24924925 + .long 0x55555556 + .long 0x9999999A + .long 0x0 + .long 0x55555556 + .long 0x0 + .long 0x0 +LOCAL(div_table_inv): + .long 0x0 + .long 0x0 + .long 0x0 + .long 0x55555556 + .long 0x0 + .long 0x9999999A + .long 0x55555556 + .long 0x24924925 + .long 0x0 + .long 0xC71C71C8 + .long 0x9999999A + .long 0x745D1746 + .long 0x55555556 + .long 0x3B13B13C + .long 0x24924925 + .long 0x11111112 + .long 0x0 + .long 0xE1E1E1E2 + .long 0xC71C71C8 + .long 0xAF286BCB + .long 0x9999999A + .long 0x86186187 + .long 0x745D1746 + .long 0x642C8591 + .long 0x55555556 + .long 0x47AE147B + .long 0x3B13B13C + .long 0x2F684BDB + .long 0x24924925 + .long 0x1A7B9612 + .long 0x11111112 + .long 0x8421085 + .long 0x0 + .long 0xF07C1F08 + .long 0xE1E1E1E2 + .long 
0xD41D41D5 + .long 0xC71C71C8 + .long 0xBACF914D + .long 0xAF286BCB + .long 0xA41A41A5 + .long 0x9999999A + .long 0x8F9C18FA + .long 0x86186187 + .long 0x7D05F418 + .long 0x745D1746 + .long 0x6C16C16D + .long 0x642C8591 + .long 0x5C9882BA + .long 0x55555556 + .long 0x4E5E0A73 + .long 0x47AE147B + .long 0x41414142 + .long 0x3B13B13C + .long 0x3521CFB3 + .long 0x2F684BDB + .long 0x29E4129F + .long 0x24924925 + .long 0x1F7047DD + .long 0x1A7B9612 + .long 0x15B1E5F8 + .long 0x11111112 + .long 0xC9714FC + .long 0x8421085 + .long 0x4104105 + .long 0x0 + .long 0xF81F81F9 + .long 0xF07C1F08 + .long 0xE9131AC0 + .long 0xE1E1E1E2 + .long 0xDAE6076C + .long 0xD41D41D5 + .long 0xCD856891 + .long 0xC71C71C8 + .long 0xC0E07039 + .long 0xBACF914D + .long 0xB4E81B4F + .long 0xAF286BCB + .long 0xA98EF607 + .long 0xA41A41A5 + .long 0x9EC8E952 + .long 0x9999999A + .long 0x948B0FCE + .long 0x8F9C18FA + .long 0x8ACB90F7 + .long 0x86186187 + .long 0x81818182 + .long 0x7D05F418 + .long 0x78A4C818 + .long 0x745D1746 + .long 0x702E05C1 + .long 0x6C16C16D + .long 0x68168169 + .long 0x642C8591 + .long 0x60581606 + .long 0x5C9882BA + .long 0x58ED2309 + .long 0x55555556 + .long 0x51D07EAF + .long 0x4E5E0A73 + .long 0x4AFD6A06 + .long 0x47AE147B + .long 0x446F8657 + .long 0x41414142 + .long 0x3E22CBCF + .long 0x3B13B13C + .long 0x38138139 + .long 0x3521CFB3 + .long 0x323E34A3 + .long 0x2F684BDB + .long 0x2C9FB4D9 + .long 0x29E4129F + .long 0x27350B89 + .long 0x24924925 + .long 0x21FB7813 + .long 0x1F7047DD + .long 0x1CF06ADB + .long 0x1A7B9612 + .long 0x18118119 + .long 0x15B1E5F8 + .long 0x135C8114 + .long 0x11111112 + .long 0xECF56BF + .long 0xC9714FC + .long 0xA6810A7 + .long 0x8421085 + .long 0x624DD30 + .long 0x4104105 + .long 0x2040811 + /* maximum error: 0.987342 scaled: 0.921875*/ + +#endif /* SH3 / SH4 */ + +#endif /* L_div_table */ +#endif /* !__SHMEDIA__ */ diff --git a/gcc/config/sh/lib1funcs-Os-4-200.asm b/gcc/config/sh/lib1funcs-Os-4-200.asm new file mode 100644 index 
000000000..aae57ccd3 --- /dev/null +++ b/gcc/config/sh/lib1funcs-Os-4-200.asm @@ -0,0 +1,322 @@ +/* Copyright (C) 2006, 2009 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* Moderately Space-optimized libgcc routines for the Renesas SH / + STMicroelectronics ST40 CPUs. + Contributed by J"orn Rennecke joern.rennecke@st.com. */ + +#include "lib1funcs.h" + +#if !__SHMEDIA__ +#ifdef L_udivsi3_i4i + +/* 88 bytes; sh4-200 cycle counts: + divisor >= 2G: 11 cycles + dividend < 2G: 48 cycles + dividend >= 2G: divisor != 1: 54 cycles + dividend >= 2G, divisor == 1: 22 cycles */ +#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__) +!!
args in r4 and r5, result in r0, clobber r1 + + .global GLOBAL(udivsi3_i4i) + FUNC(GLOBAL(udivsi3_i4i)) +GLOBAL(udivsi3_i4i): + mova L1,r0 + cmp/pz r5 + sts fpscr,r1 + lds.l @r0+,fpscr + sts.l fpul,@-r15 + bf LOCAL(huge_divisor) + mov.l r1,@-r15 + lds r4,fpul + cmp/pz r4 +#ifdef FMOVD_WORKS + fmov.d dr0,@-r15 + float fpul,dr0 + fmov.d dr2,@-r15 + bt LOCAL(dividend_adjusted) + mov #1,r1 + fmov.d @r0,dr2 + cmp/eq r1,r5 + bt LOCAL(div_by_1) + fadd dr2,dr0 +LOCAL(dividend_adjusted): + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 +LOCAL(div_by_1): + fmov.d @r15+,dr2 + ftrc dr0,fpul + fmov.d @r15+,dr0 +#else /* !FMOVD_WORKS */ + fmov.s DR01,@-r15 + mov #1,r1 + fmov.s DR00,@-r15 + float fpul,dr0 + fmov.s DR21,@-r15 + bt/s LOCAL(dividend_adjusted) + fmov.s DR20,@-r15 + cmp/eq r1,r5 + bt LOCAL(div_by_1) + fmov.s @r0+,DR20 + fmov.s @r0,DR21 + fadd dr2,dr0 +LOCAL(dividend_adjusted): + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 +LOCAL(div_by_1): + fmov.s @r15+,DR20 + fmov.s @r15+,DR21 + ftrc dr0,fpul + fmov.s @r15+,DR00 + fmov.s @r15+,DR01 +#endif /* !FMOVD_WORKS */ + lds.l @r15+,fpscr + sts fpul,r0 + rts + lds.l @r15+,fpul + +#ifdef FMOVD_WORKS + .p2align 3 ! make double below 8 byte aligned. +#endif +LOCAL(huge_divisor): + lds r1,fpscr + add #4,r15 + cmp/hs r5,r4 + rts + movt r0 + + .p2align 2 +L1: +#ifndef FMOVD_WORKS + .long 0x80000 +#else + .long 0x180000 +#endif + .double 4294967296 + + ENDFUNC(GLOBAL(udivsi3_i4i)) +#elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */ + +#if 0 +/* With 36 bytes, the following would probably be the most compact + implementation, but with 139 cycles on an sh4-200, it is extremely slow. 
*/ +GLOBAL(udivsi3_i4i): + mov.l r2,@-r15 + mov #0,r1 + div0u + mov r1,r2 + mov.l r3,@-r15 + mov r1,r3 + sett + mov r4,r0 +LOCAL(loop): + rotcr r2 + ; + bt/s LOCAL(end) + cmp/gt r2,r3 + rotcl r0 + bra LOCAL(loop) + div1 r5,r1 +LOCAL(end): + rotcl r0 + mov.l @r15+,r3 + rts + mov.l @r15+,r2 +#endif /* 0 */ + +/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i + sh4-200 run times: + udiv small divisor: 55 cycles + udiv large divisor: 52 cycles + sdiv small divisor, positive result: 59 cycles + sdiv large divisor, positive result: 56 cycles + sdiv small divisor, negative result: 65 cycles (*) + sdiv large divisor, negative result: 62 cycles (*) + (*): r2 is restored in the rts delay slot and has a lingering latency + of two more cycles. */ + .balign 4 + .global GLOBAL(udivsi3_i4i) + FUNC(GLOBAL(udivsi3_i4i)) + FUNC(GLOBAL(sdivsi3_i4i)) +GLOBAL(udivsi3_i4i): + sts pr,r1 + mov.l r4,@-r15 + extu.w r5,r0 + cmp/eq r5,r0 + swap.w r4,r0 + shlr16 r4 + bf/s LOCAL(large_divisor) + div0u + mov.l r5,@-r15 + shll16 r5 +LOCAL(sdiv_small_divisor): + div1 r5,r4 + bsr LOCAL(div6) + div1 r5,r4 + div1 r5,r4 + bsr LOCAL(div6) + div1 r5,r4 + xtrct r4,r0 + xtrct r0,r4 + bsr LOCAL(div7) + swap.w r4,r4 + div1 r5,r4 + bsr LOCAL(div7) + div1 r5,r4 + xtrct r4,r0 + mov.l @r15+,r5 + swap.w r0,r0 + mov.l @r15+,r4 + jmp @r1 + rotcl r0 +LOCAL(div7): + div1 r5,r4 +LOCAL(div6): + div1 r5,r4; div1 r5,r4; div1 r5,r4 + div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 + +LOCAL(divx3): + rotcl r0 + div1 r5,r4 + rotcl r0 + div1 r5,r4 + rotcl r0 + rts + div1 r5,r4 + +LOCAL(large_divisor): + mov.l r5,@-r15 +LOCAL(sdiv_large_divisor): + xor r4,r0 + .rept 4 + rotcl r0 + bsr LOCAL(divx3) + div1 r5,r4 + .endr + mov.l @r15+,r5 + mov.l @r15+,r4 + jmp @r1 + rotcl r0 + ENDFUNC(GLOBAL(udivsi3_i4i)) + + .global GLOBAL(sdivsi3_i4i) +GLOBAL(sdivsi3_i4i): + mov.l r4,@-r15 + cmp/pz r5 + mov.l r5,@-r15 + bt/s LOCAL(pos_divisor) + cmp/pz r4 + neg r5,r5 + extu.w r5,r0 + bt/s LOCAL(neg_result) + cmp/eq r5,r0 + neg r4,r4 
+LOCAL(pos_result): + swap.w r4,r0 + bra LOCAL(sdiv_check_divisor) + sts pr,r1 +LOCAL(pos_divisor): + extu.w r5,r0 + bt/s LOCAL(pos_result) + cmp/eq r5,r0 + neg r4,r4 +LOCAL(neg_result): + mova LOCAL(negate_result),r0 + ; + mov r0,r1 + swap.w r4,r0 + lds r2,macl + sts pr,r2 +LOCAL(sdiv_check_divisor): + shlr16 r4 + bf/s LOCAL(sdiv_large_divisor) + div0u + bra LOCAL(sdiv_small_divisor) + shll16 r5 + .balign 4 +LOCAL(negate_result): + neg r0,r0 + jmp @r2 + sts macl,r2 + ENDFUNC(GLOBAL(sdivsi3_i4i)) +#endif /* !__SH_FPU_DOUBLE__ */ +#endif /* L_udivsi3_i4i */ + +#ifdef L_sdivsi3_i4i +#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__) +/* 48 bytes, 45 cycles on sh4-200 */ +!! args in r4 and r5, result in r0, clobber r1 + + .global GLOBAL(sdivsi3_i4i) + FUNC(GLOBAL(sdivsi3_i4i)) +GLOBAL(sdivsi3_i4i): + sts.l fpscr,@-r15 + sts fpul,r1 + mova L1,r0 + lds.l @r0+,fpscr + lds r4,fpul +#ifdef FMOVD_WORKS + fmov.d dr0,@-r15 + float fpul,dr0 + lds r5,fpul + fmov.d dr2,@-r15 +#else + fmov.s DR01,@-r15 + fmov.s DR00,@-r15 + float fpul,dr0 + lds r5,fpul + fmov.s DR21,@-r15 + fmov.s DR20,@-r15 +#endif + float fpul,dr2 + fdiv dr2,dr0 +#ifdef FMOVD_WORKS + fmov.d @r15+,dr2 +#else + fmov.s @r15+,DR20 + fmov.s @r15+,DR21 +#endif + ftrc dr0,fpul +#ifdef FMOVD_WORKS + fmov.d @r15+,dr0 +#else + fmov.s @r15+,DR00 + fmov.s @r15+,DR01 +#endif + lds.l @r15+,fpscr + sts fpul,r0 + rts + lds r1,fpul + + .p2align 2 +L1: +#ifndef FMOVD_WORKS + .long 0x80000 +#else + .long 0x180000 +#endif + + ENDFUNC(GLOBAL(sdivsi3_i4i)) +#endif /* __SH_FPU_DOUBLE__ */ +#endif /* L_sdivsi3_i4i */ +#endif /* !__SHMEDIA__ */ diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm new file mode 100644 index 000000000..2f0ca16cd --- /dev/null +++ b/gcc/config/sh/lib1funcs.asm @@ -0,0 +1,3933 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2009 + Free Software Foundation, Inc. 
+ +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +#include "lib1funcs.h" + +/* t-vxworks needs to build both PIC and non-PIC versions of libgcc, + so it is more convenient to define NO_FPSCR_VALUES here than to + define it on the command line. */ +#if defined __vxworks && defined __PIC__ +#define NO_FPSCR_VALUES +#endif + +#if !
__SH5__ +#ifdef L_ashiftrt + .global GLOBAL(ashiftrt_r4_0) + .global GLOBAL(ashiftrt_r4_1) + .global GLOBAL(ashiftrt_r4_2) + .global GLOBAL(ashiftrt_r4_3) + .global GLOBAL(ashiftrt_r4_4) + .global GLOBAL(ashiftrt_r4_5) + .global GLOBAL(ashiftrt_r4_6) + .global GLOBAL(ashiftrt_r4_7) + .global GLOBAL(ashiftrt_r4_8) + .global GLOBAL(ashiftrt_r4_9) + .global GLOBAL(ashiftrt_r4_10) + .global GLOBAL(ashiftrt_r4_11) + .global GLOBAL(ashiftrt_r4_12) + .global GLOBAL(ashiftrt_r4_13) + .global GLOBAL(ashiftrt_r4_14) + .global GLOBAL(ashiftrt_r4_15) + .global GLOBAL(ashiftrt_r4_16) + .global GLOBAL(ashiftrt_r4_17) + .global GLOBAL(ashiftrt_r4_18) + .global GLOBAL(ashiftrt_r4_19) + .global GLOBAL(ashiftrt_r4_20) + .global GLOBAL(ashiftrt_r4_21) + .global GLOBAL(ashiftrt_r4_22) + .global GLOBAL(ashiftrt_r4_23) + .global GLOBAL(ashiftrt_r4_24) + .global GLOBAL(ashiftrt_r4_25) + .global GLOBAL(ashiftrt_r4_26) + .global GLOBAL(ashiftrt_r4_27) + .global GLOBAL(ashiftrt_r4_28) + .global GLOBAL(ashiftrt_r4_29) + .global GLOBAL(ashiftrt_r4_30) + .global GLOBAL(ashiftrt_r4_31) + .global GLOBAL(ashiftrt_r4_32) + + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22)) + 
HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32)) + + .align 1 +GLOBAL(ashiftrt_r4_32): +GLOBAL(ashiftrt_r4_31): + rotcl r4 + rts + subc r4,r4 + +GLOBAL(ashiftrt_r4_30): + shar r4 +GLOBAL(ashiftrt_r4_29): + shar r4 +GLOBAL(ashiftrt_r4_28): + shar r4 +GLOBAL(ashiftrt_r4_27): + shar r4 +GLOBAL(ashiftrt_r4_26): + shar r4 +GLOBAL(ashiftrt_r4_25): + shar r4 +GLOBAL(ashiftrt_r4_24): + shlr16 r4 + shlr8 r4 + rts + exts.b r4,r4 + +GLOBAL(ashiftrt_r4_23): + shar r4 +GLOBAL(ashiftrt_r4_22): + shar r4 +GLOBAL(ashiftrt_r4_21): + shar r4 +GLOBAL(ashiftrt_r4_20): + shar r4 +GLOBAL(ashiftrt_r4_19): + shar r4 +GLOBAL(ashiftrt_r4_18): + shar r4 +GLOBAL(ashiftrt_r4_17): + shar r4 +GLOBAL(ashiftrt_r4_16): + shlr16 r4 + rts + exts.w r4,r4 + +GLOBAL(ashiftrt_r4_15): + shar r4 +GLOBAL(ashiftrt_r4_14): + shar r4 +GLOBAL(ashiftrt_r4_13): + shar r4 +GLOBAL(ashiftrt_r4_12): + shar r4 +GLOBAL(ashiftrt_r4_11): + shar r4 +GLOBAL(ashiftrt_r4_10): + shar r4 +GLOBAL(ashiftrt_r4_9): + shar r4 +GLOBAL(ashiftrt_r4_8): + shar r4 +GLOBAL(ashiftrt_r4_7): + shar r4 +GLOBAL(ashiftrt_r4_6): + shar r4 +GLOBAL(ashiftrt_r4_5): + shar r4 +GLOBAL(ashiftrt_r4_4): + shar r4 +GLOBAL(ashiftrt_r4_3): + shar r4 +GLOBAL(ashiftrt_r4_2): + shar r4 +GLOBAL(ashiftrt_r4_1): + rts + shar r4 + +GLOBAL(ashiftrt_r4_0): + rts + nop + + ENDFUNC(GLOBAL(ashiftrt_r4_0)) + ENDFUNC(GLOBAL(ashiftrt_r4_1)) + ENDFUNC(GLOBAL(ashiftrt_r4_2)) + ENDFUNC(GLOBAL(ashiftrt_r4_3)) + ENDFUNC(GLOBAL(ashiftrt_r4_4)) + ENDFUNC(GLOBAL(ashiftrt_r4_5)) + ENDFUNC(GLOBAL(ashiftrt_r4_6)) + ENDFUNC(GLOBAL(ashiftrt_r4_7)) + ENDFUNC(GLOBAL(ashiftrt_r4_8)) + ENDFUNC(GLOBAL(ashiftrt_r4_9)) + ENDFUNC(GLOBAL(ashiftrt_r4_10)) + 
ENDFUNC(GLOBAL(ashiftrt_r4_11)) + ENDFUNC(GLOBAL(ashiftrt_r4_12)) + ENDFUNC(GLOBAL(ashiftrt_r4_13)) + ENDFUNC(GLOBAL(ashiftrt_r4_14)) + ENDFUNC(GLOBAL(ashiftrt_r4_15)) + ENDFUNC(GLOBAL(ashiftrt_r4_16)) + ENDFUNC(GLOBAL(ashiftrt_r4_17)) + ENDFUNC(GLOBAL(ashiftrt_r4_18)) + ENDFUNC(GLOBAL(ashiftrt_r4_19)) + ENDFUNC(GLOBAL(ashiftrt_r4_20)) + ENDFUNC(GLOBAL(ashiftrt_r4_21)) + ENDFUNC(GLOBAL(ashiftrt_r4_22)) + ENDFUNC(GLOBAL(ashiftrt_r4_23)) + ENDFUNC(GLOBAL(ashiftrt_r4_24)) + ENDFUNC(GLOBAL(ashiftrt_r4_25)) + ENDFUNC(GLOBAL(ashiftrt_r4_26)) + ENDFUNC(GLOBAL(ashiftrt_r4_27)) + ENDFUNC(GLOBAL(ashiftrt_r4_28)) + ENDFUNC(GLOBAL(ashiftrt_r4_29)) + ENDFUNC(GLOBAL(ashiftrt_r4_30)) + ENDFUNC(GLOBAL(ashiftrt_r4_31)) + ENDFUNC(GLOBAL(ashiftrt_r4_32)) +#endif + +#ifdef L_ashiftrt_n + +! +! GLOBAL(ashrsi3) +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! + + .global GLOBAL(ashrsi3) + HIDDEN_FUNC(GLOBAL(ashrsi3)) + .align 2 +GLOBAL(ashrsi3): + mov #31,r0 + and r0,r5 + mova LOCAL(ashrsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(ashrsi3_table): + .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) + .byte 
LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) + +LOCAL(ashrsi3_31): + rotcl r0 + rts + subc r0,r0 + +LOCAL(ashrsi3_30): + shar r0 +LOCAL(ashrsi3_29): + shar r0 +LOCAL(ashrsi3_28): + shar r0 +LOCAL(ashrsi3_27): + shar r0 +LOCAL(ashrsi3_26): + shar r0 +LOCAL(ashrsi3_25): + shar r0 +LOCAL(ashrsi3_24): + shlr16 r0 + shlr8 r0 + rts + exts.b r0,r0 + +LOCAL(ashrsi3_23): + shar r0 +LOCAL(ashrsi3_22): + shar r0 +LOCAL(ashrsi3_21): + shar r0 +LOCAL(ashrsi3_20): + shar r0 +LOCAL(ashrsi3_19): + shar r0 +LOCAL(ashrsi3_18): + shar r0 +LOCAL(ashrsi3_17): + shar r0 +LOCAL(ashrsi3_16): + shlr16 r0 + rts + exts.w r0,r0 + +LOCAL(ashrsi3_15): + shar r0 +LOCAL(ashrsi3_14): + shar r0 +LOCAL(ashrsi3_13): + shar r0 +LOCAL(ashrsi3_12): + shar r0 +LOCAL(ashrsi3_11): + shar r0 +LOCAL(ashrsi3_10): + shar r0 +LOCAL(ashrsi3_9): + shar r0 +LOCAL(ashrsi3_8): + shar r0 +LOCAL(ashrsi3_7): + shar r0 +LOCAL(ashrsi3_6): + shar r0 +LOCAL(ashrsi3_5): + shar r0 +LOCAL(ashrsi3_4): + shar r0 +LOCAL(ashrsi3_3): + shar r0 +LOCAL(ashrsi3_2): + shar r0 +LOCAL(ashrsi3_1): + rts + shar r0 + +LOCAL(ashrsi3_0): + rts + nop + + ENDFUNC(GLOBAL(ashrsi3)) +#endif + +#ifdef L_ashiftlt + +! +! GLOBAL(ashlsi3) +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ .global GLOBAL(ashlsi3) + HIDDEN_FUNC(GLOBAL(ashlsi3)) + .align 2 +GLOBAL(ashlsi3): + mov #31,r0 + and r0,r5 + mova LOCAL(ashlsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(ashlsi3_table): + .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table) + +LOCAL(ashlsi3_6): + shll2 r0 +LOCAL(ashlsi3_4): + shll2 r0 +LOCAL(ashlsi3_2): + rts + shll2 r0 + +LOCAL(ashlsi3_7): + shll2 r0 +LOCAL(ashlsi3_5): + shll2 r0 +LOCAL(ashlsi3_3): + shll2 r0 +LOCAL(ashlsi3_1): + rts + shll r0 + 
+LOCAL(ashlsi3_14): + shll2 r0 +LOCAL(ashlsi3_12): + shll2 r0 +LOCAL(ashlsi3_10): + shll2 r0 +LOCAL(ashlsi3_8): + rts + shll8 r0 + +LOCAL(ashlsi3_15): + shll2 r0 +LOCAL(ashlsi3_13): + shll2 r0 +LOCAL(ashlsi3_11): + shll2 r0 +LOCAL(ashlsi3_9): + shll8 r0 + rts + shll r0 + +LOCAL(ashlsi3_22): + shll2 r0 +LOCAL(ashlsi3_20): + shll2 r0 +LOCAL(ashlsi3_18): + shll2 r0 +LOCAL(ashlsi3_16): + rts + shll16 r0 + +LOCAL(ashlsi3_23): + shll2 r0 +LOCAL(ashlsi3_21): + shll2 r0 +LOCAL(ashlsi3_19): + shll2 r0 +LOCAL(ashlsi3_17): + shll16 r0 + rts + shll r0 + +LOCAL(ashlsi3_30): + shll2 r0 +LOCAL(ashlsi3_28): + shll2 r0 +LOCAL(ashlsi3_26): + shll2 r0 +LOCAL(ashlsi3_24): + shll16 r0 + rts + shll8 r0 + +LOCAL(ashlsi3_31): + shll2 r0 +LOCAL(ashlsi3_29): + shll2 r0 +LOCAL(ashlsi3_27): + shll2 r0 +LOCAL(ashlsi3_25): + shll16 r0 + shll8 r0 + rts + shll r0 + +LOCAL(ashlsi3_0): + rts + nop + + ENDFUNC(GLOBAL(ashlsi3)) +#endif + +#ifdef L_lshiftrt + +! +! GLOBAL(lshrsi3) +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ .global GLOBAL(lshrsi3) + HIDDEN_FUNC(GLOBAL(lshrsi3)) + .align 2 +GLOBAL(lshrsi3): + mov #31,r0 + and r0,r5 + mova LOCAL(lshrsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(lshrsi3_table): + .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table) + +LOCAL(lshrsi3_6): + shlr2 r0 +LOCAL(lshrsi3_4): + shlr2 r0 +LOCAL(lshrsi3_2): + rts + shlr2 r0 + +LOCAL(lshrsi3_7): + shlr2 r0 +LOCAL(lshrsi3_5): + shlr2 r0 +LOCAL(lshrsi3_3): + shlr2 r0 +LOCAL(lshrsi3_1): + rts + shlr r0 + 
+LOCAL(lshrsi3_14): + shlr2 r0 +LOCAL(lshrsi3_12): + shlr2 r0 +LOCAL(lshrsi3_10): + shlr2 r0 +LOCAL(lshrsi3_8): + rts + shlr8 r0 + +LOCAL(lshrsi3_15): + shlr2 r0 +LOCAL(lshrsi3_13): + shlr2 r0 +LOCAL(lshrsi3_11): + shlr2 r0 +LOCAL(lshrsi3_9): + shlr8 r0 + rts + shlr r0 + +LOCAL(lshrsi3_22): + shlr2 r0 +LOCAL(lshrsi3_20): + shlr2 r0 +LOCAL(lshrsi3_18): + shlr2 r0 +LOCAL(lshrsi3_16): + rts + shlr16 r0 + +LOCAL(lshrsi3_23): + shlr2 r0 +LOCAL(lshrsi3_21): + shlr2 r0 +LOCAL(lshrsi3_19): + shlr2 r0 +LOCAL(lshrsi3_17): + shlr16 r0 + rts + shlr r0 + +LOCAL(lshrsi3_30): + shlr2 r0 +LOCAL(lshrsi3_28): + shlr2 r0 +LOCAL(lshrsi3_26): + shlr2 r0 +LOCAL(lshrsi3_24): + shlr16 r0 + rts + shlr8 r0 + +LOCAL(lshrsi3_31): + shlr2 r0 +LOCAL(lshrsi3_29): + shlr2 r0 +LOCAL(lshrsi3_27): + shlr2 r0 +LOCAL(lshrsi3_25): + shlr16 r0 + shlr8 r0 + rts + shlr r0 + +LOCAL(lshrsi3_0): + rts + nop + + ENDFUNC(GLOBAL(lshrsi3)) +#endif + +#ifdef L_movmem + .text + .balign 4 + .global GLOBAL(movmem) + HIDDEN_FUNC(GLOBAL(movmem)) + HIDDEN_ALIAS(movstr,movmem) + /* This would be a lot simpler if r6 contained the byte count + minus 64, and we wouldn't be called here for a byte count of 64. */ +GLOBAL(movmem): + sts.l pr,@-r15 + shll2 r6 + bsr GLOBAL(movmemSI52+2) + mov.l @(48,r5),r0 + .balign 4 +LOCAL(movmem_loop): /* Reached with rts */ + mov.l @(60,r5),r0 + add #-64,r6 + mov.l r0,@(60,r4) + tst r6,r6 + mov.l @(56,r5),r0 + bt LOCAL(movmem_done) + mov.l r0,@(56,r4) + cmp/pl r6 + mov.l @(52,r5),r0 + add #64,r5 + mov.l r0,@(52,r4) + add #64,r4 + bt GLOBAL(movmemSI52) +! done all the large groups, do the remainder +! jump to GLOBAL(movmemSI4)+4, offset by r6 + mova GLOBAL(movmemSI4)+4,r0 + add r6,r0 + jmp @r0 +LOCAL(movmem_done): ! share slot insn, works out aligned. + lds.l @r15+,pr + mov.l r0,@(56,r4) + mov.l @(52,r5),r0 + rts + mov.l r0,@(52,r4) + .balign 4 +! ??? We need aliases movstr* for movmem* for the older libraries. These +! aliases will be removed at some point in the future.
+ .global GLOBAL(movmemSI64) + HIDDEN_FUNC(GLOBAL(movmemSI64)) + HIDDEN_ALIAS(movstrSI64,movmemSI64) +GLOBAL(movmemSI64): + mov.l @(60,r5),r0 + mov.l r0,@(60,r4) + .global GLOBAL(movmemSI60) + HIDDEN_FUNC(GLOBAL(movmemSI60)) + HIDDEN_ALIAS(movstrSI60,movmemSI60) +GLOBAL(movmemSI60): + mov.l @(56,r5),r0 + mov.l r0,@(56,r4) + .global GLOBAL(movmemSI56) + HIDDEN_FUNC(GLOBAL(movmemSI56)) + HIDDEN_ALIAS(movstrSI56,movmemSI56) +GLOBAL(movmemSI56): + mov.l @(52,r5),r0 + mov.l r0,@(52,r4) + .global GLOBAL(movmemSI52) + HIDDEN_FUNC(GLOBAL(movmemSI52)) + HIDDEN_ALIAS(movstrSI52,movmemSI52) +GLOBAL(movmemSI52): + mov.l @(48,r5),r0 + mov.l r0,@(48,r4) + .global GLOBAL(movmemSI48) + HIDDEN_FUNC(GLOBAL(movmemSI48)) + HIDDEN_ALIAS(movstrSI48,movmemSI48) +GLOBAL(movmemSI48): + mov.l @(44,r5),r0 + mov.l r0,@(44,r4) + .global GLOBAL(movmemSI44) + HIDDEN_FUNC(GLOBAL(movmemSI44)) + HIDDEN_ALIAS(movstrSI44,movmemSI44) +GLOBAL(movmemSI44): + mov.l @(40,r5),r0 + mov.l r0,@(40,r4) + .global GLOBAL(movmemSI40) + HIDDEN_FUNC(GLOBAL(movmemSI40)) + HIDDEN_ALIAS(movstrSI40,movmemSI40) +GLOBAL(movmemSI40): + mov.l @(36,r5),r0 + mov.l r0,@(36,r4) + .global GLOBAL(movmemSI36) + HIDDEN_FUNC(GLOBAL(movmemSI36)) + HIDDEN_ALIAS(movstrSI36,movmemSI36) +GLOBAL(movmemSI36): + mov.l @(32,r5),r0 + mov.l r0,@(32,r4) + .global GLOBAL(movmemSI32) + HIDDEN_FUNC(GLOBAL(movmemSI32)) + HIDDEN_ALIAS(movstrSI32,movmemSI32) +GLOBAL(movmemSI32): + mov.l @(28,r5),r0 + mov.l r0,@(28,r4) + .global GLOBAL(movmemSI28) + HIDDEN_FUNC(GLOBAL(movmemSI28)) + HIDDEN_ALIAS(movstrSI28,movmemSI28) +GLOBAL(movmemSI28): + mov.l @(24,r5),r0 + mov.l r0,@(24,r4) + .global GLOBAL(movmemSI24) + HIDDEN_FUNC(GLOBAL(movmemSI24)) + HIDDEN_ALIAS(movstrSI24,movmemSI24) +GLOBAL(movmemSI24): + mov.l @(20,r5),r0 + mov.l r0,@(20,r4) + .global GLOBAL(movmemSI20) + HIDDEN_FUNC(GLOBAL(movmemSI20)) + HIDDEN_ALIAS(movstrSI20,movmemSI20) +GLOBAL(movmemSI20): + mov.l @(16,r5),r0 + mov.l r0,@(16,r4) + .global GLOBAL(movmemSI16) + 
HIDDEN_FUNC(GLOBAL(movmemSI16)) + HIDDEN_ALIAS(movstrSI16,movmemSI16) +GLOBAL(movmemSI16): + mov.l @(12,r5),r0 + mov.l r0,@(12,r4) + .global GLOBAL(movmemSI12) + HIDDEN_FUNC(GLOBAL(movmemSI12)) + HIDDEN_ALIAS(movstrSI12,movmemSI12) +GLOBAL(movmemSI12): + mov.l @(8,r5),r0 + mov.l r0,@(8,r4) + .global GLOBAL(movmemSI8) + HIDDEN_FUNC(GLOBAL(movmemSI8)) + HIDDEN_ALIAS(movstrSI8,movmemSI8) +GLOBAL(movmemSI8): + mov.l @(4,r5),r0 + mov.l r0,@(4,r4) + .global GLOBAL(movmemSI4) + HIDDEN_FUNC(GLOBAL(movmemSI4)) + HIDDEN_ALIAS(movstrSI4,movmemSI4) +GLOBAL(movmemSI4): + mov.l @(0,r5),r0 + rts + mov.l r0,@(0,r4) + + ENDFUNC(GLOBAL(movmemSI64)) + ENDFUNC(GLOBAL(movmemSI60)) + ENDFUNC(GLOBAL(movmemSI56)) + ENDFUNC(GLOBAL(movmemSI52)) + ENDFUNC(GLOBAL(movmemSI48)) + ENDFUNC(GLOBAL(movmemSI44)) + ENDFUNC(GLOBAL(movmemSI40)) + ENDFUNC(GLOBAL(movmemSI36)) + ENDFUNC(GLOBAL(movmemSI32)) + ENDFUNC(GLOBAL(movmemSI28)) + ENDFUNC(GLOBAL(movmemSI24)) + ENDFUNC(GLOBAL(movmemSI20)) + ENDFUNC(GLOBAL(movmemSI16)) + ENDFUNC(GLOBAL(movmemSI12)) + ENDFUNC(GLOBAL(movmemSI8)) + ENDFUNC(GLOBAL(movmemSI4)) + ENDFUNC(GLOBAL(movmem)) +#endif + +#ifdef L_movmem_i4 + .text + .global GLOBAL(movmem_i4_even) + .global GLOBAL(movmem_i4_odd) + .global GLOBAL(movmemSI12_i4) + + HIDDEN_FUNC(GLOBAL(movmem_i4_even)) + HIDDEN_FUNC(GLOBAL(movmem_i4_odd)) + HIDDEN_FUNC(GLOBAL(movmemSI12_i4)) + + HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even) + HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd) + HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4) + + .p2align 5 +L_movmem_2mod4_end: + mov.l r0,@(16,r4) + rts + mov.l r1,@(20,r4) + + .p2align 2 + +GLOBAL(movmem_i4_even): + mov.l @r5+,r0 + bra L_movmem_start_even + mov.l @r5+,r1 + +GLOBAL(movmem_i4_odd): + mov.l @r5+,r1 + add #-4,r4 + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r1,@(4,r4) + mov.l r2,@(8,r4) + +L_movmem_loop: + mov.l r3,@(12,r4) + dt r6 + mov.l @r5+,r0 + bt/s L_movmem_2mod4_end + mov.l @r5+,r1 + add #16,r4 +L_movmem_start_even: + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r0,@r4 + dt r6 
+ mov.l r1,@(4,r4) + bf/s L_movmem_loop + mov.l r2,@(8,r4) + rts + mov.l r3,@(12,r4) + + ENDFUNC(GLOBAL(movmem_i4_even)) + ENDFUNC(GLOBAL(movmem_i4_odd)) + + .p2align 4 +GLOBAL(movmemSI12_i4): + mov.l @r5,r0 + mov.l @(4,r5),r1 + mov.l @(8,r5),r2 + mov.l r0,@r4 + mov.l r1,@(4,r4) + rts + mov.l r2,@(8,r4) + + ENDFUNC(GLOBAL(movmemSI12_i4)) +#endif + +#ifdef L_mulsi3 + + + .global GLOBAL(mulsi3) + HIDDEN_FUNC(GLOBAL(mulsi3)) + +! r4 = aabb +! r5 = ccdd +! r0 = aabb*ccdd via partial products +! +! if aa == 0 and cc == 0 +! r0 = bb*dd +! +! else +! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536) +! + +GLOBAL(mulsi3): + mulu.w r4,r5 ! multiply the lsws macl=bb*dd + mov r5,r3 ! r3 = ccdd + swap.w r4,r2 ! r2 = bbaa + xtrct r2,r3 ! r3 = aacc + tst r3,r3 ! msws zero ? + bf hiset + rts ! yes - then we have the answer + sts macl,r0 + +hiset: sts macl,r0 ! r0 = bb*dd + mulu.w r2,r5 ! brewing macl = aa*dd + sts macl,r1 + mulu.w r3,r4 ! brewing macl = cc*bb + sts macl,r2 + add r1,r2 + shll16 r2 + rts + add r2,r0 + + ENDFUNC(GLOBAL(mulsi3)) +#endif +#endif /* ! __SH5__ */ +#ifdef L_sdivsi3_i4 + .title "SH DIVIDE" +!! 4 byte integer Divide code for the Renesas SH +#ifdef __SH4__ +!! args in r4 and r5, result in fpul, clobber dr0, dr2 + + .global GLOBAL(sdivsi3_i4) + HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) +GLOBAL(sdivsi3_i4): + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + + ENDFUNC(GLOBAL(sdivsi3_i4)) +#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) +!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 + +#if !
__SH5__ || __SH5__ == 32 +#if __SH5__ + .mode SHcompact +#endif + .global GLOBAL(sdivsi3_i4) + HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) +GLOBAL(sdivsi3_i4): + sts.l fpscr,@-r15 + mov #8,r2 + swap.w r2,r2 + lds r2,fpscr + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + lds.l @r15+,fpscr + + ENDFUNC(GLOBAL(sdivsi3_i4)) +#endif /* ! __SH5__ || __SH5__ == 32 */ +#endif /* ! __SH4__ */ +#endif + +#ifdef L_sdivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh2e/sh3e code. */ +#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) +!! +!! Steve Chamberlain +!! sac@cygnus.com +!! +!! + +!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit + + .global GLOBAL(sdivsi3) +#if __SHMEDIA__ +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 2 +#if 0 +/* The assembly code that follows is a hand-optimized version of the C + code that follows. Note that the registers that are modified are + exactly those listed as clobbered in the patterns divsi3_i1 and + divsi3_i1_media. + +int __sdivsi3 (i, j) + int i, j; +{ + register unsigned long long r18 asm ("r18"); + register unsigned long long r19 asm ("r19"); + register unsigned long long r0 asm ("r0") = 0; + register unsigned long long r1 asm ("r1") = 1; + register int r2 asm ("r2") = i >> 31; + register int r3 asm ("r3") = j >> 31; + + r2 = r2 ? r2 : r1; + r3 = r3 ? 
r3 : r1; + r18 = i * r2; + r19 = j * r3; + r2 *= r3; + + r19 <<= 31; + r1 <<= 31; + do + if (r18 >= r19) + r0 |= r1, r18 -= r19; + while (r19 >>= 1, r1 >>= 1); + + return r2 * (int)r0; +} +*/ +GLOBAL(sdivsi3): + pt/l LOCAL(sdivsi3_dontadd), tr2 + pt/l LOCAL(sdivsi3_loop), tr1 + ptabs/l r18, tr0 + movi 0, r0 + movi 1, r1 + shari.l r4, 31, r2 + shari.l r5, 31, r3 + cmveq r2, r1, r2 + cmveq r3, r1, r3 + muls.l r4, r2, r18 + muls.l r5, r3, r19 + muls.l r2, r3, r2 + shlli r19, 31, r19 + shlli r1, 31, r1 +LOCAL(sdivsi3_loop): + bgtu r19, r18, tr2 + or r0, r1, r0 + sub r18, r19, r18 +LOCAL(sdivsi3_dontadd): + shlri r1, 1, r1 + shlri r19, 1, r19 + bnei r1, 0, tr1 + muls.l r0, r2, r0 + add.l r0, r63, r0 + blink tr0, r63 +#elif 0 /* ! 0 */ + // inputs: r4,r5 + // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0 + // result in r0 +GLOBAL(sdivsi3): + // can create absolute value without extra latency, + // but dependent on proper sign extension of inputs: + // shari.l r5,31,r2 + // xor r5,r2,r20 + // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended. + shari.l r5,31,r2 + ori r2,1,r2 + muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended. + movi 0xffffffffffffbb0c,r19 // shift count equiv 76 + shari.l r4,31,r3 + nsb r20,r0 + shlld r20,r0,r25 + shlri r25,48,r25 + sub r19,r25,r1 + mmulfx.w r1,r1,r2 + mshflo.w r1,r63,r1 + // If r4 was to be used in-place instead of r21, could use this sequence + // to compute absolute: + // sub r63,r4,r19 // compute absolute value of r4 + // shlri r4,32,r3 // into lower 32 bit of r4, keeping + // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
+ ori r3,1,r3 + mmulfx.w r25,r2,r2 + sub r19,r0,r0 + muls.l r4,r3,r21 + msub.w r1,r2,r2 + addi r2,-2,r1 + mulu.l r21,r1,r19 + mmulfx.w r2,r2,r2 + shlli r1,15,r1 + shlrd r19,r0,r19 + mulu.l r19,r20,r3 + mmacnfx.wl r25,r2,r1 + ptabs r18,tr0 + sub r21,r3,r25 + + mulu.l r25,r1,r2 + addi r0,14,r0 + xor r4,r5,r18 + shlrd r2,r0,r2 + mulu.l r2,r20,r3 + add r19,r2,r19 + shari.l r18,31,r18 + sub r25,r3,r25 + + mulu.l r25,r1,r2 + sub r25,r20,r25 + add r19,r18,r19 + shlrd r2,r0,r2 + mulu.l r2,r20,r3 + addi r25,1,r25 + add r19,r2,r19 + + cmpgt r25,r3,r25 + add.l r19,r25,r0 + xor r0,r18,r0 + blink tr0,r63 +#else /* ! 0 && ! 0 */ + + // inputs: r4,r5 + // clobbered: r1,r18,r19,r20,r21,r25,tr0 + // result in r0 + HIDDEN_FUNC(GLOBAL(sdivsi3_2)) +#ifndef __pic__ + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): /* this is the shcompact entry point */ + // The special SHmedia entry point sdivsi3_1 prevents accidental linking + // with the SHcompact implementation, which clobbers tr1 / tr2. + .global GLOBAL(sdivsi3_1) +GLOBAL(sdivsi3_1): + .global GLOBAL(div_table_internal) + movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 + shori GLOBAL(div_table_internal) & 65535, r20 +#endif + .global GLOBAL(sdivsi3_2) + // div_table in r20 + // clobbered: r1,r18,r19,r21,r25,tr0 +GLOBAL(sdivsi3_2): + nsb r5, r1 + shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 + shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) + ldx.ub r20, r21, r19 // u0.8 + shari r25, 32, r25 // normalize to s2.30 + shlli r21, 1, r21 + muls.l r25, r19, r19 // s2.38 + ldx.w r20, r21, r21 // s2.14 + ptabs r18, tr0 + shari r19, 24, r19 // truncate to s2.14 + sub r21, r19, r19 // some 11 bit inverse in s1.14 + muls.l r19, r19, r21 // u0.28 + sub r63, r1, r1 + addi r1, 92, r1 + muls.l r25, r21, r18 // s2.58 + shlli r19, 45, r19 // multiply by two and convert to s2.58 + /* bubble */ + sub r19, r18, r18 + shari r18, 28, r18 // some 22 bit inverse in s1.30 + muls.l r18, r25, r0 // s2.60 + muls.l r18, r4, r25 // 
s32.30 + /* bubble */ + shari r0, 16, r19 // s-16.44 + muls.l r19, r18, r19 // s-16.74 + shari r25, 63, r0 + shari r4, 14, r18 // s19.-14 + shari r19, 30, r19 // s-16.44 + muls.l r19, r18, r19 // s15.30 + xor r21, r0, r21 // You could also use the constant 1 << 27. + add r21, r25, r21 + sub r21, r19, r21 + shard r21, r1, r21 + sub r21, r0, r0 + blink tr0, r63 +#ifndef __pic__ + ENDFUNC(GLOBAL(sdivsi3)) +#endif + ENDFUNC(GLOBAL(sdivsi3_2)) +#endif +#elif defined __SHMEDIA__ +/* m5compact-nofpu */ + // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): + pt/l LOCAL(sdivsi3_dontsub), tr0 + pt/l LOCAL(sdivsi3_loop), tr1 + ptabs/l r18,tr2 + shari.l r4,31,r18 + shari.l r5,31,r19 + xor r4,r18,r20 + xor r5,r19,r21 + sub.l r20,r18,r20 + sub.l r21,r19,r21 + xor r18,r19,r19 + shlli r21,32,r25 + addi r25,-1,r21 + addz.l r20,r63,r20 +LOCAL(sdivsi3_loop): + shlli r20,1,r20 + bgeu/u r21,r20,tr0 + sub r20,r21,r20 +LOCAL(sdivsi3_dontsub): + addi.l r25,-1,r25 + bnei r25,-32,tr1 + xor r20,r19,r20 + sub.l r20,r19,r0 + blink tr2,r63 + ENDFUNC(GLOBAL(sdivsi3)) +#else /* ! 
__SHMEDIA__ */ + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): + mov r4,r1 + mov r5,r0 + + tst r0,r0 + bt div0 + mov #0,r2 + div0s r2,r1 + subc r3,r3 + subc r2,r1 + div0s r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + addc r2,r1 + rts + mov r1,r0 + + +div0: rts + mov #0,r0 + + ENDFUNC(GLOBAL(sdivsi3)) +#endif /* ! __SHMEDIA__ */ +#endif /* ! __SH4__ */ +#endif +#ifdef L_udivsi3_i4 + + .title "SH DIVIDE" +!! 4 byte integer Divide code for the Renesas SH +#ifdef __SH4__ +!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4, +!! and t bit + + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + mov #1,r1 + cmp/hi r1,r5 + bf trivial + rotr r1 + xor r1,r4 + lds r4,fpul + mova L1,r0 +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else + fmov.s @r0+,DR40 + fmov.s @r0,DR41 +#endif + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + +trivial: + rts + lds r4,fpul + + .align 2 +#ifdef FMOVD_WORKS + .align 3 ! make double below 8 byte aligned. +#endif +L1: + .double 2147483648 + + ENDFUNC(GLOBAL(udivsi3_i4)) +#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) +#if ! __SH5__ || __SH5__ == 32 +!! 
args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 + .mode SHmedia + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + addz.l r4,r63,r20 + addz.l r5,r63,r21 + fmov.qd r20,dr0 + fmov.qd r21,dr32 + ptabs r18,tr0 + float.qd dr0,dr0 + float.qd dr32,dr32 + fdiv.d dr0,dr32,dr0 + ftrc.dq dr0,dr32 + fmov.s fr33,fr32 + blink tr0,r63 + + ENDFUNC(GLOBAL(udivsi3_i4)) +#endif /* ! __SH5__ || __SH5__ == 32 */ +#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) +!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 + + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + mov #1,r1 + cmp/hi r1,r5 + bf trivial + sts.l fpscr,@-r15 + mova L1,r0 + lds.l @r0+,fpscr + rotr r1 + xor r1,r4 + lds r4,fpul +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else + fmov.s @r0+,DR40 + fmov.s @r0,DR41 +#endif + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + lds.l @r15+,fpscr + +#ifdef FMOVD_WORKS + .align 3 ! make double below 8 byte aligned. +#endif +trivial: + rts + lds r4,fpul + + .align 2 +L1: +#ifndef FMOVD_WORKS + .long 0x80000 +#else + .long 0x180000 +#endif + .double 2147483648 + + ENDFUNC(GLOBAL(udivsi3_i4)) +#endif /* ! __SH4__ */ +#endif + +#ifdef L_udivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh2e/sh3e code. */ +#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) + +!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit + .global GLOBAL(udivsi3) + HIDDEN_FUNC(GLOBAL(udivsi3)) + +#if __SHMEDIA__ +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 2 +#if 0 +/* The assembly code that follows is a hand-optimized version of the C + code that follows. Note that the registers that are modified are + exactly those listed as clobbered in the patterns udivsi3_i1 and + udivsi3_i1_media. 
+ +unsigned +__udivsi3 (i, j) + unsigned i, j; +{ + register unsigned long long r0 asm ("r0") = 0; + register unsigned long long r18 asm ("r18") = 1; + register unsigned long long r4 asm ("r4") = i; + register unsigned long long r19 asm ("r19") = j; + + r19 <<= 31; + r18 <<= 31; + do + if (r4 >= r19) + r0 |= r18, r4 -= r19; + while (r19 >>= 1, r18 >>= 1); + + return r0; +} +*/ +GLOBAL(udivsi3): + pt/l LOCAL(udivsi3_dontadd), tr2 + pt/l LOCAL(udivsi3_loop), tr1 + ptabs/l r18, tr0 + movi 0, r0 + movi 1, r18 + addz.l r5, r63, r19 + addz.l r4, r63, r4 + shlli r19, 31, r19 + shlli r18, 31, r18 +LOCAL(udivsi3_loop): + bgtu r19, r4, tr2 + or r0, r18, r0 + sub r4, r19, r4 +LOCAL(udivsi3_dontadd): + shlri r18, 1, r18 + shlri r19, 1, r19 + bnei r18, 0, tr1 + blink tr0, r63 +#else +GLOBAL(udivsi3): + // inputs: r4,r5 + // clobbered: r18,r19,r20,r21,r22,r25,tr0 + // result in r0. + addz.l r5,r63,r22 + nsb r22,r0 + shlld r22,r0,r25 + shlri r25,48,r25 + movi 0xffffffffffffbb0c,r20 // shift count equiv 76 + sub r20,r25,r21 + mmulfx.w r21,r21,r19 + mshflo.w r21,r63,r21 + ptabs r18,tr0 + mmulfx.w r25,r19,r19 + sub r20,r0,r0 + /* bubble */ + msub.w r21,r19,r19 + addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21 + before the msub.w, but we need a different value for + r19 to keep errors under control.
*/ + mulu.l r4,r21,r18 + mmulfx.w r19,r19,r19 + shlli r21,15,r21 + shlrd r18,r0,r18 + mulu.l r18,r22,r20 + mmacnfx.wl r25,r19,r21 + /* bubble */ + sub r4,r20,r25 + + mulu.l r25,r21,r19 + addi r0,14,r0 + /* bubble */ + shlrd r19,r0,r19 + mulu.l r19,r22,r20 + add r18,r19,r18 + /* bubble */ + sub.l r25,r20,r25 + + mulu.l r25,r21,r19 + addz.l r25,r63,r25 + sub r25,r22,r25 + shlrd r19,r0,r19 + mulu.l r19,r22,r20 + addi r25,1,r25 + add r18,r19,r18 + + cmpgt r25,r20,r25 + add.l r18,r25,r0 + blink tr0,r63 +#endif +#elif defined (__SHMEDIA__) +/* m5compact-nofpu - more emphasis on code size than on speed, but don't + ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4. + So use a short shmedia loop. */ + // clobbered: r20,r21,r25,tr0,tr1,tr2 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 +GLOBAL(udivsi3): + pt/l LOCAL(udivsi3_dontsub), tr0 + pt/l LOCAL(udivsi3_loop), tr1 + ptabs/l r18,tr2 + shlli r5,32,r25 + addi r25,-1,r21 + addz.l r4,r63,r20 +LOCAL(udivsi3_loop): + shlli r20,1,r20 + bgeu/u r21,r20,tr0 + sub r20,r21,r20 +LOCAL(udivsi3_dontsub): + addi.l r25,-1,r25 + bnei r25,-32,tr1 + add.l r20,r63,r0 + blink tr2,r63 +#else /* ! 
defined (__SHMEDIA__) */ +LOCAL(div8): + div1 r5,r4 +LOCAL(div7): + div1 r5,r4; div1 r5,r4; div1 r5,r4 + div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 + +LOCAL(divx4): + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + rts; div1 r5,r4 + +GLOBAL(udivsi3): + sts.l pr,@-r15 + extu.w r5,r0 + cmp/eq r5,r0 +#ifdef __sh1__ + bf LOCAL(large_divisor) +#else + bf/s LOCAL(large_divisor) +#endif + div0u + swap.w r4,r0 + shlr16 r4 + bsr LOCAL(div8) + shll16 r5 + bsr LOCAL(div7) + div1 r5,r4 + xtrct r4,r0 + xtrct r0,r4 + bsr LOCAL(div8) + swap.w r4,r4 + bsr LOCAL(div7) + div1 r5,r4 + lds.l @r15+,pr + xtrct r4,r0 + swap.w r0,r0 + rotcl r0 + rts + shlr16 r5 + +LOCAL(large_divisor): +#ifdef __sh1__ + div0u +#endif + mov #0,r0 + xtrct r4,r0 + xtrct r0,r4 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + lds.l @r15+,pr + rts + rotcl r0 + + ENDFUNC(GLOBAL(udivsi3)) +#endif /* ! __SHMEDIA__ */ +#endif /* __SH4__ */ +#endif /* L_udivsi3 */ + +#ifdef L_udivdi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(udivdi3) + FUNC(GLOBAL(udivdi3)) +GLOBAL(udivdi3): + HIDDEN_ALIAS(udivdi3_internal,udivdi3) + shlri r3,1,r4 + nsb r4,r22 + shlld r3,r22,r6 + shlri r6,49,r5 + movi 0xffffffffffffbaf1,r21 /* .l shift count 17. 
*/ + sub r21,r5,r1 + mmulfx.w r1,r1,r4 + mshflo.w r1,r63,r1 + sub r63,r22,r20 // r63 == 64 % 64 + mmulfx.w r5,r4,r4 + pta LOCAL(large_divisor),tr0 + addi r20,32,r9 /* r9 = 32 - r22 */ + msub.w r1,r4,r1 + madd.w r1,r1,r1 + mmulfx.w r1,r1,r4 + shlri r6,32,r7 /* r7 = upper 32 bits of r6 */ + bgt/u r9,r63,tr0 // large_divisor + mmulfx.w r5,r4,r4 + shlri r2,32+14,r19 + addi r22,-31,r0 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r19,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + mulu.l r5,r3,r8 + mshalds.l r1,r21,r1 + shari r4,26,r4 + shlld r8,r0,r8 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r2,r8,r2 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ + + shlri r2,22,r21 + mulu.l r21,r1,r21 + shlld r5,r0,r8 + addi r20,30-22,r0 + shlrd r21,r0,r21 + mulu.l r21,r3,r5 + add r8,r21,r8 /* accumulate the partial result in r8 */ + mcmpgt.l r21,r63,r21 // See Note 1 + addi r20,30,r0 + mshfhi.l r63,r21,r21 + sub r2,r5,r2 + andc r2,r21,r2 + + /* small divisor: need a third divide step */ + mulu.l r2,r1,r7 + ptabs r18,tr0 /* tr0 = return address taken from r18 */ + addi r2,1,r2 + shlrd r7,r0,r7 + mulu.l r7,r3,r5 + add r8,r7,r8 + sub r2,r3,r2 + cmpgt r2,r5,r5 /* r5 = 1 when r2 > r5, else 0 */ + add r8,r5,r2 /* value returned in r2 = r8 plus that 0/1 adjustment */ + /* could test r3 here to check for divide by zero. */ + blink tr0,r63 + +LOCAL(large_divisor): /* reached through the bgt/u above, i.e. when r9 = 32 - r22 is positive */ + mmulfx.w r5,r4,r4 + shlrd r2,r9,r25 + shlri r25,32,r8 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r8,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + shlri r5,14-1,r8 + mulu.l r8,r7,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv.
0.5) + sub r25,r5,r25 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ + + shlri r25,22,r21 + mulu.l r21,r1,r21 + pta LOCAL(no_lo_adj),tr0 + addi r22,32,r0 + shlri r21,40,r21 + mulu.l r21,r7,r5 + add r8,r21,r8 /* accumulate the partial result in r8 */ + shlld r2,r0,r2 + sub r25,r5,r25 + bgtu/u r7,r25,tr0 // no_lo_adj + addi r8,1,r8 /* r25 >= r7 here: count one more in r8 ... */ + sub r25,r7,r25 /* ... and take r7 off r25 */ +LOCAL(no_lo_adj): + mextr4 r2,r25,r2 + + /* large_divisor: only needs a few adjustments. */ + mulu.l r8,r6,r5 + ptabs r18,tr0 /* tr0 = return address taken from r18 */ + /* bubble */ + cmpgtu r5,r2,r5 /* r5 = 1 when r5 > r2 (unsigned), else 0 */ + sub r8,r5,r2 /* value returned in r2 = r8 minus that 0/1 adjustment */ + blink tr0,r63 + ENDFUNC(GLOBAL(udivdi3)) +/* Note 1: To shift the result of the second divide stage so that the result + always fits into 32 bits, yet we still reduce the rest sufficiently + would require a lot of instructions to do the shifts just right. Using + the full 64 bit shift result to multiply with the divisor would require + four extra instructions for the upper 32 bits (shift / mulu / shift / sub). + Fortunately, if the upper 32 bits of the shift result are nonzero, we + know that the rest after taking this partial result into account will + fit into 32 bits. So we just clear the upper 32 bits of the rest if the + upper 32 bits of the partial result are nonzero.
*/ +#endif /* __SHMEDIA__ */ +#endif /* L_udivdi3 */ + +#ifdef L_divdi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(divdi3) + FUNC(GLOBAL(divdi3)) +GLOBAL(divdi3): + pta GLOBAL(udivdi3_internal),tr0 + shari r2,63,r22 /* r22 = sign mask of r2 (0 or -1) */ + shari r3,63,r23 /* r23 = sign mask of r3 (0 or -1) */ + xor r2,r22,r2 + xor r3,r23,r3 + sub r2,r22,r2 /* together with the xor: r2 = |r2| */ + sub r3,r23,r3 /* together with the xor: r3 = |r3| */ + beq/u r22,r23,tr0 /* equal sign masks: branch straight into udivdi3_internal, which returns to our caller */ + ptabs r18,tr1 /* otherwise keep our own return address in tr1 */ + blink tr0,r18 /* call udivdi3_internal, linking through r18 */ + sub r63,r2,r2 /* negate the value it left in r2 */ + blink tr1,r63 + ENDFUNC(GLOBAL(divdi3)) +#endif /* __SHMEDIA__ */ +#endif /* L_divdi3 */ + +#ifdef L_umoddi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(umoddi3) + FUNC(GLOBAL(umoddi3)) +GLOBAL(umoddi3): + HIDDEN_ALIAS(umoddi3_internal,umoddi3) + shlri r3,1,r4 + nsb r4,r22 /* r22 = shift count derived from r3 >> 1 */ + shlld r3,r22,r6 /* r6 = r3 << r22 */ + shlri r6,49,r5 + movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ + sub r21,r5,r1 + mmulfx.w r1,r1,r4 + mshflo.w r1,r63,r1 + sub r63,r22,r20 // r63 == 64 % 64 + mmulfx.w r5,r4,r4 + pta LOCAL(large_divisor),tr0 + addi r20,32,r9 /* r9 = 32 - r22 */ + msub.w r1,r4,r1 + madd.w r1,r1,r1 + mmulfx.w r1,r1,r4 + shlri r6,32,r7 /* r7 = upper 32 bits of r6 */ + bgt/u r9,r63,tr0 // large_divisor + mmulfx.w r5,r4,r4 + shlri r2,32+14,r19 + addi r22,-31,r0 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r19,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + mulu.l r5,r3,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + shlld r5,r0,r5 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r2,r5,r2 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ + + shlri r2,22,r21 + mulu.l r21,r1,r21 + addi r20,30-22,r0 + /* bubble */ /* could test r3 here to check for divide by zero.
*/ + shlrd r21,r0,r21 + mulu.l r21,r3,r5 + mcmpgt.l r21,r63,r21 // See Note 1 + addi r20,30,r0 + mshfhi.l r63,r21,r21 + sub r2,r5,r2 + andc r2,r21,r2 + + /* small divisor: need a third divide step */ + mulu.l r2,r1,r7 + ptabs r18,tr0 /* tr0 = return address taken from r18 */ + sub r2,r3,r8 /* re-use r8 here for rest - r3 */ + shlrd r7,r0,r7 + mulu.l r7,r3,r5 + /* bubble */ + addi r8,1,r7 + cmpgt r7,r5,r7 /* r7 = 1 when r8 + 1 > r5, else 0 */ + cmvne r7,r8,r2 /* if r7 != 0, replace r2 by r8 (rest - r3) */ + sub r2,r5,r2 /* value returned in r2 */ + blink tr0,r63 + +LOCAL(large_divisor): /* target of the bgt/u r9,r63 test earlier in umoddi3 */ + mmulfx.w r5,r4,r4 + shlrd r2,r9,r25 + shlri r25,32,r8 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r8,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + shlri r5,14-1,r8 + mulu.l r8,r7,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r25,r5,r25 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ + + shlri r25,22,r21 + mulu.l r21,r1,r21 + pta LOCAL(no_lo_adj),tr0 + addi r22,32,r0 + shlri r21,40,r21 + mulu.l r21,r7,r5 + add r8,r21,r8 + shlld r2,r0,r2 + sub r25,r5,r25 + bgtu/u r7,r25,tr0 // no_lo_adj + addi r8,1,r8 /* r25 >= r7 here: count one more in r8 ... */ + sub r25,r7,r25 /* ... and take r7 off r25 */ +LOCAL(no_lo_adj): + mextr4 r2,r25,r2 + + /* large_divisor: only needs a few adjustments. */ + mulu.l r8,r6,r5 + ptabs r18,tr0 /* tr0 = return address taken from r18 */ + add r2,r6,r7 /* r7 = r2 + r6, the candidate used when r5 > r2 */ + cmpgtu r5,r2,r8 /* r8 = 1 when r5 > r2 (unsigned), else 0 */ + cmvne r8,r7,r2 /* if r8 != 0, replace r2 by r7 */ + sub r2,r5,r2 + shlrd r2,r22,r2 /* shift right by r22, the count used to form r6 = r3 << r22 */ + blink tr0,r63 + ENDFUNC(GLOBAL(umoddi3)) +/* Note 1: To shift the result of the second divide stage so that the result + always fits into 32 bits, yet we still reduce the rest sufficiently + would require a lot of instructions to do the shifts just right. Using + the full 64 bit shift result to multiply with the divisor would require + four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
+ Fortunately, if the upper 32 bits of the shift result are nonzero, we + know that the rest after taking this partial result into account will + fit into 32 bits. So we just clear the upper 32 bits of the rest if the + upper 32 bits of the partial result are nonzero. */ +#endif /* __SHMEDIA__ */ +#endif /* L_umoddi3 */ + +#ifdef L_moddi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(moddi3) + FUNC(GLOBAL(moddi3)) +GLOBAL(moddi3): + pta GLOBAL(umoddi3_internal),tr0 + shari r2,63,r22 /* r22 = sign mask of r2 (0 or -1) */ + shari r3,63,r23 /* r23 = sign mask of r3 (0 or -1) */ + xor r2,r22,r2 + xor r3,r23,r3 + sub r2,r22,r2 /* together with the xor: r2 = |r2| */ + sub r3,r23,r3 /* together with the xor: r3 = |r3| */ + beq/u r22,r63,tr0 /* r2 was non-negative: branch straight into umoddi3_internal */ + ptabs r18,tr1 /* otherwise keep our own return address in tr1 */ + blink tr0,r18 /* call umoddi3_internal, linking through r18 */ + sub r63,r2,r2 /* r2 was negative: negate the value left in r2 (only the sign of r2 is consulted) */ + blink tr1,r63 + ENDFUNC(GLOBAL(moddi3)) +#endif /* __SHMEDIA__ */ +#endif /* L_moddi3 */ + +#ifdef L_set_fpscr +#if !defined (__SH2A_NOFPU__) +#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32 +#ifdef __SH5__ + .mode SHcompact +#endif + .global GLOBAL(set_fpscr) + HIDDEN_FUNC(GLOBAL(set_fpscr)) +GLOBAL(set_fpscr): + lds r4,fpscr /* load r4 into FPSCR */ +#ifdef __PIC__ + mov.l r12,@-r15 /* r12 is used as a scratch base below; save it */ +#ifdef __vxworks + mov.l LOCAL(set_fpscr_L0_base),r12 + mov.l LOCAL(set_fpscr_L0_index),r0 + mov.l @r12,r12 + mov.l @(r0,r12),r12 +#else + mova LOCAL(set_fpscr_L0),r0 + mov.l LOCAL(set_fpscr_L0),r12 + add r0,r12 +#endif + mov.l LOCAL(set_fpscr_L1),r0 + mov.l @(r0,r12),r1 /* r1 = word found at the set_fpscr_L1 offset from r12 */ + mov.l @r15+,r12 /* restore r12 */ +#else + mov.l LOCAL(set_fpscr_L1),r1 +#endif + swap.w r4,r0 /* bring the upper 16 bits of r4 into the low half of r0 */ + or #24,r0 /* set bits 3-4 there, i.e. bits 19-20 of the original value */ +#ifndef FMOVD_WORKS + xor #16,r0 +#endif +#if defined(__SH4__) || defined (__SH2A_DOUBLE__) + swap.w r0,r3 + mov.l r3,@(4,r1) /* first variant goes to offset 4 */ +#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ + swap.w r0,r2 + mov.l r2,@r1 /* first variant goes to offset 0 */ +#endif +#ifndef FMOVD_WORKS + xor #8,r0 +#else + xor #24,r0 +#endif +#if defined(__SH4__) || defined (__SH2A_DOUBLE__) + swap.w r0,r2 + rts + mov.l r2,@r1 +#else /* defined(__SH2E__) || defined(__SH3E__) ||
defined(__SH4_SINGLE*__) */ + swap.w r0,r3 + rts + mov.l r3,@(4,r1) +#endif + .align 2 +#ifdef __PIC__ +#ifdef __vxworks +LOCAL(set_fpscr_L0_base): + .long ___GOTT_BASE__ +LOCAL(set_fpscr_L0_index): + .long ___GOTT_INDEX__ +#else +LOCAL(set_fpscr_L0): + .long _GLOBAL_OFFSET_TABLE_ +#endif +LOCAL(set_fpscr_L1): + .long GLOBAL(fpscr_values@GOT) +#else +LOCAL(set_fpscr_L1): + .long GLOBAL(fpscr_values) +#endif + + ENDFUNC(GLOBAL(set_fpscr)) +#ifndef NO_FPSCR_VALUES +#ifdef __ELF__ + .comm GLOBAL(fpscr_values),8,4 /* 8 bytes: set_fpscr stores one word at offset 0 and one at offset 4 */ +#else + .comm GLOBAL(fpscr_values),8 +#endif /* ELF */ +#endif /* NO_FPSCR_VALUES */ +#endif /* SH2E / SH3E / SH4 */ +#endif /* __SH2A_NOFPU__ */ +#endif /* L_set_fpscr */ +#ifdef L_ic_invalidate +#if __SH5__ == 32 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(init_trampoline) + HIDDEN_FUNC(GLOBAL(init_trampoline)) +GLOBAL(init_trampoline): + st.l r0,8,r2 /* store r2 at offset 8 from r0 */ +#ifdef __LITTLE_ENDIAN__ + movi 9,r20 + shori 0x402b,r20 + shori 0xd101,r20 + shori 0xd002,r20 +#else + movi 0xffffffffffffd002,r20 + shori 0xd101,r20 + shori 0x402b,r20 + shori 9,r20 +#endif + st.q r0,0,r20 /* store the 8-byte constant built in r20 at offset 0 */ + st.l r0,12,r3 /* store r3 at offset 12; there is no return here, execution continues into ic_invalidate */ + ENDFUNC(GLOBAL(init_trampoline)) + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): + ocbwb r0,0 + synco + icbi r0, 0 + ptabs r18, tr0 /* tr0 = return address taken from r18 */ + synci + blink tr0, r63 + ENDFUNC(GLOBAL(ic_invalidate)) +#elif defined(__SH4A__) + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): /* SH4A: operates directly on the address in r4, no dispatch array needed */ + ocbwb @r4 + synco + icbi @r4 + rts + nop + ENDFUNC(GLOBAL(ic_invalidate)) +#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) + /* For system code, we use ic_invalidate_line_i, but user code + needs a different mechanism. A kernel call is generally not + available, and it would also be slow. Different SH4 variants use + different sizes and associativities of the Icache.
We use a small + bit of dispatch code that can be put hidden in every shared object, + which calls the actual processor-specific invalidation code in a + separate module. + Or if you have operating system support, the OS could mmap the + processor-specific code from a single page, since it is highly + repetitive. */ + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): +#ifdef __pic__ +#ifdef __vxworks + mov.l 1f,r1 + mov.l 2f,r0 + mov.l @r1,r1 + mov.l 0f,r2 + mov.l @(r0,r1),r0 +#else + mov.l 1f,r1 + mova 1f,r0 + mov.l 0f,r2 + add r1,r0 +#endif + mov.l @(r0,r2),r1 /* r1 = address of ic_invalidate_array, read through its @GOT slot */ +#else + mov.l 0f,r1 /* r1 = address of ic_invalidate_array */ +#endif + ocbwb @r4 + mov.l @(8,r1),r0 /* r0 = word stored at offset 8 of the array, used as a mask */ + sub r1,r4 /* r4 = argument address relative to the array */ + and r4,r0 /* keep only the masked bits of that offset */ + add r1,r0 /* r0 = array + masked offset */ + jmp @r0 /* continue inside ic_invalidate_array ... */ + mov.l @(4,r1),r0 /* ... after loading the word at offset 4 into r0 */ + .align 2 +#ifndef __pic__ +0: .long GLOBAL(ic_invalidate_array) +#else /* __pic__ */ + .global GLOBAL(ic_invalidate_array) +0: .long GLOBAL(ic_invalidate_array)@GOT +#ifdef __vxworks +1: .long ___GOTT_BASE__ +2: .long ___GOTT_INDEX__ +#else +1: .long _GLOBAL_OFFSET_TABLE_ +#endif +#endif /* __pic__ */ + ENDFUNC(GLOBAL(ic_invalidate)) /* emitted for PIC and non-PIC builds alike */ +#endif /* SH4 */ +#endif /* L_ic_invalidate */ + +#ifdef L_ic_invalidate_array +#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)))) + .global GLOBAL(ic_invalidate_array) + /* This is needed when an SH4 dso with trampolines is used on SH4A. */ + .global GLOBAL(ic_invalidate_array) + FUNC(GLOBAL(ic_invalidate_array)) +GLOBAL(ic_invalidate_array): + add r1,r4 /* undo the "sub r1,r4" done by the SH4 ic_invalidate dispatch code */ + synco + icbi @r4 + rts + nop + .align 2 + .long 0 /* NOTE(review): the SH4 dispatch code reads its mask from offset 8 of this array; with five 2-byte instructions ahead of the .align this word appears to land at offset 12 - confirm the layout */ + ENDFUNC(GLOBAL(ic_invalidate_array)) +#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) + .global GLOBAL(ic_invalidate_array) + .p2align 5 + FUNC(GLOBAL(ic_invalidate_array)) +/* This must be aligned to the beginning of a cache line.
*/ +GLOBAL(ic_invalidate_array): +#ifndef WAYS +#define WAYS 4 /* defaults used when the build does not predefine WAYS */ +#define WAY_SIZE 0x4000 +#endif +#if WAYS == 1 + .rept WAY_SIZE * WAYS / 32 /* each repetition emits 32 bytes: rts, nop, seven words */ + rts + nop + .rept 7 + .long WAY_SIZE - 32 + .endr + .endr +#elif WAYS <= 6 + .rept WAY_SIZE * WAYS / 32 /* each repetition emits 32 bytes */ + braf r0 + add #-8,r0 + .long WAY_SIZE + 8 /* word at offset 4 of each entry */ + .long WAY_SIZE - 32 /* word at offset 8 of each entry */ + .rept WAYS-2 + braf r0 + nop + .endr + .rept 7 - WAYS + rts + nop + .endr + .endr +#else /* WAYS > 6 */ + /* This variant needs two different pages for mmap-ing. */ + .rept WAYS-1 + .rept WAY_SIZE / 32 + braf r0 + nop + .long WAY_SIZE + .rept 6 + .long WAY_SIZE - 32 + .endr + .endr + .endr + .rept WAY_SIZE / 32 + rts + .rept 15 + nop + .endr + .endr +#endif /* WAYS */ + ENDFUNC(GLOBAL(ic_invalidate_array)) +#endif /* SH4 */ +#endif /* L_ic_invalidate_array */ + +#if defined (__SH5__) && __SH5__ == 32 +#ifdef L_shcompact_call_trampoline + .section .rodata + .align 1 +LOCAL(ct_main_table): /* 16-bit handler offsets, each relative to ct_main_label */ +.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label) +.word
LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) /* selected when only bit 0 of the cookie is left */ +.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) /* final entry: selected when no cookie bits are left */ + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + + /* This function loads 64-bit general-purpose registers from the + stack, from a memory address contained in them or from an FP + register, according to a cookie passed in r1. Its execution + time is linear in the number of registers that actually have + to be copied. See sh.h for details on the actual bit pattern. + + The function to be called is passed in r0. If a 32-bit return + value is expected, the actual function will be tail-called, + otherwise the return address will be stored in r10 (that the + caller should expect to be clobbered) and the return value + will be expanded into r2/r3 upon return. */ + + .global GLOBAL(GCC_shcompact_call_trampoline) + FUNC(GLOBAL(GCC_shcompact_call_trampoline)) +GLOBAL(GCC_shcompact_call_trampoline): + ptabs/l r0, tr0 /* Prepare to call the actual function.
*/ + movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 + pt/l LOCAL(ct_loop), tr1 + addz.l r1, r63, r1 /* zero-extend the 32-bit cookie */ + shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 /* r0 = table address biased by 31 entries */ +LOCAL(ct_loop): + nsb r1, r28 /* r28 depends on the position of the highest set cookie bit */ + shlli r28, 1, r29 /* scale to a 2-byte table index */ + ldx.w r0, r29, r30 /* r30 = handler offset from the table */ +LOCAL(ct_main_label): + ptrel/l r30, tr2 + blink tr2, r63 +LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */ + /* It must be dr0, so just do it. */ + fmov.dq dr0, r2 + movi 7, r30 + shlli r30, 29, r31 + andc r1, r31, r1 /* clear the 3-bit cookie field at bit 29 */ + blink tr1, r63 /* back to ct_loop */ +LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */ + /* It is either dr0 or dr2. */ + movi 7, r30 + shlri r1, 26, r32 + shlli r30, 26, r31 + andc r1, r31, r1 /* clear the 3-bit cookie field at bit 26 */ + fmov.dq dr0, r3 + beqi/l r32, 4, tr1 /* field value 4: dr0 was the right source */ + fmov.dq dr2, r3 + blink tr1, r63 +LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */ + shlri r1, 23 - 3, r34 + andi r34, 3 << 3, r33 /* r33 = 8 * (low two bits of the field): each copy stub below is 8 bytes */ + addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 +LOCAL(ct_r4_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 23, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r4_fp_copy): + fmov.dq dr0, r4 + blink tr1, r63 + fmov.dq dr2, r4 + blink tr1, r63 + fmov.dq dr4, r4 + blink tr1, r63 +LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */ + shlri r1, 20 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 +LOCAL(ct_r5_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 20, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r5_fp_copy): + fmov.dq dr0, r5 + blink tr1, r63 + fmov.dq dr2, r5 + blink tr1, r63 + fmov.dq dr4, r5 + blink tr1, r63 + fmov.dq dr6, r5 + blink tr1, r63 +LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */ + /* It must be dr8. */ + fmov.dq dr8, r6 + movi 15, r30 + shlli r30, 16, r31 + andc r1, r31, r1 /* clear the 4-bit cookie field at bit 16 */ + blink tr1, r63 +LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register.
*/ + shlri r1, 16 - 3, r34 + andi r34, 3 << 3, r33 /* r33 = 8 * (low two bits of the field): each copy stub below is 8 bytes */ + addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 +LOCAL(ct_r6_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 16, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r6_fp_copy): + fmov.dq dr0, r6 + blink tr1, r63 + fmov.dq dr2, r6 + blink tr1, r63 + fmov.dq dr4, r6 + blink tr1, r63 + fmov.dq dr6, r6 + blink tr1, r63 +LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */ + /* It is either dr8 or dr10. */ + movi 15 << 12, r31 + shlri r1, 12, r32 + andc r1, r31, r1 /* clear the 4-bit cookie field at bit 12 */ + fmov.dq dr8, r7 + beqi/l r32, 8, tr1 /* field value 8: dr8 was the right source */ + fmov.dq dr10, r7 + blink tr1, r63 +LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */ + shlri r1, 12 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 +LOCAL(ct_r7_fp_base): + ptrel/l r32, tr2 + movi 7 << 12, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r7_fp_copy): + fmov.dq dr0, r7 + blink tr1, r63 + fmov.dq dr2, r7 + blink tr1, r63 + fmov.dq dr4, r7 + blink tr1, r63 + fmov.dq dr6, r7 + blink tr1, r63 +LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */ + /* It is either dr8 or dr10. */ + movi 15 << 8, r31 + andi r1, 1 << 8, r32 + andc r1, r31, r1 /* clear the 4-bit cookie field at bit 8 */ + fmov.dq dr8, r8 + beq/l r32, r63, tr1 /* bit 8 was clear: keep dr8 */ + fmov.dq dr10, r8 + blink tr1, r63 +LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */ + shlri r1, 8 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 +LOCAL(ct_r8_fp_base): + ptrel/l r32, tr2 + movi 7 << 8, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r8_fp_copy): + fmov.dq dr0, r8 + blink tr1, r63 + fmov.dq dr2, r8 + blink tr1, r63 + fmov.dq dr4, r8 + blink tr1, r63 + fmov.dq dr6, r8 + blink tr1, r63 +LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */ + /* It is either dr8 or dr10.
*/ + movi 15 << 4, r31 + andi r1, 1 << 4, r32 + andc r1, r31, r1 /* clear the 4-bit cookie field at bit 4 */ + fmov.dq dr8, r9 + beq/l r32, r63, tr1 /* bit 4 was clear: keep dr8 */ + fmov.dq dr10, r9 + blink tr1, r63 +LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */ + shlri r1, 4 - 3, r34 + andi r34, 3 << 3, r33 /* r33 = 8 * (low two bits of the field): each copy stub below is 8 bytes */ + addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 +LOCAL(ct_r9_fp_base): + ptrel/l r32, tr2 + movi 7 << 4, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r9_fp_copy): + fmov.dq dr0, r9 + blink tr1, r63 + fmov.dq dr2, r9 + blink tr1, r63 + fmov.dq dr4, r9 + blink tr1, r63 + fmov.dq dr6, r9 + blink tr1, r63 +LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */ + pt/l LOCAL(ct_r2_load), tr2 + movi 3, r30 + shlli r30, 29, r31 + and r1, r31, r32 + andc r1, r31, r1 /* clear the 2-bit cookie field at bit 29 */ + beq/l r31, r32, tr2 /* both field bits were set: plain load at ct_r2_load, then back to the loop */ + addi.l r2, 8, r3 /* otherwise r3 = r2 + 8 ... */ + ldx.q r2, r63, r2 /* ... r2 is loaded from the address it held ... */ + /* Fall through. */ +LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */ + pt/l LOCAL(ct_r3_load), tr2 + movi 3, r30 + shlli r30, 26, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r3, 8, r4 + ldx.q r3, r63, r3 +LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */ + pt/l LOCAL(ct_r4_load), tr2 + movi 3, r30 + shlli r30, 23, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r4, 8, r5 + ldx.q r4, r63, r4 +LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */ + pt/l LOCAL(ct_r5_load), tr2 + movi 3, r30 + shlli r30, 20, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r5, 8, r6 + ldx.q r5, r63, r5 +LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */ + pt/l LOCAL(ct_r6_load), tr2 + movi 3 << 16, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r6, 8, r7 + ldx.q r6, r63, r6 +LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */ + pt/l LOCAL(ct_r7_load), tr2 + movi 3 << 12, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r7, 8, r8 + ldx.q r7, r63, r7 +LOCAL(ct_r8_ld): /* Copy r8 from a memory address.
*/ + pt/l LOCAL(ct_r8_load), tr2 + movi 3 << 8, r31 + and r1, r31, r32 + andc r1, r31, r1 /* clear the 2-bit cookie field at bit 8 */ + beq/l r31, r32, tr2 /* both field bits were set: plain load at ct_r8_load */ + addi.l r8, 8, r9 /* otherwise r9 = r8 + 8 and fall into the r9 load */ + ldx.q r8, r63, r8 +LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */ + pt/l LOCAL(ct_check_tramp), tr2 /* nothing follows r9: go to the final check afterwards */ + ldx.q r9, r63, r9 + blink tr2, r63 +LOCAL(ct_r2_load): /* ct_rN_load: load rN from the address it holds, then return to ct_loop through tr1 */ + ldx.q r2, r63, r2 + blink tr1, r63 +LOCAL(ct_r3_load): + ldx.q r3, r63, r3 + blink tr1, r63 +LOCAL(ct_r4_load): + ldx.q r4, r63, r4 + blink tr1, r63 +LOCAL(ct_r5_load): + ldx.q r5, r63, r5 + blink tr1, r63 +LOCAL(ct_r6_load): + ldx.q r6, r63, r6 + blink tr1, r63 +LOCAL(ct_r7_load): + ldx.q r7, r63, r7 + blink tr1, r63 +LOCAL(ct_r8_load): + ldx.q r8, r63, r8 + blink tr1, r63 +LOCAL(ct_r2_pop): /* Pop r2 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r2 + shlli r30, 29, r31 + addi.l r15, 8, r15 /* release the 8-byte slot */ + andc r1, r31, r1 /* clear cookie bit 29 */ + blink tr1, r63 /* back to ct_loop */ +LOCAL(ct_r3_pop): /* Pop r3 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r3 + shlli r30, 26, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r4_pop): /* Pop r4 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r4 + shlli r30, 23, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r5 + shlli r30, 20, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r6 + shlli r30, 16, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ + ldx.q r15, r63, r7 + movi 1 << 12, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r8_pop): /* Pop r8 from the stack. */ + ldx.q r15, r63, r8 + movi 1 << 8, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack.
*/ + andi r1, 7 << 1, r30 /* r30 = 2 * (3-bit count held in cookie bits 1-3) */ + movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 + shlli r30, 2, r31 /* r31 = 8 * count: each pop below is two 4-byte instructions */ + shori LOCAL(ct_end_of_pop_seq) & 65535, r32 + sub.l r32, r31, r33 /* entry point = end label - 8 * count */ + ptabs/l r33, tr2 + blink tr2, r63 +LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ + ldx.q r15, r63, r3 + addi.l r15, 8, r15 + ldx.q r15, r63, r4 + addi.l r15, 8, r15 + ldx.q r15, r63, r5 + addi.l r15, 8, r15 + ldx.q r15, r63, r6 + addi.l r15, 8, r15 + ldx.q r15, r63, r7 + addi.l r15, 8, r15 + ldx.q r15, r63, r8 + addi.l r15, 8, r15 +LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ + ldx.q r15, r63, r9 + addi.l r15, 8, r15 +LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ +LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ + pt/u LOCAL(ct_ret_wide), tr2 + andi r1, 1, r1 /* only bit 0 of the cookie matters from here on */ + bne/u r1, r63, tr2 /* bit 0 set: take the ct_ret_wide path */ +LOCAL(ct_call_func): /* Just branch to the function. */ + blink tr0, r63 +LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its + 64-bit return value. */ + add.l r18, r63, r10 /* keep the caller's return address in r10 */ + blink tr0, r18 /* call the function, linking through r18 */ + ptabs r10, tr0 /* tr0 = saved return address */ +#if __LITTLE_ENDIAN__ + shari r2, 32, r3 /* r3 = upper 32 bits of the 64-bit result */ + add.l r2, r63, r2 /* r2 = lower 32 bits */ +#else + add.l r2, r63, r3 /* r3 = lower 32 bits of the 64-bit result */ + shari r2, 32, r2 /* r2 = upper 32 bits */ +#endif + blink tr0, r63 + + ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) +#endif /* L_shcompact_call_trampoline */ + +#ifdef L_shcompact_return_trampoline + /* This function does the converse of the code in `ret_wide' + above. It is tail-called by SHcompact functions returning + 64-bit non-floating-point values, to pack the 32-bit values in + r2 and r3 into r2.
*/ + + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + .global GLOBAL(GCC_shcompact_return_trampoline) + HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline)) +GLOBAL(GCC_shcompact_return_trampoline): + ptabs/l r18, tr0 /* tr0 = return address taken from r18 */ +#if __LITTLE_ENDIAN__ + addz.l r2, r63, r2 /* r2 supplies the low 32 bits (zero-extended) */ + shlli r3, 32, r3 /* r3 supplies the high 32 bits */ +#else + addz.l r3, r63, r3 /* r3 supplies the low 32 bits (zero-extended) */ + shlli r2, 32, r2 /* r2 supplies the high 32 bits */ +#endif + or r3, r2, r2 /* packed 64-bit value in r2 */ + blink tr0, r63 + + ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) +#endif /* L_shcompact_return_trampoline */ + +#ifdef L_shcompact_incoming_args + .section .rodata + .align 1 +LOCAL(ia_main_table): /* 16-bit handler offsets, each relative to ia_main_label */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_push_seq) - datalabel
LOCAL(ia_main_label) +.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + + /* This function stores 64-bit general-purpose registers back in + the stack, and loads the address in which each register + was stored into itself. The lower 32 bits of r17 hold the address + to begin storing, and the upper 32 bits of r17 hold the cookie. + Its execution time is linear in the + number of registers that actually have to be copied, and it is + optimized for structures larger than 64 bits, as opposed to + individual `long long' arguments. See sh.h for details on the + actual bit pattern. */ + + .global GLOBAL(GCC_shcompact_incoming_args) + FUNC(GLOBAL(GCC_shcompact_incoming_args)) +GLOBAL(GCC_shcompact_incoming_args): + ptabs/l r18, tr0 /* Prepare to return. */ + shlri r17, 32, r0 /* Load the cookie. */ + movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 + pt/l LOCAL(ia_loop), tr1 + add.l r17, r63, r17 /* keep only the low 32 bits of r17: the store address */ + shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 /* r43 = table address biased by 31 entries */ +LOCAL(ia_loop): + nsb r0, r36 /* r36 depends on the position of the highest set cookie bit */ + shlli r36, 1, r37 /* scale to a 2-byte table index */ + ldx.w r43, r37, r38 /* r38 = handler offset from the table */ +LOCAL(ia_main_label): + ptrel/l r38, tr2 + blink tr2, r63 +LOCAL(ia_r2_ld): /* Store r2 and load its address. */ + movi 3, r38 + shlli r38, 29, r39 + and r0, r39, r40 + andc r0, r39, r0 /* clear the 2-bit cookie field at bit 29 */ + stx.q r17, r63, r2 /* store r2 at the current address ... */ + add.l r17, r63, r2 /* ... and replace r2 by that address */ + addi.l r17, 8, r17 /* advance the store address by 8 */ + beq/u r39, r40, tr1 /* both field bits were set: back to ia_loop; otherwise fall into the r3 case */ +LOCAL(ia_r3_ld): /* Store r3 and load its address. */ + movi 3, r38 + shlli r38, 26, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r3 + add.l r17, r63, r3 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r4_ld): /* Store r4 and load its address. */ + movi 3, r38 + shlli r38, 23, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r4 + add.l r17, r63, r4 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r5_ld): /* Store r5 and load its address.
*/ + movi 3, r38 + shlli r38, 20, r39 + and r0, r39, r40 + andc r0, r39, r0 /* clear the 2-bit cookie field at bit 20 */ + stx.q r17, r63, r5 /* store r5 at the current address ... */ + add.l r17, r63, r5 /* ... and replace r5 by that address */ + addi.l r17, 8, r17 /* advance the store address by 8 */ + beq/u r39, r40, tr1 /* both field bits were set: back to ia_loop; otherwise fall into the r6 case */ +LOCAL(ia_r6_ld): /* Store r6 and load its address. */ + movi 3, r38 + shlli r38, 16, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r6 + add.l r17, r63, r6 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r7_ld): /* Store r7 and load its address. */ + movi 3 << 12, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r7 + add.l r17, r63, r7 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r8_ld): /* Store r8 and load its address. */ + movi 3 << 8, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r8 + add.l r17, r63, r8 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r9_ld): /* Store r9 and load its address. */ + stx.q r17, r63, r9 + add.l r17, r63, r9 + blink tr0, r63 /* nothing follows r9: return to the caller */ +LOCAL(ia_r2_push): /* Push r2 onto the stack. */ + movi 1, r38 + shlli r38, 29, r39 + andc r0, r39, r0 /* clear cookie bit 29 */ + stx.q r17, r63, r2 + addi.l r17, 8, r17 /* advance the store address by 8 */ + blink tr1, r63 /* back to ia_loop */ +LOCAL(ia_r3_push): /* Push r3 onto the stack. */ + movi 1, r38 + shlli r38, 26, r39 + andc r0, r39, r0 + stx.q r17, r63, r3 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r4_push): /* Push r4 onto the stack. */ + movi 1, r38 + shlli r38, 23, r39 + andc r0, r39, r0 + stx.q r17, r63, r4 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r5_push): /* Push r5 onto the stack. */ + movi 1, r38 + shlli r38, 20, r39 + andc r0, r39, r0 + stx.q r17, r63, r5 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r6_push): /* Push r6 onto the stack. */ + movi 1, r38 + shlli r38, 16, r39 + andc r0, r39, r0 + stx.q r17, r63, r6 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r7_push): /* Push r7 onto the stack. */ + movi 1 << 12, r39 + andc r0, r39, r0 + stx.q r17, r63, r7 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r8_push): /* Push r8 onto the stack.
*/ + movi 1 << 8, r39 + andc r0, r39, r0 /* clear cookie bit 8 */ + stx.q r17, r63, r8 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ + andi r0, 7 << 1, r38 /* r38 = 2 * (3-bit count held in cookie bits 1-3) */ + movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 + shlli r38, 2, r39 /* r39 = 8 * count: each push step below occupies 8 bytes */ + shori LOCAL(ia_end_of_push_seq) & 65535, r40 + sub.l r40, r39, r41 /* entry point = end label - 8 * count */ + ptabs/l r41, tr2 + blink tr2, r63 +LOCAL(ia_start_of_push_seq): /* Beginning of push sequence. */ + stx.q r17, r63, r3 + addi.l r17, 8, r17 + stx.q r17, r63, r4 + addi.l r17, 8, r17 + stx.q r17, r63, r5 + addi.l r17, 8, r17 + stx.q r17, r63, r6 + addi.l r17, 8, r17 + stx.q r17, r63, r7 + addi.l r17, 8, r17 + stx.q r17, r63, r8 + addi.l r17, 8, r17 +LOCAL(ia_r9_push): /* Push r9 onto the stack. */ + stx.q r17, r63, r9 +LOCAL(ia_return): /* Return. */ + blink tr0, r63 +LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ + ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) +#endif /* L_shcompact_incoming_args */ +#endif /* defined (__SH5__) && __SH5__ == 32 */ +#if __SH5__ +#ifdef L_nested_trampoline +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 3 /* It is copied in units of 8 bytes in SHmedia mode.
*/ + .global GLOBAL(GCC_nested_trampoline) + HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline)) +GLOBAL(GCC_nested_trampoline): + .mode SHmedia + ptrel/u r63, tr0 + gettr tr0, r0 +#if __SH5__ == 64 + ld.q r0, 24, r1 +#else + ld.l r0, 24, r1 +#endif + ptabs/l r1, tr1 +#if __SH5__ == 64 + ld.q r0, 32, r1 +#else + ld.l r0, 28, r1 +#endif + blink tr1, r63 + + ENDFUNC(GLOBAL(GCC_nested_trampoline)) +#endif /* L_nested_trampoline */ +#endif /* __SH5__ */ +#if __SH5__ == 32 +#ifdef L_push_pop_shmedia_regs + .section .text..SHmedia32,"ax" + .mode SHmedia + .align 2 +#ifndef __SH4_NOFPU__ + .global GLOBAL(GCC_push_shmedia_regs) + FUNC(GLOBAL(GCC_push_shmedia_regs)) +GLOBAL(GCC_push_shmedia_regs): + addi.l r15, -14*8, r15 + fst.d r15, 13*8, dr62 + fst.d r15, 12*8, dr60 + fst.d r15, 11*8, dr58 + fst.d r15, 10*8, dr56 + fst.d r15, 9*8, dr54 + fst.d r15, 8*8, dr52 + fst.d r15, 7*8, dr50 + fst.d r15, 6*8, dr48 + fst.d r15, 5*8, dr46 + fst.d r15, 4*8, dr44 + fst.d r15, 3*8, dr42 + fst.d r15, 2*8, dr40 + fst.d r15, 1*8, dr38 + fst.d r15, 0*8, dr36 +#else /* ! __SH4_NOFPU__ */ + .global GLOBAL(GCC_push_shmedia_regs_nofpu) + FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) +GLOBAL(GCC_push_shmedia_regs_nofpu): +#endif /* ! 
__SH4_NOFPU__ */ + ptabs/l r18, tr0 + addi.l r15, -27*8, r15 + gettr tr7, r62 + gettr tr6, r61 + gettr tr5, r60 + st.q r15, 26*8, r62 + st.q r15, 25*8, r61 + st.q r15, 24*8, r60 + st.q r15, 23*8, r59 + st.q r15, 22*8, r58 + st.q r15, 21*8, r57 + st.q r15, 20*8, r56 + st.q r15, 19*8, r55 + st.q r15, 18*8, r54 + st.q r15, 17*8, r53 + st.q r15, 16*8, r52 + st.q r15, 15*8, r51 + st.q r15, 14*8, r50 + st.q r15, 13*8, r49 + st.q r15, 12*8, r48 + st.q r15, 11*8, r47 + st.q r15, 10*8, r46 + st.q r15, 9*8, r45 + st.q r15, 8*8, r44 + st.q r15, 7*8, r35 + st.q r15, 6*8, r34 + st.q r15, 5*8, r33 + st.q r15, 4*8, r32 + st.q r15, 3*8, r31 + st.q r15, 2*8, r30 + st.q r15, 1*8, r29 + st.q r15, 0*8, r28 + blink tr0, r63 +#ifndef __SH4_NOFPU__ + ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) +#else + ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) +#endif +#ifndef __SH4_NOFPU__ + .global GLOBAL(GCC_pop_shmedia_regs) + FUNC(GLOBAL(GCC_pop_shmedia_regs)) +GLOBAL(GCC_pop_shmedia_regs): + pt .L0, tr1 + movi 41*8, r0 + fld.d r15, 40*8, dr62 + fld.d r15, 39*8, dr60 + fld.d r15, 38*8, dr58 + fld.d r15, 37*8, dr56 + fld.d r15, 36*8, dr54 + fld.d r15, 35*8, dr52 + fld.d r15, 34*8, dr50 + fld.d r15, 33*8, dr48 + fld.d r15, 32*8, dr46 + fld.d r15, 31*8, dr44 + fld.d r15, 30*8, dr42 + fld.d r15, 29*8, dr40 + fld.d r15, 28*8, dr38 + fld.d r15, 27*8, dr36 + blink tr1, r63 +#else /* ! __SH4_NOFPU__ */ + .global GLOBAL(GCC_pop_shmedia_regs_nofpu) + FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) +GLOBAL(GCC_pop_shmedia_regs_nofpu): +#endif /* ! 
__SH4_NOFPU__ */ + movi 27*8, r0 +.L0: + ptabs r18, tr0 + ld.q r15, 26*8, r62 + ld.q r15, 25*8, r61 + ld.q r15, 24*8, r60 + ptabs r62, tr7 + ptabs r61, tr6 + ptabs r60, tr5 + ld.q r15, 23*8, r59 + ld.q r15, 22*8, r58 + ld.q r15, 21*8, r57 + ld.q r15, 20*8, r56 + ld.q r15, 19*8, r55 + ld.q r15, 18*8, r54 + ld.q r15, 17*8, r53 + ld.q r15, 16*8, r52 + ld.q r15, 15*8, r51 + ld.q r15, 14*8, r50 + ld.q r15, 13*8, r49 + ld.q r15, 12*8, r48 + ld.q r15, 11*8, r47 + ld.q r15, 10*8, r46 + ld.q r15, 9*8, r45 + ld.q r15, 8*8, r44 + ld.q r15, 7*8, r35 + ld.q r15, 6*8, r34 + ld.q r15, 5*8, r33 + ld.q r15, 4*8, r32 + ld.q r15, 3*8, r31 + ld.q r15, 2*8, r30 + ld.q r15, 1*8, r29 + ld.q r15, 0*8, r28 + add.l r15, r0, r15 + blink tr0, r63 + +#ifndef __SH4_NOFPU__ + ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) +#else + ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) +#endif +#endif /* L_push_pop_shmedia_regs */ +#endif /* __SH5__ == 32 */ + +#ifdef L_div_table +#if __SH5__ +#if defined(__pic__) && defined(__SHMEDIA__) + .global GLOBAL(sdivsi3) + FUNC(GLOBAL(sdivsi3)) +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif +#if 0 +/* ??? FIXME: Presumably due to a linker bug, exporting data symbols + in a text section does not work (at least for shared libraries): + the linker sets the LSB of the address as if this was SHmedia code. 
*/ +#define TEXT_DATA_BUG +#endif + .align 2 + // inputs: r4,r5 + // clobbered: r1,r18,r19,r20,r21,r25,tr0 + // result in r0 + .global GLOBAL(sdivsi3) +GLOBAL(sdivsi3): +#ifdef TEXT_DATA_BUG + ptb datalabel Local_div_table,tr0 +#else + ptb GLOBAL(div_table_internal),tr0 +#endif + nsb r5, r1 + shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 + shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) + /* bubble */ + gettr tr0,r20 + ldx.ub r20, r21, r19 // u0.8 + shari r25, 32, r25 // normalize to s2.30 + shlli r21, 1, r21 + muls.l r25, r19, r19 // s2.38 + ldx.w r20, r21, r21 // s2.14 + ptabs r18, tr0 + shari r19, 24, r19 // truncate to s2.14 + sub r21, r19, r19 // some 11 bit inverse in s1.14 + muls.l r19, r19, r21 // u0.28 + sub r63, r1, r1 + addi r1, 92, r1 + muls.l r25, r21, r18 // s2.58 + shlli r19, 45, r19 // multiply by two and convert to s2.58 + /* bubble */ + sub r19, r18, r18 + shari r18, 28, r18 // some 22 bit inverse in s1.30 + muls.l r18, r25, r0 // s2.60 + muls.l r18, r4, r25 // s32.30 + /* bubble */ + shari r0, 16, r19 // s-16.44 + muls.l r19, r18, r19 // s-16.74 + shari r25, 63, r0 + shari r4, 14, r18 // s19.-14 + shari r19, 30, r19 // s-16.44 + muls.l r19, r18, r19 // s15.30 + xor r21, r0, r21 // You could also use the constant 1 << 27. + add r21, r25, r21 + sub r21, r19, r21 + shard r21, r1, r21 + sub r21, r0, r0 + blink tr0, r63 + ENDFUNC(GLOBAL(sdivsi3)) +/* This table has been generated by divtab.c . +Defects for bias -330: + Max defect: 6.081536e-07 at -1.000000e+00 + Min defect: 2.849516e-08 at 1.030651e+00 + Max 2nd step defect: 9.606539e-12 at -1.000000e+00 + Min 2nd step defect: 0.000000e+00 at 0.000000e+00 + Defect at 1: 1.238659e-07 + Defect at -2: 1.061708e-07 */ +#else /* ! __pic__ || ! 
__SHMEDIA__ */ + .section .rodata +#endif /* __pic__ */ +#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__) + .balign 2 + .type Local_div_table,@object + .size Local_div_table,128 +/* negative division constants */ + .word -16638 + .word -17135 + .word -17737 + .word -18433 + .word -19103 + .word -19751 + .word -20583 + .word -21383 + .word -22343 + .word -23353 + .word -24407 + .word -25582 + .word -26863 + .word -28382 + .word -29965 + .word -31800 +/* negative division factors */ + .byte 66 + .byte 70 + .byte 75 + .byte 81 + .byte 87 + .byte 93 + .byte 101 + .byte 109 + .byte 119 + .byte 130 + .byte 142 + .byte 156 + .byte 172 + .byte 192 + .byte 214 + .byte 241 + .skip 16 +Local_div_table: + .skip 16 +/* positive division factors */ + .byte 241 + .byte 214 + .byte 192 + .byte 172 + .byte 156 + .byte 142 + .byte 130 + .byte 119 + .byte 109 + .byte 101 + .byte 93 + .byte 87 + .byte 81 + .byte 75 + .byte 70 + .byte 66 +/* positive division constants */ + .word 31801 + .word 29966 + .word 28383 + .word 26864 + .word 25583 + .word 24408 + .word 23354 + .word 22344 + .word 21384 + .word 20584 + .word 19752 + .word 19104 + .word 18434 + .word 17738 + .word 17136 + .word 16639 + .section .rodata +#endif /* TEXT_DATA_BUG */ + .balign 2 + .type GLOBAL(div_table),@object + .size GLOBAL(div_table),128 +/* negative division constants */ + .word -16638 + .word -17135 + .word -17737 + .word -18433 + .word -19103 + .word -19751 + .word -20583 + .word -21383 + .word -22343 + .word -23353 + .word -24407 + .word -25582 + .word -26863 + .word -28382 + .word -29965 + .word -31800 +/* negative division factors */ + .byte 66 + .byte 70 + .byte 75 + .byte 81 + .byte 87 + .byte 93 + .byte 101 + .byte 109 + .byte 119 + .byte 130 + .byte 142 + .byte 156 + .byte 172 + .byte 192 + .byte 214 + .byte 241 + .skip 16 + .global GLOBAL(div_table) +GLOBAL(div_table): + HIDDEN_ALIAS(div_table_internal,div_table) + .skip 16 +/* positive division factors */ + .byte 241 + .byte 
214 + .byte 192 + .byte 172 + .byte 156 + .byte 142 + .byte 130 + .byte 119 + .byte 109 + .byte 101 + .byte 93 + .byte 87 + .byte 81 + .byte 75 + .byte 70 + .byte 66 +/* positive division constants */ + .word 31801 + .word 29966 + .word 28383 + .word 26864 + .word 25583 + .word 24408 + .word 23354 + .word 22344 + .word 21384 + .word 20584 + .word 19752 + .word 19104 + .word 18434 + .word 17738 + .word 17136 + .word 16639 + +#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) +/* This code used shld, thus is not suitable for SH1 / SH2. */ + +/* Signed / unsigned division without use of FPU, optimized for SH4. + Uses a lookup table for divisors in the range -128 .. +128, and + div1 with case distinction for larger divisors in three more ranges. + The code is lumped together with the table to allow the use of mova. */ +#ifdef __LITTLE_ENDIAN__ +#define L_LSB 0 +#define L_LSWMSB 1 +#define L_MSWLSB 2 +#else +#define L_LSB 3 +#define L_LSWMSB 2 +#define L_MSWLSB 1 +#endif + + .balign 4 + .global GLOBAL(udivsi3_i4i) + FUNC(GLOBAL(udivsi3_i4i)) +GLOBAL(udivsi3_i4i): + mov.w LOCAL(c128_w), r1 + div0u + mov r4,r0 + shlr8 r0 + cmp/hi r1,r5 + extu.w r5,r1 + bf LOCAL(udiv_le128) + cmp/eq r5,r1 + bf LOCAL(udiv_ge64k) + shlr r0 + mov r5,r1 + shll16 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 + div1 r5,r0 + div1 r5,r0 + bra LOCAL(udiv_25) + div1 r5,r0 + +LOCAL(div_le128): + mova LOCAL(div_table_ix),r0 + bra LOCAL(div_le128_2) + mov.b @(r0,r5),r1 +LOCAL(udiv_le128): + mov.l r4,@-r15 + mova LOCAL(div_table_ix),r0 + mov.b @(r0,r5),r1 + mov.l r5,@-r15 +LOCAL(div_le128_2): + mova LOCAL(div_table_inv),r0 + mov.l @(r0,r1),r1 + mov r5,r0 + tst #0xfe,r0 + mova LOCAL(div_table_clz),r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + bt/s LOCAL(div_by_1) + mov r4,r0 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + rts + shld r1,r0 + +LOCAL(div_by_1_neg): + 
neg r4,r0 +LOCAL(div_by_1): + mov.l @r15+,r5 + rts + mov.l @r15+,r4 + +LOCAL(div_ge64k): + bt/s LOCAL(div_r8) + div0u + shll8 r5 + bra LOCAL(div_ge64k_2) + div1 r5,r0 +LOCAL(udiv_ge64k): + cmp/hi r0,r5 + mov r5,r1 + bt LOCAL(udiv_r8) + shll8 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 +LOCAL(div_ge64k_2): + div1 r5,r0 + mov.l LOCAL(zero_l),r1 + .rept 4 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w LOCAL(m256_w),r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra LOCAL(div_ge64k_end) + xor r4,r0 + +LOCAL(div_r8): + shll16 r4 + bra LOCAL(div_r8_2) + shll8 r4 +LOCAL(udiv_r8): + mov.l r4,@-r15 + shll16 r4 + clrt + shll8 r4 + mov.l r5,@-r15 +LOCAL(div_r8_2): + rotcl r4 + mov r0,r1 + div1 r5,r1 + mov r4,r0 + rotcl r0 + mov r5,r4 + div1 r5,r1 + .rept 5 + rotcl r0; div1 r5,r1 + .endr + rotcl r0 + mov.l @r15+,r5 + div1 r4,r1 + mov.l @r15+,r4 + rts + rotcl r0 + + ENDFUNC(GLOBAL(udivsi3_i4i)) + + .global GLOBAL(sdivsi3_i4i) + FUNC(GLOBAL(sdivsi3_i4i)) + /* This is link-compatible with a GLOBAL(sdivsi3) call, + but we effectively clobber only r1. */ +GLOBAL(sdivsi3_i4i): + mov.l r4,@-r15 + cmp/pz r5 + mov.w LOCAL(c128_w), r1 + bt/s LOCAL(pos_divisor) + cmp/pz r4 + mov.l r5,@-r15 + neg r5,r5 + bt/s LOCAL(neg_result) + cmp/hi r1,r5 + neg r4,r4 +LOCAL(pos_result): + extu.w r5,r0 + bf LOCAL(div_le128) + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s LOCAL(div_ge64k) + cmp/hi r0,r5 + div0u + shll16 r5 + div1 r5,r0 + div1 r5,r0 + div1 r5,r0 +LOCAL(udiv_25): + mov.l LOCAL(zero_l),r1 + div1 r5,r0 + div1 r5,r0 + mov.l r1,@-r15 + .rept 3 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +LOCAL(div_ge64k_end): + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. 
+ extu.b r0,r0 + mov.l @r15+,r5 + or r4,r0 + mov.l @r15+,r4 + rts + rotcl r0 + +LOCAL(div_le128_neg): + tst #0xfe,r0 + mova LOCAL(div_table_ix),r0 + mov.b @(r0,r5),r1 + mova LOCAL(div_table_inv),r0 + bt/s LOCAL(div_by_1_neg) + mov.l @(r0,r1),r1 + mova LOCAL(div_table_clz),r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + shld r1,r0 + rts + neg r0,r0 + +LOCAL(pos_divisor): + mov.l r5,@-r15 + bt/s LOCAL(pos_result) + cmp/hi r1,r5 + neg r4,r4 +LOCAL(neg_result): + extu.w r5,r0 + bf LOCAL(div_le128_neg) + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s LOCAL(div_ge64k_neg) + cmp/hi r0,r5 + div0u + mov.l LOCAL(zero_l),r1 + shll16 r5 + div1 r5,r0 + mov.l r1,@-r15 + .rept 7 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +LOCAL(div_ge64k_neg_end): + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. + extu.b r0,r1 + mov.l @r15+,r5 + or r4,r1 +LOCAL(div_r8_neg_end): + mov.l @r15+,r4 + rotcl r1 + rts + neg r1,r0 + +LOCAL(div_ge64k_neg): + bt/s LOCAL(div_r8_neg) + div0u + shll8 r5 + mov.l LOCAL(zero_l),r1 + .rept 6 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w LOCAL(m256_w),r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra LOCAL(div_ge64k_neg_end) + xor r4,r0 + +LOCAL(c128_w): + .word 128 + +LOCAL(div_r8_neg): + clrt + shll16 r4 + mov r4,r1 + shll8 r1 + mov r5,r4 + .rept 7 + rotcl r1; div1 r5,r0 + .endr + mov.l @r15+,r5 + rotcl r1 + bra LOCAL(div_r8_neg_end) + div1 r4,r0 + +LOCAL(m256_w): + .word 0xff00 +/* This table has been generated by divtab-sh4.c. 
*/ + .balign 4 +LOCAL(div_table_clz): + .byte 0 + .byte 1 + .byte 0 + .byte -1 + .byte -1 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 +/* Lookup table translating positive divisor to index into table of + normalized inverse. N.B. the '0' entry is also the last entry of the + previous table, and causes an unaligned access for division by zero. 
*/ +LOCAL(div_table_ix): + .byte -6 + .byte -128 + .byte -128 + .byte 0 + .byte -128 + .byte -64 + .byte 0 + .byte 64 + .byte -128 + .byte -96 + .byte -64 + .byte -32 + .byte 0 + .byte 32 + .byte 64 + .byte 96 + .byte -128 + .byte -112 + .byte -96 + .byte -80 + .byte -64 + .byte -48 + .byte -32 + .byte -16 + .byte 0 + .byte 16 + .byte 32 + .byte 48 + .byte 64 + .byte 80 + .byte 96 + .byte 112 + .byte -128 + .byte -120 + .byte -112 + .byte -104 + .byte -96 + .byte -88 + .byte -80 + .byte -72 + .byte -64 + .byte -56 + .byte -48 + .byte -40 + .byte -32 + .byte -24 + .byte -16 + .byte -8 + .byte 0 + .byte 8 + .byte 16 + .byte 24 + .byte 32 + .byte 40 + .byte 48 + .byte 56 + .byte 64 + .byte 72 + .byte 80 + .byte 88 + .byte 96 + .byte 104 + .byte 112 + .byte 120 + .byte -128 + .byte -124 + .byte -120 + .byte -116 + .byte -112 + .byte -108 + .byte -104 + .byte -100 + .byte -96 + .byte -92 + .byte -88 + .byte -84 + .byte -80 + .byte -76 + .byte -72 + .byte -68 + .byte -64 + .byte -60 + .byte -56 + .byte -52 + .byte -48 + .byte -44 + .byte -40 + .byte -36 + .byte -32 + .byte -28 + .byte -24 + .byte -20 + .byte -16 + .byte -12 + .byte -8 + .byte -4 + .byte 0 + .byte 4 + .byte 8 + .byte 12 + .byte 16 + .byte 20 + .byte 24 + .byte 28 + .byte 32 + .byte 36 + .byte 40 + .byte 44 + .byte 48 + .byte 52 + .byte 56 + .byte 60 + .byte 64 + .byte 68 + .byte 72 + .byte 76 + .byte 80 + .byte 84 + .byte 88 + .byte 92 + .byte 96 + .byte 100 + .byte 104 + .byte 108 + .byte 112 + .byte 116 + .byte 120 + .byte 124 + .byte -128 +/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. 
*/ + .balign 4 +LOCAL(zero_l): + .long 0x0 + .long 0xF81F81F9 + .long 0xF07C1F08 + .long 0xE9131AC0 + .long 0xE1E1E1E2 + .long 0xDAE6076C + .long 0xD41D41D5 + .long 0xCD856891 + .long 0xC71C71C8 + .long 0xC0E07039 + .long 0xBACF914D + .long 0xB4E81B4F + .long 0xAF286BCB + .long 0xA98EF607 + .long 0xA41A41A5 + .long 0x9EC8E952 + .long 0x9999999A + .long 0x948B0FCE + .long 0x8F9C18FA + .long 0x8ACB90F7 + .long 0x86186187 + .long 0x81818182 + .long 0x7D05F418 + .long 0x78A4C818 + .long 0x745D1746 + .long 0x702E05C1 + .long 0x6C16C16D + .long 0x68168169 + .long 0x642C8591 + .long 0x60581606 + .long 0x5C9882BA + .long 0x58ED2309 +LOCAL(div_table_inv): + .long 0x55555556 + .long 0x51D07EAF + .long 0x4E5E0A73 + .long 0x4AFD6A06 + .long 0x47AE147B + .long 0x446F8657 + .long 0x41414142 + .long 0x3E22CBCF + .long 0x3B13B13C + .long 0x38138139 + .long 0x3521CFB3 + .long 0x323E34A3 + .long 0x2F684BDB + .long 0x2C9FB4D9 + .long 0x29E4129F + .long 0x27350B89 + .long 0x24924925 + .long 0x21FB7813 + .long 0x1F7047DD + .long 0x1CF06ADB + .long 0x1A7B9612 + .long 0x18118119 + .long 0x15B1E5F8 + .long 0x135C8114 + .long 0x11111112 + .long 0xECF56BF + .long 0xC9714FC + .long 0xA6810A7 + .long 0x8421085 + .long 0x624DD30 + .long 0x4104105 + .long 0x2040811 + /* maximum error: 0.987342 scaled: 0.921875*/ + + ENDFUNC(GLOBAL(sdivsi3_i4i)) +#endif /* SH3 / SH4 */ + +#endif /* L_div_table */ + +#ifdef L_udiv_qrnnd_16 +#if !__SHMEDIA__ + HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) + /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ + /* n1 < d, but n1 might be larger than d1. 
*/ + .global GLOBAL(udiv_qrnnd_16) + .balign 8 +GLOBAL(udiv_qrnnd_16): + div0u + cmp/hi r6,r0 + bt .Lots + .rept 16 + div1 r6,r0 + .endr + extu.w r0,r1 + bt 0f + add r6,r0 +0: rotcl r1 + mulu.w r1,r5 + xtrct r4,r0 + swap.w r0,r0 + sts macl,r2 + cmp/hs r2,r0 + sub r2,r0 + bt 0f + addc r5,r0 + add #-1,r1 + bt 0f +1: add #-1,r1 + rts + add r5,r0 + .balign 8 +.Lots: + sub r5,r0 + swap.w r4,r1 + xtrct r0,r1 + clrt + mov r1,r0 + addc r5,r0 + mov #-1,r1 + SL1(bf, 1b, + shlr16 r1) +0: rts + nop + ENDFUNC(GLOBAL(udiv_qrnnd_16)) +#endif /* !__SHMEDIA__ */ +#endif /* L_udiv_qrnnd_16 */ diff --git a/gcc/config/sh/lib1funcs.h b/gcc/config/sh/lib1funcs.h new file mode 100644 index 000000000..af4b41cc3 --- /dev/null +++ b/gcc/config/sh/lib1funcs.h @@ -0,0 +1,76 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2009 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + +#ifdef __ELF__ +#define LOCAL(X) .L_##X +#define FUNC(X) .type X,@function +#define HIDDEN_FUNC(X) FUNC(X); .hidden X +#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X) +#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X +#define ENDFUNC(X) ENDFUNC0(X) +#else +#define LOCAL(X) L_##X +#define FUNC(X) +#define HIDDEN_FUNC(X) +#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y) +#define ENDFUNC(X) +#endif + +#define CONCAT(A,B) A##B +#define GLOBAL0(U,X) CONCAT(U,__##X) +#define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X) + +#define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) + +#if defined __SH2A__ && defined __FMOVD_ENABLED__ +#undef FMOVD_WORKS +#define FMOVD_WORKS +#endif + +#ifdef __LITTLE_ENDIAN__ +#define DR00 fr1 +#define DR01 fr0 +#define DR20 fr3 +#define DR21 fr2 +#define DR40 fr5 +#define DR41 fr4 +#else /* !__LITTLE_ENDIAN__ */ +#define DR00 fr0 +#define DR01 fr1 +#define DR20 fr2 +#define DR21 fr3 +#define DR40 fr4 +#define DR41 fr5 +#endif /* !__LITTLE_ENDIAN__ */ + +#ifdef __sh1__ +#define SL(branch, dest, in_slot, in_slot_arg2) \ + in_slot, in_slot_arg2; branch dest +#define SL1(branch, dest, in_slot) \ + in_slot; branch dest +#else /* ! __sh1__ */ +#define SL(branch, dest, in_slot, in_slot_arg2) \ + branch##.s dest; in_slot, in_slot_arg2 +#define SL1(branch, dest, in_slot) \ + branch##/s dest; in_slot +#endif /* !__sh1__ */ diff --git a/gcc/config/sh/libgcc-excl.ver b/gcc/config/sh/libgcc-excl.ver new file mode 100644 index 000000000..325c74054 --- /dev/null +++ b/gcc/config/sh/libgcc-excl.ver @@ -0,0 +1,8 @@ +# Exclude various symbols which should not be visible in libgcc.so for SH. +%exclude { + __ashlsi3 + __ashrsi3 + __lshrsi3 + __mulsi3 # this is an SH1-only symbol. + __udivsi3 +} diff --git a/gcc/config/sh/libgcc-glibc.ver b/gcc/config/sh/libgcc-glibc.ver new file mode 100644 index 000000000..b8ec32653 --- /dev/null +++ b/gcc/config/sh/libgcc-glibc.ver @@ -0,0 +1,48 @@ +# Copyright (C) 2002, 2008 Free Software Foundation, Inc. 
+# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# In order to work around the very problems that force us to now generally +# create a libgcc.so, glibc reexported a number of routines from libgcc.a. +# By now choosing the same version tags for these specific routines, we +# maintain enough binary compatibility to allow future versions of glibc +# to defer implementation of these routines to libgcc.so via DT_AUXILIARY. + +# Note that we cannot use the default libgcc-glibc.ver file on sh, +# because GLIBC_2.0 does not exist on this architecture, as the first +# ever glibc release on the platform was GLIBC_2.2. + +%exclude { + __register_frame + __register_frame_table + __deregister_frame + __register_frame_info + __deregister_frame_info + __frame_state_for + __register_frame_info_table +} + +%inherit GCC_3.0 GLIBC_2.2 +GLIBC_2.2 { + __register_frame + __register_frame_table + __deregister_frame + __register_frame_info + __deregister_frame_info + __frame_state_for + __register_frame_info_table +} diff --git a/gcc/config/sh/linux-atomic.asm b/gcc/config/sh/linux-atomic.asm new file mode 100644 index 000000000..743c61bb7 --- /dev/null +++ b/gcc/config/sh/linux-atomic.asm @@ -0,0 +1,223 @@ +/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + + +!! Linux specific atomic routines for the Renesas / SuperH SH CPUs. +!! Linux kernel for SH3/4 has implemented the support for software +!! atomic sequences. + +#define FUNC(X) .type X,@function +#define HIDDEN_FUNC(X) FUNC(X); .hidden X +#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X +#define ENDFUNC(X) ENDFUNC0(X) + +#if ! 
__SH5__ + +#define ATOMIC_TEST_AND_SET(N,T,EXT) \ + .global __sync_lock_test_and_set_##N; \ + HIDDEN_FUNC(__sync_lock_test_and_set_##N); \ + .align 2; \ +__sync_lock_test_and_set_##N:; \ + mova 1f, r0; \ + nop; \ + mov r15, r1; \ + mov #(0f-1f), r15; \ +0: mov.##T @r4, r2; \ + mov.##T r5, @r4; \ +1: mov r1, r15; \ + rts; \ + EXT r2, r0; \ + ENDFUNC(__sync_lock_test_and_set_##N) + +ATOMIC_TEST_AND_SET (1,b,extu.b) +ATOMIC_TEST_AND_SET (2,w,extu.w) +ATOMIC_TEST_AND_SET (4,l,mov) + +#define ATOMIC_COMPARE_AND_SWAP(N,T,EXTS,EXT) \ + .global __sync_val_compare_and_swap_##N; \ + HIDDEN_FUNC(__sync_val_compare_and_swap_##N); \ + .align 2; \ +__sync_val_compare_and_swap_##N:; \ + mova 1f, r0; \ + EXTS r5, r5; \ + mov r15, r1; \ + mov #(0f-1f), r15; \ +0: mov.##T @r4, r2; \ + cmp/eq r2, r5; \ + bf 1f; \ + mov.##T r6, @r4; \ +1: mov r1, r15; \ + rts; \ + EXT r2, r0; \ + ENDFUNC(__sync_val_compare_and_swap_##N) + +ATOMIC_COMPARE_AND_SWAP (1,b,exts.b,extu.b) +ATOMIC_COMPARE_AND_SWAP (2,w,exts.w,extu.w) +ATOMIC_COMPARE_AND_SWAP (4,l,mov,mov) + +#define ATOMIC_BOOL_COMPARE_AND_SWAP(N,T,EXTS) \ + .global __sync_bool_compare_and_swap_##N; \ + HIDDEN_FUNC(__sync_bool_compare_and_swap_##N); \ + .align 2; \ +__sync_bool_compare_and_swap_##N:; \ + mova 1f, r0; \ + EXTS r5, r5; \ + mov r15, r1; \ + mov #(0f-1f), r15; \ +0: mov.##T @r4, r2; \ + cmp/eq r2, r5; \ + bf 1f; \ + mov.##T r6, @r4; \ +1: mov r1, r15; \ + rts; \ + movt r0; \ + ENDFUNC(__sync_bool_compare_and_swap_##N) + +ATOMIC_BOOL_COMPARE_AND_SWAP (1,b,exts.b) +ATOMIC_BOOL_COMPARE_AND_SWAP (2,w,exts.w) +ATOMIC_BOOL_COMPARE_AND_SWAP (4,l,mov) + +#define ATOMIC_FETCH_AND_OP(OP,N,T,EXT) \ + .global __sync_fetch_and_##OP##_##N; \ + HIDDEN_FUNC(__sync_fetch_and_##OP##_##N); \ + .align 2; \ +__sync_fetch_and_##OP##_##N:; \ + mova 1f, r0; \ + nop; \ + mov r15, r1; \ + mov #(0f-1f), r15; \ +0: mov.##T @r4, r2; \ + mov r5, r3; \ + OP r2, r3; \ + mov.##T r3, @r4; \ +1: mov r1, r15; \ + rts; \ + EXT r2, r0; \ + 
ENDFUNC(__sync_fetch_and_##OP##_##N) + +ATOMIC_FETCH_AND_OP(add,1,b,extu.b) +ATOMIC_FETCH_AND_OP(add,2,w,extu.w) +ATOMIC_FETCH_AND_OP(add,4,l,mov) + +ATOMIC_FETCH_AND_OP(or,1,b,extu.b) +ATOMIC_FETCH_AND_OP(or,2,w,extu.w) +ATOMIC_FETCH_AND_OP(or,4,l,mov) + +ATOMIC_FETCH_AND_OP(and,1,b,extu.b) +ATOMIC_FETCH_AND_OP(and,2,w,extu.w) +ATOMIC_FETCH_AND_OP(and,4,l,mov) + +ATOMIC_FETCH_AND_OP(xor,1,b,extu.b) +ATOMIC_FETCH_AND_OP(xor,2,w,extu.w) +ATOMIC_FETCH_AND_OP(xor,4,l,mov) + +#define ATOMIC_FETCH_AND_COMBOP(OP,OP0,OP1,N,T,EXT) \ + .global __sync_fetch_and_##OP##_##N; \ + HIDDEN_FUNC(__sync_fetch_and_##OP##_##N); \ + .align 2; \ +__sync_fetch_and_##OP##_##N:; \ + mova 1f, r0; \ + mov r15, r1; \ + mov #(0f-1f), r15; \ +0: mov.##T @r4, r2; \ + mov r5, r3; \ + OP0 r2, r3; \ + OP1 r3, r3; \ + mov.##T r3, @r4; \ +1: mov r1, r15; \ + rts; \ + EXT r2, r0; \ + ENDFUNC(__sync_fetch_and_##OP##_##N) + +ATOMIC_FETCH_AND_COMBOP(sub,sub,neg,1,b,extu.b) +ATOMIC_FETCH_AND_COMBOP(sub,sub,neg,2,w,extu.w) +ATOMIC_FETCH_AND_COMBOP(sub,sub,neg,4,l,mov) + +ATOMIC_FETCH_AND_COMBOP(nand,and,not,1,b,extu.b) +ATOMIC_FETCH_AND_COMBOP(nand,and,not,2,w,extu.w) +ATOMIC_FETCH_AND_COMBOP(nand,and,not,4,l,mov) + +#define ATOMIC_OP_AND_FETCH(OP,N,T,EXT) \ + .global __sync_##OP##_and_fetch_##N; \ + HIDDEN_FUNC(__sync_##OP##_and_fetch_##N); \ + .align 2; \ +__sync_##OP##_and_fetch_##N:; \ + mova 1f, r0; \ + nop; \ + mov r15, r1; \ + mov #(0f-1f), r15; \ +0: mov.##T @r4, r2; \ + mov r5, r3; \ + OP r2, r3; \ + mov.##T r3, @r4; \ +1: mov r1, r15; \ + rts; \ + EXT r3, r0; \ + ENDFUNC(__sync_##OP##_and_fetch_##N) + +ATOMIC_OP_AND_FETCH(add,1,b,extu.b) +ATOMIC_OP_AND_FETCH(add,2,w,extu.w) +ATOMIC_OP_AND_FETCH(add,4,l,mov) + +ATOMIC_OP_AND_FETCH(or,1,b,extu.b) +ATOMIC_OP_AND_FETCH(or,2,w,extu.w) +ATOMIC_OP_AND_FETCH(or,4,l,mov) + +ATOMIC_OP_AND_FETCH(and,1,b,extu.b) +ATOMIC_OP_AND_FETCH(and,2,w,extu.w) +ATOMIC_OP_AND_FETCH(and,4,l,mov) + +ATOMIC_OP_AND_FETCH(xor,1,b,extu.b) +ATOMIC_OP_AND_FETCH(xor,2,w,extu.w) 
+ATOMIC_OP_AND_FETCH(xor,4,l,mov) + +#define ATOMIC_COMBOP_AND_FETCH(OP,OP0,OP1,N,T,EXT) \ + .global __sync_##OP##_and_fetch_##N; \ + HIDDEN_FUNC(__sync_##OP##_and_fetch_##N); \ + .align 2; \ +__sync_##OP##_and_fetch_##N:; \ + mova 1f, r0; \ + mov r15, r1; \ + mov #(0f-1f), r15; \ +0: mov.##T @r4, r2; \ + mov r5, r3; \ + OP0 r2, r3; \ + OP1 r3, r3; \ + mov.##T r3, @r4; \ +1: mov r1, r15; \ + rts; \ + EXT r3, r0; \ + ENDFUNC(__sync_##OP##_and_fetch_##N) + +ATOMIC_COMBOP_AND_FETCH(sub,sub,neg,1,b,extu.b) +ATOMIC_COMBOP_AND_FETCH(sub,sub,neg,2,w,extu.w) +ATOMIC_COMBOP_AND_FETCH(sub,sub,neg,4,l,mov) + +ATOMIC_COMBOP_AND_FETCH(nand,and,not,1,b,extu.b) +ATOMIC_COMBOP_AND_FETCH(nand,and,not,2,w,extu.w) +ATOMIC_COMBOP_AND_FETCH(nand,and,not,4,l,mov) + +.section .note.GNU-stack,"",%progbits +.previous + +#endif /* ! __SH5__ */ diff --git a/gcc/config/sh/linux-unwind.h b/gcc/config/sh/linux-unwind.h new file mode 100644 index 000000000..5a78e3172 --- /dev/null +++ b/gcc/config/sh/linux-unwind.h @@ -0,0 +1,256 @@ +/* DWARF2 EH unwinding support for SH Linux. + Copyright (C) 2004, 2005, 2006, 2007, 2009, 2012 Free Software Foundation, + Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. + Don't use this at all if inhibit_libc is used. */ + +#ifndef inhibit_libc + +#include <signal.h> +#include <sys/ucontext.h> +#include "insn-constants.h" + +# if defined (__SH5__) +#define SH_DWARF_FRAME_GP0 0 +#define SH_DWARF_FRAME_FP0 77 +#define SH_DWARF_FRAME_BT0 68 +#define SH_DWARF_FRAME_PR_MEDIA 18 +#define SH_DWARF_FRAME_SR 65 +#define SH_DWARF_FRAME_FPSCR 76 +#else +#define SH_DWARF_FRAME_GP0 0 +#define SH_DWARF_FRAME_FP0 25 +#define SH_DWARF_FRAME_XD0 87 +#define SH_DWARF_FRAME_PR 17 +#define SH_DWARF_FRAME_GBR 18 +#define SH_DWARF_FRAME_MACH 20 +#define SH_DWARF_FRAME_MACL 21 +#define SH_DWARF_FRAME_PC 16 +#define SH_DWARF_FRAME_SR 22 +#define SH_DWARF_FRAME_FPUL 23 +#define SH_DWARF_FRAME_FPSCR 24 +#endif /* defined (__SH5__) */ + +#if defined (__SH5__) + +#define MD_FALLBACK_FRAME_STATE_FOR shmedia_fallback_frame_state + +static _Unwind_Reason_Code +shmedia_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned char *pc = context->ra; + struct sigcontext *sc; + long new_cfa; + int i, r; + + /* movi 0x10,r9; shori 0x77,r9; trapa r9; nop (sigreturn) */ + /* movi 0x10,r9; shori 0xad,r9; trapa r9; nop (rt_sigreturn) */ + if ((*(unsigned long *) (pc-1) == 0xcc004090) + && (*(unsigned long *) (pc+3) == 0xc801dc90) + && (*(unsigned long *) (pc+7) == 0x6c91fff0) + && (*(unsigned long *) (pc+11) == 0x6ff0fff0)) + sc = context->cfa; + else if ((*(unsigned long *) (pc-1) == 0xcc004090) + && (*(unsigned long *) (pc+3) == 0xc802b490) + && (*(unsigned long *) (pc+7) == 0x6c91fff0) + && (*(unsigned long *) (pc+11) == 0x6ff0fff0)) + { + struct rt_sigframe { + 
siginfo_t *pinfo; + void *puc; + siginfo_t info; + 
struct ucontext uc; + } *rt_ = context->cfa; + /* The void * cast is necessary to avoid an aliasing warning. + The aliasing warning is correct, but should not be a problem + because it does not alias anything. */ + sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; + + new_cfa = sc->sc_regs[15]; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 15; + fs->regs.cfa_offset = new_cfa - (long) context->cfa; + + for (i = 0; i < 63; i++) + { + if (i == 15) + continue; + + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset + = (long)&(sc->sc_regs[i]) - new_cfa; + } + + fs->regs.reg[SH_DWARF_FRAME_SR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_SR].loc.offset + = (long)&(sc->sc_sr) - new_cfa; + + r = SH_DWARF_FRAME_BT0; + for (i = 0; i < 8; i++) + { + fs->regs.reg[r+i].how = REG_SAVED_OFFSET; + fs->regs.reg[r+i].loc.offset + = (long)&(sc->sc_tregs[i]) - new_cfa; + } + + r = SH_DWARF_FRAME_FP0; + for (i = 0; i < 32; i++) + { + fs->regs.reg[r+i].how = REG_SAVED_OFFSET; + fs->regs.reg[r+i].loc.offset + = (long)&(sc->sc_fpregs[i]) - new_cfa; + } + + fs->regs.reg[SH_DWARF_FRAME_FPSCR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_FPSCR].loc.offset + = (long)&(sc->sc_fpscr) - new_cfa; + + /* We use the slot for the zero register to save return address. 
*/ + fs->regs.reg[63].how = REG_SAVED_OFFSET; + fs->regs.reg[63].loc.offset + = (long)&(sc->sc_pc) - new_cfa; + fs->retaddr_column = 63; + fs->signal_frame = 1; + return _URC_NO_REASON; +} + +#else /* defined (__SH5__) */ + +#define MD_FALLBACK_FRAME_STATE_FOR sh_fallback_frame_state + +static _Unwind_Reason_Code +sh_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned char *pc = context->ra; + struct sigcontext *sc; + long new_cfa; + int i; +#if defined (__SH3E__) || defined (__SH4__) + int r; +#endif + + /* mov.w 1f,r3; trapa #0x10; 1: .short 0x77 (sigreturn) */ + /* mov.w 1f,r3; trapa #0x10; 1: .short 0xad (rt_sigreturn) */ + /* Newer kernel uses pad instructions to avoid an SH-4 core bug. */ + /* mov.w 1f,r3; trapa #0x10; or r0,r0; or r0,r0; or r0,r0; or r0,r0; + or r0,r0; 1: .short 0x77 (sigreturn) */ + /* mov.w 1f,r3; trapa #0x10; or r0,r0; or r0,r0; or r0,r0; or r0,r0; + or r0,r0; 1: .short 0xad (rt_sigreturn) */ + if (((*(unsigned short *) (pc+0) == 0x9300) + && (*(unsigned short *) (pc+2) == 0xc310) + && (*(unsigned short *) (pc+4) == 0x0077)) + || (((*(unsigned short *) (pc+0) == 0x9305) + && (*(unsigned short *) (pc+2) == 0xc310) + && (*(unsigned short *) (pc+14) == 0x0077)))) + sc = context->cfa; + else if (((*(unsigned short *) (pc+0) == 0x9300) + && (*(unsigned short *) (pc+2) == 0xc310) + && (*(unsigned short *) (pc+4) == 0x00ad)) + || (((*(unsigned short *) (pc+0) == 0x9305) + && (*(unsigned short *) (pc+2) == 0xc310) + && (*(unsigned short *) (pc+14) == 0x00ad)))) + { + struct rt_sigframe { + siginfo_t info; + struct ucontext uc; + } *rt_ = context->cfa; + /* The void * cast is necessary to avoid an aliasing warning. + The aliasing warning is correct, but should not be a problem + because it does not alias anything. 
*/ + sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; + + new_cfa = sc->sc_regs[15]; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 15; + fs->regs.cfa_offset = new_cfa - (long) context->cfa; + + for (i = 0; i < 15; i++) + { + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset + = (long)&(sc->sc_regs[i]) - new_cfa; + } + + fs->regs.reg[SH_DWARF_FRAME_PR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_PR].loc.offset + = (long)&(sc->sc_pr) - new_cfa; + fs->regs.reg[SH_DWARF_FRAME_SR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_SR].loc.offset + = (long)&(sc->sc_sr) - new_cfa; + fs->regs.reg[SH_DWARF_FRAME_GBR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_GBR].loc.offset + = (long)&(sc->sc_gbr) - new_cfa; + fs->regs.reg[SH_DWARF_FRAME_MACH].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_MACH].loc.offset + = (long)&(sc->sc_mach) - new_cfa; + fs->regs.reg[SH_DWARF_FRAME_MACL].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_MACL].loc.offset + = (long)&(sc->sc_macl) - new_cfa; + +#if defined (__SH3E__) || defined (__SH4__) + r = SH_DWARF_FRAME_FP0; + for (i = 0; i < 16; i++) + { + fs->regs.reg[r+i].how = REG_SAVED_OFFSET; + fs->regs.reg[r+i].loc.offset + = (long)&(sc->sc_fpregs[i]) - new_cfa; + } + + r = SH_DWARF_FRAME_XD0; + for (i = 0; i < 8; i++) + { + fs->regs.reg[r+i].how = REG_SAVED_OFFSET; + fs->regs.reg[r+i].loc.offset + = (long)&(sc->sc_xfpregs[2*i]) - new_cfa; + } + + fs->regs.reg[SH_DWARF_FRAME_FPUL].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_FPUL].loc.offset + = (long)&(sc->sc_fpul) - new_cfa; + fs->regs.reg[SH_DWARF_FRAME_FPSCR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_FPSCR].loc.offset + = (long)&(sc->sc_fpscr) - new_cfa; +#endif + + fs->regs.reg[SH_DWARF_FRAME_PC].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_PC].loc.offset + = (long)&(sc->sc_pc) - new_cfa; + fs->retaddr_column = SH_DWARF_FRAME_PC; + 
fs->signal_frame = 1; + return _URC_NO_REASON; +} +#endif /* defined (__SH5__) */ + +#endif /* inhibit_libc */ diff --git a/gcc/config/sh/linux.h b/gcc/config/sh/linux.h new file mode 100644 index 000000000..a090dae1c --- /dev/null +++ b/gcc/config/sh/linux.h @@ -0,0 +1,137 @@ +/* Definitions for SH running Linux-based GNU systems using ELF + Copyright (C) 1999, 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2010 + Free Software Foundation, Inc. + Contributed by Kazumoto Kojima + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Run-time Target Specification. */ +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (SH GNU/Linux with ELF)", stderr); + +/* Enable DWARF 2 exceptions. 
*/ +#undef DWARF2_UNWIND_INFO +#define DWARF2_UNWIND_INFO 1 + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "\ + %{posix:-D_POSIX_SOURCE} \ + %{pthread:-D_REENTRANT -D_PTHREADS} \ +" + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + LINUX_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (0) + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT \ + (TARGET_CPU_DEFAULT | MASK_USERMODE | TARGET_ENDIAN_DEFAULT \ + | TARGET_OPT_DEFAULT) + +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" + +#undef SUBTARGET_LINK_EMUL_SUFFIX +#define SUBTARGET_LINK_EMUL_SUFFIX "_linux" +#undef SUBTARGET_LINK_SPEC +#define SUBTARGET_LINK_SPEC \ + "%{shared:-shared} \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " LINUX_DYNAMIC_LINKER "} \ + %{static:-static}" + +/* Output assembler code to STREAM to call the profiler. */ + +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER(STREAM,LABELNO) \ + do { \ + if (TARGET_SHMEDIA) \ + { \ + fprintf (STREAM, "\tpt\t1f,tr1\n"); \ + fprintf (STREAM, "\taddi.l\tr15,-8,r15\n"); \ + fprintf (STREAM, "\tst.l\tr15,0,r18\n"); \ + if (flag_pic) \ + { \ + const char *gofs = "(datalabel _GLOBAL_OFFSET_TABLE_-(0f-.))"; \ + fprintf (STREAM, "\tmovi\t((%s>>16)&0xffff),r21\n", gofs); \ + fprintf (STREAM, "\tshori\t(%s & 0xffff),r21\n", gofs); \ + fprintf (STREAM, "0:\tptrel/u\tr21,tr0\n"); \ + fprintf (STREAM, "\tmovi\t((mcount@GOTPLT)&0xffff),r22\n"); \ + fprintf (STREAM, "\tgettr\ttr0,r21\n"); \ + fprintf (STREAM, "\tadd.l\tr21,r22,r21\n"); \ + fprintf (STREAM, "\tld.l\tr21,0,r21\n"); \ + fprintf (STREAM, "\tptabs\tr21,tr0\n"); \ + } \ + else \ + fprintf (STREAM, "\tpt\tmcount,tr0\n"); \ + fprintf (STREAM, "\tgettr\ttr1,r18\n"); \ + fprintf (STREAM, "\tblink\ttr0,r63\n"); \ + fprintf (STREAM, "1:\tld.l\tr15,0,r18\n"); \ + fprintf (STREAM, "\taddi.l\tr15,8,r15\n"); \ + } \ + else \ + { \ + if (flag_pic) \ + { \ + fprintf (STREAM, "\tmov.l\t3f,r1\n"); \ + fprintf (STREAM, 
"\tmova\t3f,r0\n"); \ + fprintf (STREAM, "\tadd\tr1,r0\n"); \ + fprintf (STREAM, "\tmov.l\t1f,r1\n"); \ + fprintf (STREAM, "\tmov.l\t@(r0,r1),r1\n"); \ + } \ + else \ + fprintf (STREAM, "\tmov.l\t1f,r1\n"); \ + fprintf (STREAM, "\tsts.l\tpr,@-r15\n"); \ + fprintf (STREAM, "\tmova\t2f,r0\n"); \ + fprintf (STREAM, "\tjmp\t@r1\n"); \ + fprintf (STREAM, "\tlds\tr0,pr\n"); \ + fprintf (STREAM, "\t.align\t2\n"); \ + if (flag_pic) \ + { \ + fprintf (STREAM, "1:\t.long\tmcount@GOT\n"); \ + fprintf (STREAM, "3:\t.long\t_GLOBAL_OFFSET_TABLE_\n"); \ + } \ + else \ + fprintf (STREAM, "1:\t.long\tmcount\n"); \ + fprintf (STREAM, "2:\tlds.l\t@r15+,pr\n"); \ + } \ + } while (0) + +#define MD_UNWIND_SUPPORT "config/sh/linux-unwind.h" + +/* For SH3 and SH4, we use a slot of the unwind frame which corresponds + to a fake register number 16 as a placeholder for the return address + in MD_FALLBACK_FRAME_STATE_FOR and its content will be read with + _Unwind_GetGR which uses dwarf_reg_size_table to get the size of + the register. So the entry of dwarf_reg_size_table corresponding to + this slot must be set. To do this, we redefine DBX_REGISTER_NUMBER + so as to return itself for 16. */ +#undef DBX_REGISTER_NUMBER +#define DBX_REGISTER_NUMBER(REGNO) \ + ((! TARGET_SH5 && (REGNO) == 16) ? 16 : SH_DBX_REGISTER_NUMBER (REGNO)) + +/* Since libgcc is compiled with -fpic for this target, we can't use + __sdivsi3_1 as the division strategy for -O0 and -Os. */ +#undef SH_DIV_STRATEGY_DEFAULT +#define SH_DIV_STRATEGY_DEFAULT SH_DIV_CALL2 +#undef SH_DIV_STR_FOR_SIZE +#define SH_DIV_STR_FOR_SIZE "call2" diff --git a/gcc/config/sh/little.h b/gcc/config/sh/little.h new file mode 100644 index 000000000..f87c7b77d --- /dev/null +++ b/gcc/config/sh/little.h @@ -0,0 +1,21 @@ +/* Definition of little endian SH machine for GNU compiler. + + Copyright (C) 2002, 2007 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#define TARGET_ENDIAN_DEFAULT MASK_LITTLE_ENDIAN diff --git a/gcc/config/sh/netbsd-elf.h b/gcc/config/sh/netbsd-elf.h new file mode 100644 index 000000000..50bb2f2db --- /dev/null +++ b/gcc/config/sh/netbsd-elf.h @@ -0,0 +1,117 @@ +/* Definitions for SH running NetBSD using ELF + Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc. + Contributed by Wasabi Systems, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Run-time Target Specification. 
*/ +#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN +#define TARGET_VERSION_ENDIAN "le" +#else +#define TARGET_VERSION_ENDIAN "" +#endif + +#if TARGET_CPU_DEFAULT & MASK_SH5 +#if TARGET_CPU_DEFAULT & MASK_SH_E +#define TARGET_VERSION_CPU "sh5" +#else +#define TARGET_VERSION_CPU "sh64" +#endif /* MASK_SH_E */ +#else +#define TARGET_VERSION_CPU "sh" +#endif /* MASK_SH5 */ + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (NetBSD/%s%s ELF)", \ + TARGET_VERSION_CPU, TARGET_VERSION_ENDIAN) + + +/* Extra specs needed for NetBSD SuperH ELF targets. */ + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "netbsd_entry_point", NETBSD_ENTRY_POINT }, + + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + builtin_define ("__NO_LEADING_UNDERSCORES__"); \ + } \ + while (0) + +/* Provide a LINK_SPEC appropriate for a NetBSD/sh ELF target. + We use the SH_LINK_SPEC from sh/sh.h, and define the appropriate + SUBTARGET_LINK_SPEC that pulls in what we need from a generic + NetBSD ELF LINK_SPEC. */ + +/* LINK_EMUL_PREFIX from sh/elf.h */ + +#undef SUBTARGET_LINK_EMUL_SUFFIX +#define SUBTARGET_LINK_EMUL_SUFFIX "_nbsd" + +#undef SUBTARGET_LINK_SPEC +#define SUBTARGET_LINK_SPEC NETBSD_LINK_SPEC_ELF + +#undef LINK_SPEC +#define LINK_SPEC SH_LINK_SPEC + +#define NETBSD_ENTRY_POINT "__start" + +/* Provide a CPP_SPEC appropriate for NetBSD. */ +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC NETBSD_CPP_SPEC + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT \ + (TARGET_CPU_DEFAULT | MASK_USERMODE | TARGET_ENDIAN_DEFAULT) + +/* Define this because FUNCTION_PROFILER emits its own labels and we do not need the profile counters. 
*/ +#define NO_PROFILE_COUNTERS 1 + +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER(STREAM,LABELNO) \ +do \ + { \ + if (TARGET_SHMEDIA32 || TARGET_SHMEDIA64) \ + { \ + /* FIXME */ \ + sorry ("unimplemented-shmedia profiling"); \ + } \ + else \ + { \ + fprintf((STREAM), "\tmov.l\t%sLP%d,r1\n", \ + LOCAL_LABEL_PREFIX, (LABELNO)); \ + fprintf((STREAM), "\tmova\t%sLP%dr,r0\n", \ + LOCAL_LABEL_PREFIX, (LABELNO)); \ + fprintf((STREAM), "\tjmp\t@r1\n"); \ + fprintf((STREAM), "\tnop\n"); \ + fprintf((STREAM), "\t.align\t2\n"); \ + fprintf((STREAM), "%sLP%d:\t.long\t__mcount\n", \ + LOCAL_LABEL_PREFIX, (LABELNO)); \ + fprintf((STREAM), "%sLP%dr:\n", LOCAL_LABEL_PREFIX, (LABELNO)); \ + } \ + } \ +while (0) + +/* Since libgcc is compiled with -fpic for this target, we can't use + __sdivsi3_1 as the division strategy for -O0 and -Os. */ +#undef SH_DIV_STRATEGY_DEFAULT +#define SH_DIV_STRATEGY_DEFAULT SH_DIV_CALL2 +#undef SH_DIV_STR_FOR_SIZE +#define SH_DIV_STR_FOR_SIZE "call2" diff --git a/gcc/config/sh/newlib.h b/gcc/config/sh/newlib.h new file mode 100644 index 000000000..13099c1f8 --- /dev/null +++ b/gcc/config/sh/newlib.h @@ -0,0 +1,25 @@ +/* Definitions of target machine for gcc for Super-H using sh-superh-elf. + Copyright (C) 2001, 2007 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + + +/* This header file is used when with_libgloss is enabled during gcc + configuration. 
*/ + +#undef LIB_SPEC +#define LIB_SPEC "-lc -lgloss" diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md new file mode 100644 index 000000000..b6508b70d --- /dev/null +++ b/gcc/config/sh/predicates.md @@ -0,0 +1,833 @@ +;; Predicate definitions for Renesas / SuperH SH. +;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 +;; Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Returns 1 if OP is a PDImode if_then_else whose condition is (TAR & 3) == 3 and whose arms are a MEM at TAR and a sign_extend or truncate of TAR. + +(define_predicate "trapping_target_operand" + (match_code "if_then_else") +{ + rtx cond, mem, res, tar, and_expr; + + if (GET_MODE (op) != PDImode) + return 0; + cond = XEXP (op, 0); + mem = XEXP (op, 1); + res = XEXP (op, 2); + if (!MEM_P (mem) + || (GET_CODE (res) != SIGN_EXTEND && GET_CODE (res) != TRUNCATE)) + return 0; + tar = XEXP (res, 0); + if (!rtx_equal_p (XEXP (mem, 0), tar) + || GET_MODE (tar) != Pmode) + return 0; + if (GET_CODE (cond) == CONST) + { + cond = XEXP (cond, 0); + if (!satisfies_constraint_Csy (tar)) + return 0; + if (GET_CODE (tar) == CONST) + tar = XEXP (tar, 0); + } + else if (!arith_reg_operand (tar, VOIDmode) + && ! 
satisfies_constraint_Csy (tar)) + return 0; + if (GET_CODE (cond) != EQ) + return 0; + and_expr = XEXP (cond, 0); + return (GET_CODE (and_expr) == AND + && rtx_equal_p (XEXP (and_expr, 0), tar) + && CONST_INT_P (XEXP (and_expr, 1)) + && CONST_INT_P (XEXP (cond, 1)) + && INTVAL (XEXP (and_expr, 1)) == 3 + && INTVAL (XEXP (cond, 1)) == 3); +}) + +;; Like logical_operand, but on SHmedia also accept DImode constants satisfying J16. + +(define_predicate "and_operand" + (match_code "subreg,reg,const_int") +{ + if (logical_operand (op, mode)) + return 1; + + /* Check mshflo.l / mshflhi.l opportunities. */ + if (TARGET_SHMEDIA + && mode == DImode + && satisfies_constraint_J16 (op)) + return 1; + + return 0; +}) + +;; Like arith_reg_dest, but this predicate is defined with +;; define_special_predicate, not define_predicate. + +(define_special_predicate "any_arith_reg_dest" + (match_code "subreg,reg") +{ + return arith_reg_dest (op, mode); +}) + +;; Like register_operand, but this predicate is defined with +;; define_special_predicate, not define_predicate. + +(define_special_predicate "any_register_operand" + (match_code "subreg,reg") +{ + return register_operand (op, mode); +}) + +;; Returns 1 if OP is a valid source operand for an arithmetic insn. + +(define_predicate "arith_operand" + (match_code "subreg,reg,const_int,truncate") +{ + if (arith_reg_operand (op, mode)) + return 1; + + if (TARGET_SHMEDIA) + { + /* FIXME: We should be checking whether the CONST_INT fits in a + signed 16-bit here, but this causes reload_cse to crash when + attempting to transform a sequence of two 64-bit sets of the + same register from literal constants into a set and an add, + when the difference is too wide for an add. */ + if (CONST_INT_P (op) + || satisfies_constraint_Css (op)) + return 1; + else if (GET_CODE (op) == TRUNCATE + && REG_P (XEXP (op, 0)) + && ! 
system_reg_operand (XEXP (op, 0), VOIDmode) + && (mode == VOIDmode || mode == GET_MODE (op)) + && (GET_MODE_SIZE (GET_MODE (op)) + < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))) + && (! 
FP_REGISTER_P (REGNO (XEXP (op, 0))) + || GET_MODE_SIZE (GET_MODE (op)) == 4)) + return register_operand (XEXP (op, 0), VOIDmode); + else + return 0; + } + else if (satisfies_constraint_I08 (op)) + return 1; + + return 0; +}) + +;; Like above, but for DImode destinations: forbid paradoxical DImode +;; subregs, because this would lead to missing sign extensions when +;; truncating from DImode to SImode. + +(define_predicate "arith_reg_dest" + (match_code "subreg,reg") +{ + if (mode == DImode && GET_CODE (op) == SUBREG + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8 + && TARGET_SHMEDIA) + return 0; + return arith_reg_operand (op, mode); +}) + +;; Returns 1 if OP is a normal arithmetic register. + +(define_predicate "arith_reg_operand" + (match_code "subreg,reg,sign_extend") +{ + if (register_operand (op, mode)) + { + int regno; + + if (REG_P (op)) + regno = REGNO (op); + else if (GET_CODE (op) == SUBREG && REG_P (SUBREG_REG (op))) + regno = REGNO (SUBREG_REG (op)); + else + return 1; + + return (regno != T_REG && regno != PR_REG + && ! TARGET_REGISTER_P (regno) + && (regno != FPUL_REG || TARGET_SH4) + && regno != MACH_REG && regno != MACL_REG); + } + /* Allow a no-op sign extension - compare LOAD_EXTEND_OP. + We allow SImode here, as not using an FP register is just a matter of + proper register allocation. */ + if (TARGET_SHMEDIA + && GET_MODE (op) == DImode && GET_CODE (op) == SIGN_EXTEND + && GET_MODE (XEXP (op, 0)) == SImode + && GET_CODE (XEXP (op, 0)) != SUBREG) + return register_operand (XEXP (op, 0), VOIDmode); +#if 0 /* Can't do this because of PROMOTE_MODE for unsigned vars. 
*/ + if (GET_MODE (op) == SImode && GET_CODE (op) == SIGN_EXTEND + && GET_MODE (XEXP (op, 0)) == HImode + && REG_P (XEXP (op, 0)) + && REGNO (XEXP (op, 0)) <= LAST_GENERAL_REG) + return register_operand (XEXP (op, 0), VOIDmode); +#endif + if (GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_INT + && GET_CODE (op) == SUBREG + && GET_MODE (SUBREG_REG (op)) == DImode + && GET_CODE (SUBREG_REG (op)) == SIGN_EXTEND + && GET_MODE (XEXP (SUBREG_REG (op), 0)) == SImode + && GET_CODE (XEXP (SUBREG_REG (op), 0)) != SUBREG) + return register_operand (XEXP (SUBREG_REG (op), 0), VOIDmode); + return 0; +}) + +;; Returns 1 if OP is a valid source operand for a compare insn. + +(define_predicate "arith_reg_or_0_operand" + (match_code "subreg,reg,const_int,const_vector") +{ + if (arith_reg_operand (op, mode)) + return 1; + + if (satisfies_constraint_Z (op)) + return 1; + + return 0; +}) + +;; Returns 1 if OP is a PLUS, MINUS, MULT or DIV expression whose mode is MODE. + +(define_predicate "binary_float_operator" + (and (match_code "plus,minus,mult,div") + (match_test "GET_MODE (op) == mode"))) + +;; Returns 1 if OP is an AND, IOR or XOR expression whose mode is MODE. + +(define_predicate "binary_logical_operator" + (and (match_code "and,ior,xor") + (match_test "GET_MODE (op) == mode"))) + +;; Return 1 if OP is an address suitable for a cache manipulation operation. +;; MODE has the meaning as in address_operand. + +(define_special_predicate "cache_address_operand" + (match_code "plus,reg") +{ + if (GET_CODE (op) == PLUS) + { + if (!REG_P (XEXP (op, 0))) + return 0; + if (!CONST_INT_P (XEXP (op, 1)) + || (INTVAL (XEXP (op, 1)) & 31)) + return 0; + } + else if (!REG_P (op)) + return 0; + return address_operand (op, mode); +}) + +;; Return 1 if OP is a valid source operand for shmedia cmpgt / cmpgtu. 
+ +(define_predicate "cmp_operand" + (match_code "subreg,reg,const_int") +{ + if (satisfies_constraint_N (op)) + return 1; + if (TARGET_SHMEDIA + && mode != DImode && GET_CODE (op) == SUBREG + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4) + return 0; + return arith_reg_operand (op, mode); +}) + +;; Like arith_operand, but also accept the SImode T register when TARGET_SH1. + +(define_predicate "cmpsi_operand" + (match_code "subreg,reg,const_int") +{ + if (REG_P (op) && REGNO (op) == T_REG + && GET_MODE (op) == SImode + && TARGET_SH1) + return 1; + return arith_operand (op, mode); +}) + +;; Returns 1 if OP is a PLUS or MULT expression whose mode is MODE. + +(define_predicate "commutative_float_operator" + (and (match_code "plus,mult") + (match_test "GET_MODE (op) == mode"))) + +;; Returns 1 if OP is an EQ or NE comparison. + +(define_predicate "equality_comparison_operator" + (match_code "eq,ne")) + +;; Like arith_reg_operand, but a TRUNCATE is checked with arith_operand instead. + +(define_predicate "extend_reg_operand" + (match_code "subreg,reg,truncate") +{ + return (GET_CODE (op) == TRUNCATE + ? arith_operand + : arith_reg_operand) (op, mode); +}) + +;; Like arith_reg_or_0_operand, but a TRUNCATE is checked with arith_operand instead. + +(define_predicate "extend_reg_or_0_operand" + (match_code "subreg,reg,truncate,const_int") +{ + return (GET_CODE (op) == TRUNCATE + ? arith_operand + : arith_reg_or_0_operand) (op, mode); +}) + +;; Like arith_reg_operand, but this predicate does not accept SIGN_EXTEND. + +(define_predicate "ext_dest_operand" + (match_code "subreg,reg") +{ + return arith_reg_operand (op, mode); +}) + +;; Like fp_arith_reg_operand, but reject DImode subregs of registers narrower than 8 bytes. 
+ +(define_predicate "fp_arith_reg_dest" + (match_code "subreg,reg") +{ + if (mode == DImode && GET_CODE (op) == SUBREG + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8) + return 0; + return fp_arith_reg_operand (op, mode); +}) + +;; Returns 1 if OP is a register operand that is a pseudo or a floating point register. 
+ +(define_predicate "fp_arith_reg_operand" + (match_code "subreg,reg") +{ + if (register_operand (op, mode)) + { + int regno; + + if (REG_P (op)) + regno = REGNO (op); + else if (GET_CODE (op) == SUBREG && REG_P (SUBREG_REG (op))) + regno = REGNO (SUBREG_REG (op)); + else + return 1; + + return (regno >= FIRST_PSEUDO_REGISTER + || FP_REGISTER_P (regno)); + } + return 0; +}) + +;; Returns 1 if OP is a PSImode register that is FPSCR or, before reload, a pseudo. + +(define_predicate "fpscr_operand" + (match_code "reg") +{ + return (REG_P (op) + && (REGNO (op) == FPSCR_REG + || (REGNO (op) >= FIRST_PSEUDO_REGISTER + && !(reload_in_progress || reload_completed))) + && GET_MODE (op) == PSImode); +}) + +;; Returns 1 if OP is FPUL or a pseudo register in MODE; on SHmedia, defers to fp_arith_reg_operand. + +(define_predicate "fpul_operand" + (match_code "reg") +{ + if (TARGET_SHMEDIA) + return fp_arith_reg_operand (op, mode); + + return (REG_P (op) + && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER) + && GET_MODE (op) == mode); +}) + +;; Like nonimmediate_operand, but a TRUNCATE is checked with arith_operand instead. + +(define_predicate "general_extend_operand" + (match_code "subreg,reg,mem,truncate") +{ + return (GET_CODE (op) == TRUNCATE + ? arith_operand + : nonimmediate_operand) (op, mode); +}) + +;; Returns 1 if OP can be source of a simple move operation. Same as +;; general_operand, but a LABEL_REF is valid, PRE_DEC is invalid as +;; are subregs of system registers. 
+ +(define_predicate "general_movsrc_operand" + (match_code "subreg,reg,const_int,const_double,mem,symbol_ref,label_ref,const,const_vector") +{ + if (MEM_P (op)) + { + rtx inside = XEXP (op, 0); + if (GET_CODE (inside) == CONST) + inside = XEXP (inside, 0); + + if (GET_CODE (inside) == LABEL_REF) + return 1; + + if (GET_CODE (inside) == PLUS + && GET_CODE (XEXP (inside, 0)) == LABEL_REF + && CONST_INT_P (XEXP (inside, 1))) + return 1; + + /* Only post inc allowed. 
*/ + if (GET_CODE (inside) == PRE_DEC) + return 0; + } + + if (TARGET_SHMEDIA + && (GET_CODE (op) == PARALLEL || GET_CODE (op) == CONST_VECTOR) + && sh_rep_vec (op, mode)) + return 1; + if (TARGET_SHMEDIA && 1 + && GET_CODE (op) == SUBREG && GET_MODE (op) == mode + && SUBREG_REG (op) == const0_rtx && subreg_lowpart_p (op)) + /* FIXME */ abort (); /* return 1; */ + return general_operand (op, mode); +}) + +;; Returns 1 if OP can be a destination of a move. Same as +;; general_operand, but no postinc allowed. + +(define_predicate "general_movdst_operand" + (match_code "subreg,reg,mem") +{ + /* Only pre dec allowed. */ + if (MEM_P (op) && GET_CODE (XEXP (op, 0)) == POST_INC) + return 0; + if (mode == DImode && TARGET_SHMEDIA && GET_CODE (op) == SUBREG + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8 + && ! (high_life_started || reload_completed)) + return 0; + + return general_operand (op, mode); +}) + + +;; Returns 1 if OP is a POST_INC on stack pointer register. + +(define_predicate "sh_no_delay_pop_operand" + (match_code "mem") +{ + rtx inside; + inside = XEXP (op, 0); + + if (GET_CODE (op) == MEM && GET_MODE (op) == SImode + && GET_CODE (inside) == POST_INC + && GET_CODE (XEXP (inside, 0)) == REG + && REGNO (XEXP (inside, 0)) == SP_REG) + return 1; + + return 0; +}) + + +;; Returns 1 if OP is a MEM that can be source of a simple move operation. + +(define_predicate "unaligned_load_operand" + (match_code "mem") +{ + rtx inside; + + if (!MEM_P (op) || GET_MODE (op) != mode) + return 0; + + inside = XEXP (op, 0); + + if (GET_CODE (inside) == POST_INC) + inside = XEXP (inside, 0); + + if (REG_P (inside)) + return 1; + + return 0; +}) + +;; Returns 1 if OP is a GT, GE, GTU or GEU comparison. + +(define_predicate "greater_comparison_operator" + (match_code "gt,ge,gtu,geu")) + +;; Returns 1 if OP is a TRUNCATE in MODE of a floating point register. 
+ +(define_predicate "inqhi_operand" + (match_code "truncate") +{ + if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op)) + return 0; + op = XEXP (op, 0); + /* Can't use true_regnum here because copy_cost wants to know about + SECONDARY_INPUT_RELOAD_CLASS. */ + return REG_P (op) && FP_REGISTER_P (REGNO (op)); +}) + +;; Returns 1 after reload if OP is a sub-word integer register whose true_regnum is at most LAST_GENERAL_REG. + +(define_special_predicate "int_gpr_dest" + (match_code "subreg,reg") +{ + enum machine_mode op_mode = GET_MODE (op); + + if (GET_MODE_CLASS (op_mode) != MODE_INT + || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD) + return 0; + if (! reload_completed) + return 0; + return true_regnum (op) <= LAST_GENERAL_REG; +}) + +;; Returns 1 if OP is an LT, LE, LTU or LEU comparison. + +(define_predicate "less_comparison_operator" + (match_code "lt,le,ltu,leu")) + +;; Returns 1 if OP is a valid source operand for a logical operation. + +(define_predicate "logical_operand" + (match_code "subreg,reg,const_int") +{ + if (TARGET_SHMEDIA + && mode != DImode && GET_CODE (op) == SUBREG + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4) + return 0; + + if (arith_reg_operand (op, mode)) + return 1; + + if (TARGET_SHMEDIA) + { + if (satisfies_constraint_I10 (op)) + return 1; + else + return 0; + } + else if (satisfies_constraint_K08 (op)) + return 1; + + return 0; +}) + +;; Returns 1 if OP is an AND, IOR or XOR expression. + +(define_predicate "logical_operator" + (match_code "and,ior,xor")) + +;; Like arith_reg_operand, but for register source operands of narrow +;; logical SHMEDIA operations: forbid subregs of DImode / TImode regs. 
+ +(define_predicate "logical_reg_operand" + (match_code "subreg,reg") +{ + if (TARGET_SHMEDIA + && GET_CODE (op) == SUBREG + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4 + && mode != DImode) + return 0; + return arith_reg_operand (op, mode); +}) + +;; Returns 1 if OP is a CONST_INT multiple of 8 in the range 8 .. 56. 
+ +(define_predicate "mextr_bit_offset" + (match_code "const_int") +{ + HOST_WIDE_INT i; + + if (!CONST_INT_P (op)) + return 0; + i = INTVAL (op); + return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0; +}) + +;; Like extend_reg_or_0_operand, but also accept the constant -1. + +(define_predicate "minuend_operand" + (match_code "subreg,reg,truncate,const_int") +{ + return op == constm1_rtx || extend_reg_or_0_operand (op, mode); +}) + +;; Returns 1 if OP is a MINUS or DIV expression whose mode is MODE. + +(define_predicate "noncommutative_float_operator" + (and (match_code "minus,div") + (match_test "GET_MODE (op) == mode"))) + +;; UNORDERED is only supported on SHMEDIA. + +(define_predicate "sh_float_comparison_operator" + (ior (match_operand 0 "ordered_comparison_operator") + (and (match_test "TARGET_SHMEDIA") + (match_code "unordered")))) + +(define_predicate "shmedia_cbranch_comparison_operator" + (ior (match_operand 0 "equality_comparison_operator") + (match_operand 0 "greater_comparison_operator"))) + +;; Returns 1 if OP is a CONST_VECTOR matching MODE whose elements are all CONST_INTs. + +(define_predicate "sh_const_vec" + (match_code "const_vector") +{ + int i; + + if (GET_CODE (op) != CONST_VECTOR + || (GET_MODE (op) != mode && mode != VOIDmode)) + return 0; + i = XVECLEN (op, 0) - 1; + for (; i >= 0; i--) + if (!CONST_INT_P (XVECEXP (op, 0, i))) + return 0; + return 1; +}) + +;; Determine if OP is a constant vector matching MODE with only one +;; element that is not a sign extension. Two byte-sized elements +;; count as one. + +(define_predicate "sh_1el_vec" + (match_code "const_vector") +{ + int unit_size; + int i, last, least, sign_ix; + rtx sign; + + if (GET_CODE (op) != CONST_VECTOR + || (GET_MODE (op) != mode && mode != VOIDmode)) + return 0; + /* Determine numbers of last and of least significant elements. */ + last = XVECLEN (op, 0) - 1; + least = TARGET_LITTLE_ENDIAN ? 
0 : last; + if (!CONST_INT_P (XVECEXP (op, 0, least))) + return 0; + sign_ix = least; + if (GET_MODE_UNIT_SIZE (mode) == 1) + sign_ix = TARGET_LITTLE_ENDIAN ? 
1 : last - 1; + if (!CONST_INT_P (XVECEXP (op, 0, sign_ix))) + return 0; + unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op)); + sign = (INTVAL (XVECEXP (op, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1) + ? constm1_rtx : const0_rtx); + i = XVECLEN (op, 0) - 1; + do + if (i != least && i != sign_ix && XVECEXP (op, 0, i) != sign) + return 0; + while (--i >= 0); + return 1; +}) + +;; Like register_operand, but take into account that SHMEDIA can use +;; the constant zero like a general register. + +(define_predicate "sh_register_operand" + (match_code "reg,subreg,const_int,const_double") +{ + if (op == CONST0_RTX (mode) && TARGET_SHMEDIA) + return 1; + return register_operand (op, mode); +}) + +;; Returns 1 if OP is a vector matching MODE whose elements all repeat one value (one pair of values for byte-sized elements). + +(define_predicate "sh_rep_vec" + (match_code "const_vector,parallel") +{ + int i; + rtx x, y; + + if ((GET_CODE (op) != CONST_VECTOR && GET_CODE (op) != PARALLEL) + || (GET_MODE (op) != mode && mode != VOIDmode)) + return 0; + i = XVECLEN (op, 0) - 2; + x = XVECEXP (op, 0, i + 1); + if (GET_MODE_UNIT_SIZE (mode) == 1) + { + y = XVECEXP (op, 0, i); + for (i -= 2; i >= 0; i -= 2) + if (! rtx_equal_p (XVECEXP (op, 0, i + 1), x) + || ! rtx_equal_p (XVECEXP (op, 0, i), y)) + return 0; + } + else + for (; i >= 0; i--) + if (XVECEXP (op, 0, i) != x) + return 0; + return 1; +}) + +;; Returns 1 if OP is a valid shift count: a CONST_INT below the bit size of MODE, another nonmemory constant, or a shift_count_reg_operand. + +(define_predicate "shift_count_operand" + (match_code "const_int,const_double,const,symbol_ref,label_ref,subreg,reg,zero_extend,sign_extend") +{ + return (CONSTANT_P (op) + ? (CONST_INT_P (op) + ? 
(unsigned) INTVAL (op) < GET_MODE_BITSIZE (mode) + : nonmemory_operand (op, mode)) + : shift_count_reg_operand (op, mode)); +}) + +;; Like arith_reg_operand, but strip extensions and subregs whose inner integer mode still has at least 6 bits. 
+ +(define_predicate "shift_count_reg_operand" + (match_code "subreg,reg,zero_extend,sign_extend") +{ + if ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND + || (GET_CODE (op) == SUBREG && SUBREG_BYTE (op) == 0)) + && (mode == VOIDmode || mode == GET_MODE (op)) + && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6 + && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT) + { + mode = VOIDmode; + do + op = XEXP (op, 0); + while ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND + || GET_CODE (op) == TRUNCATE) + && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6 + && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT); + + } + return arith_reg_operand (op, mode); +}) + +;; Match any shift rtx: ASHIFT, ASHIFTRT or LSHIFTRT. + +(define_predicate "shift_operator" + (match_code "ashift,ashiftrt,lshiftrt")) + +;; Match a SYMBOL_REF. + +(define_predicate "symbol_ref_operand" + (match_code "symbol_ref")) + +;; Same as target_reg_operand, except that label_refs and symbol_refs +;; are accepted before reload. + +(define_special_predicate "target_operand" + (match_code "subreg,reg,label_ref,symbol_ref,const,unspec") +{ + if (mode != VOIDmode && mode != Pmode) + return 0; + + if ((GET_MODE (op) == Pmode || GET_MODE (op) == VOIDmode) + && satisfies_constraint_Csy (op)) + return ! reload_completed; + + return target_reg_operand (op, mode); +}) + +;; Accept pseudos and branch target registers. + +(define_special_predicate "target_reg_operand" + (match_code "subreg,reg") +{ + if (mode == VOIDmode + ? GET_MODE (op) != Pmode && GET_MODE (op) != PDImode + : mode != GET_MODE (op)) + return 0; + + if (GET_CODE (op) == SUBREG) + op = XEXP (op, 0); + + if (!REG_P (op)) + return 0; + + /* We must protect ourselves from matching pseudos that are virtual + register, because they will eventually be replaced with hardware + registers that aren't branch-target registers. 
*/ + if (REGNO (op) > LAST_VIRTUAL_REGISTER + || TARGET_REGISTER_P (REGNO (op))) + return 1; + + return 0; +}) + +;; Accept an extend_reg_operand whose own mode is SImode, DImode, V4HImode or V2SImode. + +(define_special_predicate "trunc_hi_operand" + (match_code "subreg,reg,truncate") +{ + enum machine_mode op_mode = GET_MODE (op); + + if (op_mode != SImode && op_mode != DImode + && op_mode != V4HImode && op_mode != V2SImode) + return 0; + return extend_reg_operand (op, mode); +}) + +;; Return 1 if OP is an address suitable for an unaligned access instruction. + +(define_special_predicate "ua_address_operand" + (match_code "subreg,reg,plus") +{ + if (GET_CODE (op) == PLUS + && (! satisfies_constraint_I06 (XEXP (op, 1)))) + return 0; + return address_operand (op, QImode); +}) + +;; Accept a CONST_INT that satisfies constraint I06. + +(define_predicate "ua_offset" + (match_code "const_int") +{ + return satisfies_constraint_I06 (op); +}) + +;; Match an ABS, NEG or SQRT rtx whose mode is MODE. + +(define_predicate "unary_float_operator" + (and (match_code "abs,neg,sqrt") + (match_test "GET_MODE (op) == mode"))) + +;; Return 1 if OP is a valid source operand for xor. + +(define_predicate "xor_operand" + (match_code "subreg,reg,const_int") +{ + if (CONST_INT_P (op)) + return (TARGET_SHMEDIA + ? 
(satisfies_constraint_I06 (op) + || (!can_create_pseudo_p () && INTVAL (op) == 0xff)) + : satisfies_constraint_K08 (op)); + if (TARGET_SHMEDIA + && mode != DImode && GET_CODE (op) == SUBREG + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4) + return 0; + return arith_reg_operand (op, mode); +}) + +(define_predicate "bitwise_memory_operand" + (match_code "mem") +{ + if (MEM_P (op)) + { + if (REG_P (XEXP (op, 0))) + return 1; + + if (GET_CODE (XEXP (op, 0)) == PLUS + && REG_P (XEXP (XEXP (op, 0), 0)) + && satisfies_constraint_K12 (XEXP (XEXP (op, 0), 1))) + return 1; + } + return 0; +}) diff --git a/gcc/config/sh/rtems.h b/gcc/config/sh/rtems.h new file mode 100644 index 000000000..61fab07e0 --- /dev/null +++ b/gcc/config/sh/rtems.h @@ -0,0 +1,26 @@ +/* Definitions for rtems targeting a SH using COFF. + Copyright (C) 1997, 1998, 2000, 2002, 2007 Free Software Foundation, Inc. + Contributed by Joel Sherrill (joel@OARcorp.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Specify predefined symbols in preprocessor. */ + +#define TARGET_OS_CPP_BUILTINS() do { \ + builtin_define( "__rtems__" ); \ + builtin_assert( "system=rtems" ); \ +} while (0) diff --git a/gcc/config/sh/rtemself.h b/gcc/config/sh/rtemself.h new file mode 100644 index 000000000..aba98f686 --- /dev/null +++ b/gcc/config/sh/rtemself.h @@ -0,0 +1,26 @@ +/* Definitions for rtems targeting a SH using elf. 
+ Copyright (C) 1997, 1998, 2000, 2002, 2007 Free Software Foundation, Inc. + Contributed by Joel Sherrill (joel@OARcorp.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Specify predefined symbols in preprocessor. */ + +#define TARGET_OS_CPP_BUILTINS() do { \ + builtin_define( "__rtems__" ); \ + builtin_assert( "system=rtems" ); \ +} while (0) diff --git a/gcc/config/sh/sh-c.c b/gcc/config/sh/sh-c.c new file mode 100644 index 000000000..2fdff542b --- /dev/null +++ b/gcc/config/sh/sh-c.c @@ -0,0 +1,68 @@ +/* Pragma handling for GCC for Renesas / SuperH SH. + Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, + 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. + Contributed by Joern Rennecke . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "tm_p.h" + +/* Handle machine specific pragmas to be semi-compatible with Renesas + compiler. */ + +/* Add ATTR to the attributes of the current function. If there is no + such function, save it to be added to the attributes of the next + function. */ +static void +sh_add_function_attribute (const char *attr) +{ + tree id = get_identifier (attr); + + if (current_function_decl) + decl_attributes (&current_function_decl, + tree_cons (id, NULL_TREE, NULL_TREE), 0); + else + { + *sh_deferred_function_attributes_tail + = tree_cons (id, NULL_TREE, *sh_deferred_function_attributes_tail); + sh_deferred_function_attributes_tail + = &TREE_CHAIN (*sh_deferred_function_attributes_tail); + } +} + +void +sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + sh_add_function_attribute ("interrupt_handler"); +} + +void +sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + sh_add_function_attribute ("trapa_handler"); +} + +void +sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + sh_add_function_attribute ("nosave_low_regs"); +} diff --git a/gcc/config/sh/sh-modes.def b/gcc/config/sh/sh-modes.def new file mode 100644 index 000000000..98e974a1a --- /dev/null +++ b/gcc/config/sh/sh-modes.def @@ -0,0 +1,34 @@ +/* SH extra machine modes. + Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* The SH uses a partial integer mode to represent the FPSCR register. */ +PARTIAL_INT_MODE (SI); +/* PDI mode is used to represent a function address in a target register. */ +PARTIAL_INT_MODE (DI); + +/* Vector modes. */ +VECTOR_MODE (INT, QI, 2); /* V2QI */ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ +VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ +VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ +VECTOR_MODE (INT, DI, 4); /* V4DI */ +VECTOR_MODE (INT, DI, 8); /* V8DI */ +VECTOR_MODE (FLOAT, SF, 16); /* V16SF */ diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h new file mode 100644 index 000000000..d29147c98 --- /dev/null +++ b/gcc/config/sh/sh-protos.h @@ -0,0 +1,186 @@ +/* Definitions of target machine for GNU compiler for Renesas / SuperH SH. + Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2003, + 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Steve Chamberlain (sac@cygnus.com). + Improved by Jim Wilson (wilson@cygnus.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#ifndef GCC_SH_PROTOS_H +#define GCC_SH_PROTOS_H + +enum sh_function_kind { + /* A function with normal C ABI */ + FUNCTION_ORDINARY, + /* A special function that guarantees that some otherwise call-clobbered + registers are not clobbered. These can't go through the SH5 resolver, + because it only saves argument passing registers. */ + SFUNC_GOT, + /* A special function that should be linked statically. These are typically + smaller or not much larger than a PLT entry. + Some also have a non-standard ABI which precludes dynamic linking. */ + SFUNC_STATIC +}; + +#ifdef RTX_CODE +extern rtx sh_fsca_sf2int (void); +extern rtx sh_fsca_df2int (void); +extern rtx sh_fsca_int2sf (void); + +/* Declare functions defined in sh.c and used in templates. */ + +extern const char *output_branch (int, rtx, rtx *); +extern const char *output_ieee_ccmpeq (rtx, rtx *); +extern const char *output_branchy_insn (enum rtx_code, const char *, rtx, rtx *); +extern const char *output_movedouble (rtx, rtx[], enum machine_mode); +extern const char *output_movepcrel (rtx, rtx[], enum machine_mode); +extern const char *output_far_jump (rtx, rtx); + +extern struct rtx_def *sfunc_uses_reg (rtx); +extern int barrier_align (rtx); +extern int sh_loop_align (rtx); +extern int fp_zero_operand (rtx); +extern int fp_one_operand (rtx); +extern int fp_int_operand (rtx); +extern rtx get_fpscr_rtx (void); +extern bool sh_legitimate_index_p (enum machine_mode, rtx); +extern bool sh_legitimize_reload_address (rtx *, enum machine_mode, int, int); +extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx); +extern int nonpic_symbol_mentioned_p (rtx); +extern void emit_sf_insn (rtx); +extern void emit_df_insn (rtx); +extern void output_pic_addr_const (FILE *, rtx); +extern int expand_block_move (rtx *); +extern int prepare_move_operands (rtx[], enum machine_mode mode); +extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode, + enum rtx_code comparison); +extern void 
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int); +extern bool expand_cbranchdi4 (rtx *operands, enum rtx_code comparison); +extern void sh_emit_scc_to_t (enum rtx_code, rtx, rtx); +extern rtx sh_emit_cheap_store_flag (enum machine_mode, enum rtx_code, rtx, rtx); +extern void sh_emit_compare_and_branch (rtx *, enum machine_mode); +extern void sh_emit_compare_and_set (rtx *, enum machine_mode); +extern int shift_insns_rtx (rtx); +extern void gen_ashift (int, int, rtx); +extern void gen_ashift_hi (int, int, rtx); +extern void gen_shifty_op (int, rtx *); +extern void gen_shifty_hi_op (int, rtx *); +extern int expand_ashiftrt (rtx *); +extern int sh_dynamicalize_shift_p (rtx); +extern int shl_and_kind (rtx, rtx, int *); +extern int shl_and_length (rtx); +extern int shl_and_scr_length (rtx); +extern int gen_shl_and (rtx, rtx, rtx, rtx); +extern int shl_sext_kind (rtx, rtx, int *); +extern int shl_sext_length (rtx); +extern int gen_shl_sext (rtx, rtx, rtx, rtx); +extern rtx gen_datalabel_ref (rtx); +extern int regs_used (rtx, int); +extern void fixup_addr_diff_vecs (rtx); +extern int get_dest_uid (rtx, int); +extern void final_prescan_insn (rtx, rtx *, int); +extern int symbol_ref_operand (rtx, enum machine_mode); +extern enum tls_model tls_symbolic_operand (rtx, enum machine_mode); +extern int system_reg_operand (rtx, enum machine_mode); +extern int general_movsrc_operand (rtx, enum machine_mode); +extern int general_movdst_operand (rtx, enum machine_mode); +extern int arith_reg_operand (rtx, enum machine_mode); +extern int fp_arith_reg_operand (rtx, enum machine_mode); +extern int arith_operand (rtx, enum machine_mode); +extern int arith_reg_or_0_operand (rtx, enum machine_mode); +extern int logical_operand (rtx, enum machine_mode); +extern int tertiary_reload_operand (rtx, enum machine_mode); +extern int fpscr_operand (rtx, enum machine_mode); +extern int fpul_operand (rtx, enum machine_mode); +extern int commutative_float_operator (rtx, enum 
machine_mode); +extern int noncommutative_float_operator (rtx, enum machine_mode); +extern int reg_unused_after (rtx, rtx); +extern void expand_sf_unop (rtx (*)(rtx, rtx, rtx), rtx *); +extern void expand_sf_binop (rtx (*)(rtx, rtx, rtx, rtx), rtx *); +extern void expand_df_unop (rtx (*)(rtx, rtx, rtx), rtx *); +extern void expand_df_binop (rtx (*)(rtx, rtx, rtx, rtx), rtx *); +extern void expand_fp_branch (rtx (*)(void), rtx (*)(void)); +extern int sh_insn_length_adjustment (rtx); +extern int sh_can_redirect_branch (rtx, rtx); +extern void sh_expand_unop_v2sf (enum rtx_code, rtx, rtx); +extern void sh_expand_binop_v2sf (enum rtx_code, rtx, rtx, rtx); +extern int sh_expand_t_scc (rtx *); +extern rtx sh_gen_truncate (enum machine_mode, rtx, int); +extern bool sh_vector_mode_supported_p (enum machine_mode); +#endif /* RTX_CODE */ + +extern const char *output_jump_label_table (void); +extern int sh_handle_pragma (int (*)(void), void (*)(int), const char *); +extern struct rtx_def *get_fpscr_rtx (void); +extern int sh_media_register_for_return (void); +extern void sh_expand_prologue (void); +extern void sh_expand_epilogue (bool); +extern int sh_need_epilogue (void); +extern void sh_set_return_address (rtx, rtx); +extern int initial_elimination_offset (int, int); +extern int fldi_ok (void); +extern int sh_hard_regno_rename_ok (unsigned int, unsigned int); +extern int sh_cfun_interrupt_handler_p (void); +extern int sh_cfun_resbank_handler_p (void); +extern int sh_attr_renesas_p (const_tree); +extern int sh_cfun_attr_renesas_p (void); +extern bool sh_cannot_change_mode_class + (enum machine_mode, enum machine_mode, enum reg_class); +extern bool sh_small_register_classes_for_mode_p (enum machine_mode); +extern void sh_mark_label (rtx, int); +extern int check_use_sfunc_addr (rtx, rtx); + +#ifdef HARD_CONST +extern void fpscr_set_from_mem (int, HARD_REG_SET); +#endif + +extern void sh_pr_interrupt (struct cpp_reader *); +extern void sh_pr_trapa (struct cpp_reader *); +extern 
void sh_pr_nosave_low_regs (struct cpp_reader *); +extern rtx function_symbol (rtx, const char *, enum sh_function_kind); +extern rtx sh_get_pr_initial_val (void); + +extern int sh_pass_in_reg_p (CUMULATIVE_ARGS *, enum machine_mode, tree); +extern void sh_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, signed int, enum machine_mode); +extern rtx sh_dwarf_register_span (rtx); + +extern rtx replace_n_hard_rtx (rtx, rtx *, int , int); +extern int shmedia_cleanup_truncate (rtx *, void *); + +extern int sh_contains_memref_p (rtx); +extern int sh_loads_bankedreg_p (rtx); +extern rtx shmedia_prepare_call_address (rtx fnaddr, int is_sibcall); +extern int sh2a_get_function_vector_number (rtx); +extern int sh2a_is_function_vector_call (rtx); +extern void sh_fix_range (const char *); +extern bool sh_hard_regno_mode_ok (unsigned int, enum machine_mode); +#endif /* ! GCC_SH_PROTOS_H */ + +#ifdef SYMBIAN +extern const char * sh_symbian_strip_name_encoding (const char *); +extern bool sh_symbian_is_dllexported_name (const char *); +#ifdef TREE_CODE +extern bool sh_symbian_is_dllexported (tree); +extern int sh_symbian_import_export_class (tree, int); +extern tree sh_symbian_handle_dll_attribute (tree *, tree, tree, int, bool *); +#ifdef RTX_CODE +extern void sh_symbian_encode_section_info (tree, rtx, int); +#endif +#endif +#endif /* SYMBIAN */ + diff --git a/gcc/config/sh/sh-symbian.h b/gcc/config/sh/sh-symbian.h new file mode 100644 index 000000000..2e37d2bbc --- /dev/null +++ b/gcc/config/sh/sh-symbian.h @@ -0,0 +1,42 @@ +/* header file for GCC for a Symbian OS targeted SH backend. + Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc. + Contributed by RedHat. + Most of this code is stolen from i386/winnt.c. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* A unique character to encode declspec encoded objects. */ +#define SH_SYMBIAN_FLAG_CHAR "$" + +/* Unique strings to prefix exported and imported objects. */ +#define DLL_IMPORT_PREFIX SH_SYMBIAN_FLAG_CHAR "i." +#define DLL_EXPORT_PREFIX SH_SYMBIAN_FLAG_CHAR "e." + +/* Select the level of debugging information to display. + 0 for no debugging. + 1 for informative messages about decisions to add attributes + 2 for verbose information about what is being done. */ +#define SYMBIAN_DEBUG 0 +/* #define SYMBIAN_DEBUG 1 */ +/* #define SYMBIAN_DEBUG 2 */ + +/* Functions exported from symbian-base.c. */ +extern tree sh_symbian_associated_type (tree); + +/* Functions exported from symbian-[c|c++].c. */ +extern bool sh_symbian_is_dllimported (tree); + diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c new file mode 100644 index 000000000..11e537b1f --- /dev/null +++ b/gcc/config/sh/sh.c @@ -0,0 +1,12610 @@ +/* Output routines for GCC for Renesas / SuperH SH. + Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, + 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Steve Chamberlain (sac@cygnus.com). + Improved by Jim Wilson (wilson@cygnus.com). + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "insn-config.h" +#include "rtl.h" +#include "tree.h" +#include "flags.h" +#include "expr.h" +#include "optabs.h" +#include "reload.h" +#include "function.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "output.h" +#include "insn-attr.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "integrate.h" +#include "dwarf2.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "langhooks.h" +#include "basic-block.h" +#include "df.h" +#include "cfglayout.h" +#include "intl.h" +#include "sched-int.h" +#include "params.h" +#include "ggc.h" +#include "gimple.h" +#include "cfgloop.h" +#include "alloc-pool.h" +#include "tm-constrs.h" + + +int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch; + +#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0) +#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1) + +/* These are some macros to abstract register modes. */ +#define CONST_OK_FOR_ADD(size) \ + (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size)) +#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi)) +#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3)) +#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3)) + +/* Used to simplify the logic below. Find the attributes wherever + they may be. 
*/ +#define SH_ATTRIBUTES(decl) \ + ((TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \ + : DECL_ATTRIBUTES (decl) \ + ? (DECL_ATTRIBUTES (decl)) \ + : TYPE_ATTRIBUTES (TREE_TYPE (decl))) + +/* Set to 1 by expand_prologue() when the function is an interrupt handler. */ +int current_function_interrupt; + +tree sh_deferred_function_attributes; +tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes; + +/* Global variables for machine-dependent things. */ + +/* Which cpu are we scheduling for. */ +enum processor_type sh_cpu; + +/* Definitions used in ready queue reordering for first scheduling pass. */ + +/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */ +static short *regmode_weight[2]; + +/* Total SFmode and SImode weights of scheduled insns. */ +static int curr_regmode_pressure[2]; + +/* Number of r0 life regions. */ +static int r0_life_regions; + +/* If true, skip cycles for Q -> R movement. */ +static int skip_cycles = 0; + +/* Cached value of can_issue_more. This is cached in sh_variable_issue hook + and returned from sh_reorder2. */ +static short cached_can_issue_more; + +/* Unique number for UNSPEC_BBR pattern. */ +static unsigned int unspec_bbr_uid = 1; + +/* Provides the class number of the smallest class containing + reg number. 
*/ + +enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] = +{ + R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + FP0_REGS,FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS, + TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS, + DF_REGS, DF_REGS, DF_REGS, DF_REGS, + DF_REGS, DF_REGS, DF_REGS, DF_REGS, + NO_REGS, GENERAL_REGS, PR_REGS, T_REGS, + MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS, + GENERAL_REGS, GENERAL_REGS, +}; + +char sh_register_names[FIRST_PSEUDO_REGISTER] \ + [MAX_REGISTER_NAME_LENGTH + 1] = 
SH_REGISTER_NAMES_INITIALIZER; + +char sh_additional_register_names[ADDREGNAMES_SIZE] \ + [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1] + = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER; + +int assembler_dialect; + +static bool shmedia_space_reserved_for_target_registers; + +static bool sh_handle_option (size_t, const char *, int); +static void split_branches (rtx); +static int branch_dest (rtx); +static void force_into (rtx, rtx); +static void print_slot (rtx); +static rtx add_constant (rtx, enum machine_mode, rtx); +static void dump_table (rtx, rtx); +static int hi_const (rtx); +static int broken_move (rtx); +static int mova_p (rtx); +static rtx find_barrier (int, rtx, rtx); +static int noncall_uses_reg (rtx, rtx, rtx *); +static rtx gen_block_redirect (rtx, int, int); +static void sh_reorg (void); +static void sh_option_override (void); +static void sh_option_init_struct (struct gcc_options *); +static void sh_option_default_params (void); +static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool); +static rtx frame_insn (rtx); +static rtx push (int); +static void pop (int); +static void push_regs (HARD_REG_SET *, int); +static int calc_live_regs (HARD_REG_SET *); +static HOST_WIDE_INT rounded_frame_size (int); +static bool sh_frame_pointer_required (void); +static rtx mark_constant_pool_use (rtx); +static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *); +static tree sh_handle_resbank_handler_attribute (tree *, tree, + tree, int, bool *); +static tree sh2a_handle_function_vector_handler_attribute (tree *, tree, + tree, int, bool *); +static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *); +static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *); +static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *); +static void sh_print_operand (FILE *, rtx, int); +static void sh_print_operand_address (FILE *, rtx); +static bool sh_print_operand_punct_valid_p (unsigned char code); 
+static bool sh_asm_output_addr_const_extra (FILE *file, rtx x); +static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT); +static void sh_insert_attributes (tree, tree *); +static const char *sh_check_pch_target_flags (int); +static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t); +static int sh_adjust_cost (rtx, rtx, rtx, int); +static int sh_issue_rate (void); +static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p); +static short find_set_regmode_weight (rtx, enum machine_mode); +static short find_insn_regmode_weight (rtx, enum machine_mode); +static void find_regmode_weight (basic_block, enum machine_mode); +static int find_r0_life_regions (basic_block); +static void sh_md_init_global (FILE *, int, int); +static void sh_md_finish_global (FILE *, int); +static int rank_for_reorder (const void *, const void *); +static void swap_reorder (rtx *, int); +static void ready_reorder (rtx *, int); +static short high_pressure (enum machine_mode); +static int sh_reorder (FILE *, int, rtx *, int *, int); +static int sh_reorder2 (FILE *, int, rtx *, int *, int); +static void sh_md_init (FILE *, int, int); +static int sh_variable_issue (FILE *, int, rtx, int); + +static bool sh_function_ok_for_sibcall (tree, tree); + +static bool sh_cannot_modify_jumps_p (void); +static reg_class_t sh_target_reg_class (void); +static bool sh_optimize_target_register_callee_saved (bool); +static bool sh_ms_bitfield_layout_p (const_tree); + +static void sh_init_builtins (void); +static tree sh_builtin_decl (unsigned, bool); +static void sh_media_init_builtins (void); +static tree sh_media_builtin_decl (unsigned, bool); +static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int); +static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); +static void sh_file_start (void); +static int flow_dependent_p (rtx, rtx); +static void flow_dependent_p_1 (rtx, const_rtx, void *); +static int shiftcosts (rtx); +static int 
andcosts (rtx); +static int addsubcosts (rtx); +static int multcosts (rtx); +static bool unspec_caller_rtx_p (rtx); +static bool sh_cannot_copy_insn_p (rtx); +static bool sh_rtx_costs (rtx, int, int, int *, bool); +static int sh_address_cost (rtx, bool); +static int sh_pr_n_sets (void); +static rtx sh_allocate_initial_value (rtx); +static reg_class_t sh_preferred_reload_class (rtx, reg_class_t); +static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t, + enum machine_mode, + struct secondary_reload_info *); +static bool sh_legitimate_address_p (enum machine_mode, rtx, bool); +static rtx sh_legitimize_address (rtx, rtx, enum machine_mode); +static rtx sh_delegitimize_address (rtx); +static int shmedia_target_regs_stack_space (HARD_REG_SET *); +static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *); +static int shmedia_target_regs_stack_adjust (HARD_REG_SET *); +static int scavenge_reg (HARD_REG_SET *s); +struct save_schedule_s; +static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *, + struct save_schedule_s *, int); + +static rtx sh_struct_value_rtx (tree, int); +static rtx sh_function_value (const_tree, const_tree, bool); +static bool sh_function_value_regno_p (const unsigned int); +static rtx sh_libcall_value (enum machine_mode, const_rtx); +static bool sh_return_in_memory (const_tree, const_tree); +static rtx sh_builtin_saveregs (void); +static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int); +static bool sh_strict_argument_naming (CUMULATIVE_ARGS *); +static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *); +static tree sh_build_builtin_va_list (void); +static void sh_va_start (tree, rtx); +static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); +static bool sh_promote_prototypes (const_tree); +static enum machine_mode sh_promote_function_mode (const_tree type, + enum machine_mode, + int *punsignedp, + const_tree funtype, + int for_return); +static bool 
sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, + tree, bool); +static void sh_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static rtx sh_function_arg (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static bool sh_scalar_mode_supported_p (enum machine_mode); +static int sh_dwarf_calling_convention (const_tree); +static void sh_encode_section_info (tree, rtx, int); +static int sh2a_function_vector_p (tree); +static void sh_trampoline_init (rtx, tree, rtx); +static rtx sh_trampoline_adjust_address (rtx); +static void sh_conditional_register_usage (void); + +static const struct attribute_spec sh_attribute_table[] = +{ + /* Attributes handled by this back end; the table is installed below as + TARGET_ATTRIBUTE_TABLE. Field order of every entry: + { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ + { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute }, + { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute }, + { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute }, + { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute }, + { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute }, + { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute }, + { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute }, + { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute }, +#ifdef SYMBIAN + /* Symbian support adds two new attributes: + dllexport - for exporting a function/variable that will live in a dll + dllimport - for importing a function/variable from a dll + + Microsoft allows multiple declspecs in one __declspec, separating + them with spaces. We do NOT support this. Instead, use __declspec + multiple times.
*/ + { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute }, + { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute }, +#endif + { NULL, 0, 0, false, false, false, NULL } +}; + +/* Set default optimization options. This table is installed as + TARGET_OPTION_OPTIMIZATION_TABLE. */ +static const struct default_options sh_option_optimization_table[] = + { + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_mdiv_, "inv:minlat", 1 }, + { OPT_LEVELS_SIZE, OPT_mdiv_, SH_DIV_STR_FOR_SIZE, 1 }, + { OPT_LEVELS_0_ONLY, OPT_mdiv_, "", 1 }, + { OPT_LEVELS_SIZE, OPT_mcbranchdi, NULL, 0 }, + /* We can't meaningfully test TARGET_SHMEDIA here, because -m + options haven't been parsed yet, hence we'd read only the + default. sh_target_reg_class will return NO_REGS if this is + not SHMEDIA, so it's OK to always set + flag_branch_target_load_optimize. */ + { OPT_LEVELS_2_PLUS, OPT_fbranch_target_load_optimize, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +/* Initialize the GCC target structure. */ +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE sh_attribute_table + +/* The next two are used for debug info when compiling with -gdwarf. */ +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t" +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t" + +/* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE.
*/ +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE sh_option_override +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE sh_option_optimization_table +#undef TARGET_OPTION_INIT_STRUCT +#define TARGET_OPTION_INIT_STRUCT sh_option_init_struct +#undef TARGET_OPTION_DEFAULT_PARAMS +#define TARGET_OPTION_DEFAULT_PARAMS sh_option_default_params + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND sh_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra + +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk + +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START sh_file_start +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION sh_handle_option + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST sh_register_move_cost + +#undef TARGET_INSERT_ATTRIBUTES +#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST sh_adjust_cost + +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE sh_issue_rate + +/* The next 5 
hooks have been implemented to re-enable sched1. With the + help of these macros we are limiting the movement of insns in sched1 to + reduce the register pressure. The overall idea is to keep count of SImode + and SFmode regs required by already scheduled insns. When these counts + cross some threshold values, give priority to insns that free registers. + The insn that frees registers is most likely to be the insn with lowest + LUID (original insn order); but such an insn might be in the stalled + queue (Q) instead of the ready queue (R). To solve this, we skip cycles + up to a maximum of 8 cycles so that such insns may move from Q -> R. + + The hooks are described below: + + TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic + scheduler; it is called inside the sched_init function just after + find_insn_reg_weights function call. It is used to calculate the SImode + and SFmode weights of insns of basic blocks, much like what + find_insn_reg_weights does. + TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook. + + TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is + indicated by TARGET_SCHED_REORDER2; doing this may move insns from + (Q)->(R). + + TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is + high, reorder the ready queue so that the insn with lowest LUID will be + issued next. + + TARGET_SCHED_REORDER2: If the register pressure is high, indicate to + TARGET_SCHED_DFA_NEW_CYCLE to skip cycles. + + TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it + can be returned from TARGET_SCHED_REORDER2. + + TARGET_SCHED_INIT: Reset the register pressure counting variables.
*/ + +#undef TARGET_SCHED_DFA_NEW_CYCLE +#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle + +#undef TARGET_SCHED_INIT_GLOBAL +#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global + +#undef TARGET_SCHED_FINISH_GLOBAL +#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global + +#undef TARGET_SCHED_VARIABLE_ISSUE +#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue + +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER sh_reorder + +#undef TARGET_SCHED_REORDER2 +#define TARGET_SCHED_REORDER2 sh_reorder2 + +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT sh_md_init + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address + +#undef TARGET_CANNOT_MODIFY_JUMPS_P +#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p +#undef TARGET_BRANCH_TARGET_REGISTER_CLASS +#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class +#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED +#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \ + sh_optimize_target_register_callee_saved + +#undef TARGET_MS_BITFIELD_LAYOUT_P +#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS sh_init_builtins +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL sh_builtin_decl +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN sh_expand_builtin + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall + +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS sh_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST sh_address_cost +#undef TARGET_ALLOCATE_INITIAL_VALUE +#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg + +#undef 
TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS true +#endif + +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE sh_function_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE sh_libcall_value +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY sh_return_in_memory + +#undef TARGET_EXPAND_BUILTIN_SAVEREGS +#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming +#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED +#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference +#undef TARGET_CALLEE_COPIES +#define TARGET_CALLEE_COPIES sh_callee_copies +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG sh_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define 
TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p + +#undef TARGET_CHECK_PCH_TARGET_FLAGS +#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags + +#undef TARGET_DWARF_CALLING_CONVENTION +#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required + +/* Return regmode weight for insn. */ +#define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)] + +/* Return current register pressure for regmode. */ +#define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1] + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info + +#ifdef SYMBIAN + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding +#undef TARGET_CXX_IMPORT_EXPORT_CLASS +#define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class + +#endif /* SYMBIAN */ + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD sh_secondary_reload + +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p + +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT sh_trampoline_init +#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS +#define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address + +/* Machine-specific symbol_ref flags. 
*/ +#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0) + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* Implement TARGET_HANDLE_OPTION. */ + +static bool +sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, + int value ATTRIBUTE_UNUSED) +{ + switch (code) + { + case OPT_m1: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1; + return true; + + case OPT_m2: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2; + return true; + + case OPT_m2a: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A; + return true; + + case OPT_m2a_nofpu: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU; + return true; + + case OPT_m2a_single: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE; + return true; + + case OPT_m2a_single_only: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY; + return true; + + case OPT_m2e: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E; + return true; + + case OPT_m3: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3; + return true; + + case OPT_m3e: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E; + return true; + + case OPT_m4: + case OPT_m4_100: + case OPT_m4_200: + case OPT_m4_300: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4; + return true; + + case OPT_m4_nofpu: + case OPT_m4_100_nofpu: + case OPT_m4_200_nofpu: + case OPT_m4_300_nofpu: + case OPT_m4_340: + case OPT_m4_400: + case OPT_m4_500: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU; + return true; + + case OPT_m4_single: + case OPT_m4_100_single: + case OPT_m4_200_single: + case OPT_m4_300_single: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE; + return true; + + case OPT_m4_single_only: + case OPT_m4_100_single_only: + case OPT_m4_200_single_only: + case OPT_m4_300_single_only: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY; + return true; + + case OPT_m4a: + target_flags = (target_flags & 
~MASK_ARCH) | SELECT_SH4A; + return true; + + case OPT_m4a_nofpu: + case OPT_m4al: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU; + return true; + + case OPT_m4a_single: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE; + return true; + + case OPT_m4a_single_only: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY; + return true; + + case OPT_m5_32media: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA; + return true; + + case OPT_m5_32media_nofpu: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU; + return true; + + case OPT_m5_64media: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA; + return true; + + case OPT_m5_64media_nofpu: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU; + return true; + + case OPT_m5_compact: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT; + return true; + + case OPT_m5_compact_nofpu: + target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU; + return true; + + default: + return true; + } +} + +/* Implement TARGET_OPTION_INIT_STRUCT. */ +static void +sh_option_init_struct (struct gcc_options *opts) +{ + /* We can't meaningfully test TARGET_SH2E / TARGET_IEEE + here, so leave it to TARGET_OPTION_OVERRIDE to set + flag_finite_math_only. We set it to 2 here so we know if the user + explicitly requested this to be on or off. */ + opts->x_flag_finite_math_only = 2; +} + +/* Implement TARGET_OPTION_DEFAULT_PARAMS. */ +static void +sh_option_default_params (void) +{ + set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 2); +} + +/* Implement TARGET_OPTION_OVERRIDE macro. Validate and override + various options, and do some machine dependent initialization. 
*/
+static void
+sh_option_override (void)
+{
+  int regno;
+
+  /* Overview of the steps below:
+       1. derive sh_cpu / assembler_dialect from the selected architecture;
+       2. decode the -mdiv= strategy string (sh_div_str);
+       3. pick the signed division library function and branch cost;
+       4. blank out the names of registers that are not valid;
+       5. reconcile scheduling, unwinding and block reordering flags;
+       6. establish alignment defaults;
+       7. settle the -mieee / -ffinite-math-only relationship.  */
+
+  SUBTARGET_OVERRIDE_OPTIONS;
+  if (optimize > 1 && !optimize_size)
+    target_flags |= MASK_SAVE_ALL_TARGET_REGS;
+
+  /* The tests are ordered so that the last matching one wins.  */
+  sh_cpu = PROCESSOR_SH1;
+  assembler_dialect = 0;
+  if (TARGET_SH2)
+    sh_cpu = PROCESSOR_SH2;
+  if (TARGET_SH2E)
+    sh_cpu = PROCESSOR_SH2E;
+  if (TARGET_SH2A)
+    sh_cpu = PROCESSOR_SH2A;
+  if (TARGET_SH3)
+    sh_cpu = PROCESSOR_SH3;
+  if (TARGET_SH3E)
+    sh_cpu = PROCESSOR_SH3E;
+  if (TARGET_SH4)
+    {
+      assembler_dialect = 1;
+      sh_cpu = PROCESSOR_SH4;
+    }
+  if (TARGET_SH4A_ARCH)
+    {
+      assembler_dialect = 1;
+      sh_cpu = PROCESSOR_SH4A;
+    }
+  if (TARGET_SH5)
+    {
+      sh_cpu = PROCESSOR_SH5;
+      target_flags |= MASK_ALIGN_DOUBLE;
+      if (TARGET_SHMEDIA_FPU)
+        target_flags |= MASK_FMOVD;
+      if (TARGET_SHMEDIA)
+        {
+          /* There are no delay slots on SHmedia.  */
+          flag_delayed_branch = 0;
+          /* Relaxation isn't yet supported for SHmedia.  */
+          target_flags &= ~MASK_RELAX;
+          /* After reload, if conversion does little good but can cause
+             ICEs:
+             - find_if_block doesn't do anything for SH because we don't
+               have conditional execution patterns.  (We use conditional
+               move patterns, which are handled differently, and only
+               before reload).
+             - find_cond_trap doesn't do anything for the SH because we
+               don't have conditional traps.
+             - find_if_case_1 uses redirect_edge_and_branch_force in
+               the only path that does an optimization, and this causes
+               an ICE when branch targets are in registers.
+             - find_if_case_2 doesn't do anything for the SHmedia after
+               reload except when it can redirect a tablejump - and
+               that's rather rare.  */
+          flag_if_conversion2 = 0;
+
+          /* Decode the SHmedia division strategy.  The strings are
+             mutually exclusive, so this is a single else-if chain; an
+             unrecognized string leaves sh_div_strategy unchanged.  */
+          if (! strcmp (sh_div_str, "call"))
+            sh_div_strategy = SH_DIV_CALL;
+          else if (! strcmp (sh_div_str, "call2"))
+            sh_div_strategy = SH_DIV_CALL2;
+          else if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
+            sh_div_strategy = SH_DIV_FP;
+          else if (! strcmp (sh_div_str, "inv"))
+            sh_div_strategy = SH_DIV_INV;
+          else if (! strcmp (sh_div_str, "inv:minlat"))
+            sh_div_strategy = SH_DIV_INV_MINLAT;
+          else if (! strcmp (sh_div_str, "inv20u"))
+            sh_div_strategy = SH_DIV_INV20U;
+          else if (! strcmp (sh_div_str, "inv20l"))
+            sh_div_strategy = SH_DIV_INV20L;
+          else if (! strcmp (sh_div_str, "inv:call2"))
+            sh_div_strategy = SH_DIV_INV_CALL2;
+          else if (! strcmp (sh_div_str, "inv:call"))
+            sh_div_strategy = SH_DIV_INV_CALL;
+          else if (! strcmp (sh_div_str, "inv:fp"))
+            {
+              /* Without an FPU, fall back to the plain inverse method.  */
+              if (TARGET_FPU_ANY)
+                sh_div_strategy = SH_DIV_INV_FP;
+              else
+                sh_div_strategy = SH_DIV_INV;
+            }
+          TARGET_CBRANCHDI4 = 0;
+          /* Assembler CFI isn't yet fully supported for SHmedia.  */
+          flag_dwarf2_cfi_asm = 0;
+        }
+    }
+  else
+    {
+       /* Only the sh64-elf assembler fully supports .quad properly.  */
+       targetm.asm_out.aligned_op.di = NULL;
+       targetm.asm_out.unaligned_op.di = NULL;
+    }
+  if (TARGET_SH1)
+    {
+      if (! strcmp (sh_div_str, "call-div1"))
+        sh_div_strategy = SH_DIV_CALL_DIV1;
+      else if (! strcmp (sh_div_str, "call-fp")
+               && (TARGET_FPU_DOUBLE
+                   || (TARGET_HARD_SH4 && TARGET_SH2E)
+                   || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
+        sh_div_strategy = SH_DIV_CALL_FP;
+      else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
+        sh_div_strategy = SH_DIV_CALL_TABLE;
+      else
+        {
+          /* Pick one that makes most sense for the target in general.
+             It is not much good to use different functions depending
+             on -Os, since then we'll end up with two different functions
+             when some of the code is compiled for size, and some for
+             speed.  */
+
+          /* SH4 tends to emphasize speed.  */
+          if (TARGET_HARD_SH4)
+            sh_div_strategy = SH_DIV_CALL_TABLE;
+          /* These have their own way of doing things.  */
+          else if (TARGET_SH2A)
+            sh_div_strategy = SH_DIV_INTRINSIC;
+          /* ??? Should we use the integer SHmedia function instead?  */
+          else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
+            sh_div_strategy = SH_DIV_CALL_FP;
+          /* SH1 .. SH3 cores often go into small-footprint systems, so
+             default to the smallest implementation available.  */
+          else if (TARGET_SH2)  /* ??? EXPERIMENTAL */
+            sh_div_strategy = SH_DIV_CALL_TABLE;
+          else
+            sh_div_strategy = SH_DIV_CALL_DIV1;
+        }
+    }
+  if (!TARGET_SH1)
+    TARGET_PRETEND_CMOVE = 0;
+  if (sh_divsi3_libfunc[0])
+    ; /* User supplied - leave it alone.  */
+  else if (TARGET_DIVIDE_CALL_FP)
+    sh_divsi3_libfunc = "__sdivsi3_i4";
+  else if (TARGET_DIVIDE_CALL_TABLE)
+    sh_divsi3_libfunc = "__sdivsi3_i4i";
+  else if (TARGET_SH5)
+    sh_divsi3_libfunc = "__sdivsi3_1";
+  else
+    sh_divsi3_libfunc = "__sdivsi3";
+  /* -1 means the branch cost has not been set yet.  */
+  if (sh_branch_cost == -1)
+    sh_branch_cost
+      = TARGET_SH5 ? 1 : ((! TARGET_SH2 || TARGET_HARD_SH4) ? 2 : 1);
+
+  /* Give registers that are not valid for this configuration an empty
+     name.  */
+  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+    if (! VALID_REGISTER_P (regno))
+      sh_register_names[regno][0] = '\0';
+
+  for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
+    if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
+      sh_additional_register_names[regno][0] = '\0';
+
+  if ((flag_pic && ! TARGET_PREFERGOT)
+      || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
+    flag_no_function_cse = 1;
+
+  if (targetm.small_register_classes_for_mode_p (VOIDmode))
+    {
+      /* Never run scheduling before reload, since that can
+         break global alloc, and generates slower code anyway due
+         to the pressure on R0.  */
+      /* Enable sched1 for SH4 if the user explicitly requests.
+         When sched1 is enabled, the ready queue will be reordered by
+         the target hooks if pressure is high.  We can not do this for
+         PIC, SH3 and lower as they give spill failures for R0.  */
+      if (!TARGET_HARD_SH4 || flag_pic)
+        flag_schedule_insns = 0;
+      /* ??? Current exception handling places basic block boundaries
+         after call_insns.  It causes the high pressure on R0 and gives
+         spill failures for R0 in reload.  See PR 22553 and the related
+         thread on gcc-patches.  */
+      else if (flag_exceptions)
+        {
+          /* Only warn when the user asked for -fschedule-insns.  */
+          if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
+            warning (0, "ignoring -fschedule-insns because of exception handling bug");
+          flag_schedule_insns = 0;
+        }
+      else if (flag_schedule_insns
+               && !global_options_set.x_flag_schedule_insns)
+        flag_schedule_insns = 0;
+    }
+
+  /* Unwind info is not correct around the CFG unless either a frame
+     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
+     unwind info generation to be aware of the CFG and propagating states
+     around edges.  */
+  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
+       || flag_exceptions || flag_non_call_exceptions)
+      && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
+    {
+      warning (0, "unwind tables currently require either a frame pointer "
+               "or -maccumulate-outgoing-args for correctness");
+      TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
+    }
+
+  /* Unwinding with -freorder-blocks-and-partition does not work on this
+     architecture, because it requires far jumps to label crossing between
+     hot/cold sections which are rejected on this architecture.  */
+  if (flag_reorder_blocks_and_partition)
+    {
+      if (flag_exceptions)
+        {
+          inform (input_location,
+                  "-freorder-blocks-and-partition does not work with "
+                  "exceptions on this architecture");
+          flag_reorder_blocks_and_partition = 0;
+          flag_reorder_blocks = 1;
+        }
+      else if (flag_unwind_tables)
+        {
+          inform (input_location,
+                  "-freorder-blocks-and-partition does not support unwind "
+                  "info on this architecture");
+          flag_reorder_blocks_and_partition = 0;
+          flag_reorder_blocks = 1;
+        }
+    }
+
+  /* A value of 0 means the alignment was not specified.  */
+  if (align_loops == 0)
+    align_loops = 1 << (TARGET_SH5 ? 3 : 2);
+  if (align_jumps == 0)
+    align_jumps = 1 << CACHE_LOG;
+  else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
+    align_jumps = TARGET_SHMEDIA ? 4 : 2;
+
+  /* Allocation boundary (in *bytes*) for the code of a function.
+     SH1: 32 bit alignment is faster, because instructions are always
+     fetched as a pair from a longword boundary.
+     SH2 .. SH5 : align to cache line start.  */
+  if (align_functions == 0)
+    align_functions
+      = optimize_size ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
+  /* The linker relaxation code breaks when a function contains
+     alignments that are larger than that at the start of a
+     compilation unit.  */
+  if (TARGET_RELAX)
+    {
+      int min_align
+        = align_loops > align_jumps ? align_loops : align_jumps;
+
+      /* Also take possible .long constants / mova tables into account.  */
+      if (min_align < 4)
+        min_align = 4;
+      if (align_functions < min_align)
+        align_functions = min_align;
+    }
+
+  /* sh_option_init_struct presets flag_finite_math_only to the sentinel
+     value 2, meaning "not chosen by the user".  Fold the sentinel back to
+     0 before the flag is consulted; left in place it would read as
+     "finite math on" and -mieee would default to off, contrary to the
+     rule stated just below.  */
+  if (flag_finite_math_only == 2)
+    flag_finite_math_only = 0;
+
+  /* If the -mieee option was not explicitly set by the user, turn it on
+     unless -ffinite-math-only was specified.  See also PR 33135.  */
+  if (! global_options_set.x_TARGET_IEEE)
+    TARGET_IEEE = ! flag_finite_math_only;
+
+  if (sh_fixed_range_str)
+    sh_fix_range (sh_fixed_range_str);
+
+  /* This target defaults to strict volatile bitfields.  */
+  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
+    flag_strict_volatile_bitfields = 1;
+}
+
+/* Print the operand address in x to the stream.
*/
+
+static void
+sh_print_operand_address (FILE *stream, rtx x)
+{
+  const enum rtx_code addr_code = GET_CODE (x);
+
+  /* Bare register, possibly wrapped in a SUBREG: "@<reg>".  */
+  if (addr_code == REG || addr_code == SUBREG)
+    {
+      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
+      return;
+    }
+
+  /* Pre-decrement addressing: "@-<reg>".  */
+  if (addr_code == PRE_DEC)
+    {
+      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
+      return;
+    }
+
+  /* Post-increment addressing: "@<reg>+".  */
+  if (addr_code == POST_INC)
+    {
+      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
+      return;
+    }
+
+  /* Whatever is left and is not a sum is emitted as a constant address,
+     after marking its constant pool entry as used.  */
+  if (addr_code != PLUS)
+    {
+      output_addr_const (stream, mark_constant_pool_use (x));
+      return;
+    }
+
+  /* A sum: either register + displacement or register + register.  */
+  {
+    rtx lhs = XEXP (x, 0);
+    rtx rhs = XEXP (x, 1);
+
+    switch (GET_CODE (rhs))
+      {
+      case CONST_INT:
+        /* "@(<disp>,<reg>)".  */
+        fprintf (stream, "@(%d,%s)", (int) INTVAL (rhs),
+                 reg_names[true_regnum (lhs)]);
+        return;
+
+      case REG:
+      case SUBREG:
+        {
+          int lhs_regno = true_regnum (lhs);
+          int rhs_regno = true_regnum (rhs);
+
+          /* "r0" is always printed as one half of the pair; the other
+             half is whichever operand has the larger register number.  */
+          fprintf (stream, "@(r0,%s)",
+                   reg_names[MAX (lhs_regno, rhs_regno)]);
+          return;
+        }
+
+      default:
+        gcc_unreachable ();
+      }
+  }
+}
+
+/* Print operand x (an rtx) in assembler syntax to file stream
+   according to modifier code.
+
+   '.'  print a .s if insn needs delay slot
+   ','  print LOCAL_LABEL_PREFIX
+   '@'  print trap, rte or rts depending upon pragma interruptness
+   '#'  output a nop if there is nothing to put in the delay slot
+   '''  print likelihood suffix (/u for unlikely).
+   '>'  print branch target if -fverbose-asm
+   'O'  print a constant without the #
+   'R'  print the LSW of a dp value - changes if in little endian
+   'S'  print the MSW of a dp value - changes if in little endian
+   'T'  print the next word of a dp value - same as 'R' in big endian mode.
+   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
+        otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
+   'N'  print 'r63' if the operand is (const_int 0).
+   'd'  print a V2SF reg as dN instead of fpN.
+   'm'  print a pair `base,offset' or `base,index', for LD and ST.
+   'U'  Likewise for {LD,ST}{HI,LO}.
+   'V'  print the position of a single bit set.
+   'W'  print the position of a single bit cleared.
+ 't' print a memory address which is a register. + 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value. + 'o' output an operator. + + Any other code (including 0) prints the operand itself: a register by + its (possibly mode-adjusted) name, a MEM through output_address, and + anything else as a constant, prefixed with '#' when TARGET_SH1. */ + +static void +sh_print_operand (FILE *stream, rtx x, int code) +{ + int regno; + enum machine_mode mode; + + switch (code) + { + tree trapa_attr; + + case '.': + if (final_sequence + && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)) + && get_attr_length (XVECEXP (final_sequence, 0, 1))) + fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s"); + break; + case ',': + fprintf (stream, "%s", LOCAL_LABEL_PREFIX); + break; + case '@': + trapa_attr = lookup_attribute ("trap_exit", + DECL_ATTRIBUTES (current_function_decl)); + if (trapa_attr) + fprintf (stream, "trapa #%ld", + (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr)))); + else if (sh_cfun_interrupt_handler_p ()) + { + if (sh_cfun_resbank_handler_p ()) + fprintf (stream, "resbank\n"); + fprintf (stream, "rte"); + } + else + fprintf (stream, "rts"); + break; + case '#': + /* Output a nop if there's nothing in the delay slot. */ + if (dbr_sequence_length () == 0) + fprintf (stream, "\n\tnop"); + break; + case '\'': + { + rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0); + + if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE) + fputs ("/u", stream); + break; + } + case '>': + if (flag_verbose_asm && JUMP_LABEL (current_output_insn)) + { + fputs ("\t! target: ", stream); + output_addr_const (stream, JUMP_LABEL (current_output_insn)); + } + break; + case 'O': + x = mark_constant_pool_use (x); + output_addr_const (stream, x); + break; + /* N.B.: %R / %S / %T adjust memory addresses by four. + For SHMEDIA, that means they can be used to access the first and + second 32 bit part of a 64 bit (or larger) value that + might be held in floating point registers or memory.
+ While they can be used to access 64 bit parts of a larger value + held in general purpose registers, that won't work with memory - + neither for fp registers, since the frxx names are used. */ + case 'R': + if (REG_P (x) || GET_CODE (x) == SUBREG) + { + regno = true_regnum (x); + regno += FP_REGISTER_P (regno) ? 1 : LSW; + fputs (reg_names[regno], (stream)); + } + else if (MEM_P (x)) + { + x = adjust_address (x, SImode, 4 * LSW); + sh_print_operand_address (stream, XEXP (x, 0)); + } + else + { + rtx sub = NULL_RTX; + + mode = GET_MODE (x); + if (mode == VOIDmode) + mode = DImode; + if (GET_MODE_SIZE (mode) >= 8) + sub = simplify_subreg (SImode, x, mode, 4 * LSW); + if (sub) + sh_print_operand (stream, sub, 0); + else + output_operand_lossage ("invalid operand to %%R"); + } + break; + case 'S': + if (REG_P (x) || GET_CODE (x) == SUBREG) + { + regno = true_regnum (x); + regno += FP_REGISTER_P (regno) ? 0 : MSW; + fputs (reg_names[regno], (stream)); + } + else if (MEM_P (x)) + { + x = adjust_address (x, SImode, 4 * MSW); + sh_print_operand_address (stream, XEXP (x, 0)); + } + else + { + rtx sub = NULL_RTX; + + mode = GET_MODE (x); + if (mode == VOIDmode) + mode = DImode; + if (GET_MODE_SIZE (mode) >= 8) + sub = simplify_subreg (SImode, x, mode, 4 * MSW); + if (sub) + sh_print_operand (stream, sub, 0); + else + output_operand_lossage ("invalid operand to %%S"); + } + break; + case 'T': + /* Next word of a double.
*/ + switch (GET_CODE (x)) + { + case REG: + fputs (reg_names[REGNO (x) + 1], (stream)); + break; + case MEM: + if (GET_CODE (XEXP (x, 0)) != PRE_DEC + && GET_CODE (XEXP (x, 0)) != POST_INC) + x = adjust_address (x, SImode, 4); /* PRE_DEC / POST_INC addresses are printed unadjusted. */ + sh_print_operand_address (stream, XEXP (x, 0)); + break; + default: + break; + } + break; + + case 't': + gcc_assert (MEM_P (x)); + x = XEXP (x, 0); + switch (GET_CODE (x)) + { + case REG: + case SUBREG: + sh_print_operand (stream, x, 0); + break; + default: + break; + } + break; + + case 'o': + switch (GET_CODE (x)) + { + case PLUS: fputs ("add", stream); break; + case MINUS: fputs ("sub", stream); break; + case MULT: fputs ("mul", stream); break; + case DIV: fputs ("div", stream); break; + case EQ: fputs ("eq", stream); break; + case NE: fputs ("ne", stream); break; + case GT: case LT: fputs ("gt", stream); break; /* Each "less" code prints the mnemonic of its "greater" counterpart. */ + case GE: case LE: fputs ("ge", stream); break; + case GTU: case LTU: fputs ("gtu", stream); break; + case GEU: case LEU: fputs ("geu", stream); break; + default: + break; + } + break; + case 'M': + if (TARGET_SHMEDIA) + { + if (MEM_P (x) + && GET_CODE (XEXP (x, 0)) == PLUS + && (REG_P (XEXP (XEXP (x, 0), 1)) + || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG)) + fputc ('x', stream); + } + else + { + if (MEM_P (x)) + { + switch (GET_MODE (x)) + { + case QImode: fputs (".b", stream); break; + case HImode: fputs (".w", stream); break; + case SImode: fputs (".l", stream); break; + case SFmode: fputs (".s", stream); break; + case DFmode: fputs (".d", stream); break; + default: gcc_unreachable (); + } + } + } + break; + + case 'm': + gcc_assert (MEM_P (x)); + x = XEXP (x, 0); + /* Fall through.
*/ + case 'U': + switch (GET_CODE (x)) + { + case REG: + case SUBREG: + sh_print_operand (stream, x, 0); + fputs (", 0", stream); + break; + + case PLUS: + sh_print_operand (stream, XEXP (x, 0), 0); + fputs (", ", stream); + sh_print_operand (stream, XEXP (x, 1), 0); + break; + + default: + gcc_unreachable (); + } + break; + + case 'V': + { + int num = exact_log2 (INTVAL (x)); + gcc_assert (num >= 0); + fprintf (stream, "#%d", num); + } + break; + + case 'W': + { + int num = exact_log2 (~INTVAL (x)); + gcc_assert (num >= 0); + fprintf (stream, "#%d", num); + } + break; + + case 'd': + gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode); + + fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1); /* Skip the first character of the register name and print 'd' in its place. */ + break; + + case 'N': + if (x == CONST0_RTX (GET_MODE (x))) + { + fprintf ((stream), "r63"); + break; + } + goto default_output; + case 'u': + if (CONST_INT_P (x)) + { + fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1)); + break; + } + /* Fall through. */ + + default_output: + default: + regno = 0; + mode = GET_MODE (x); + + switch (GET_CODE (x)) + { + case TRUNCATE: + { + rtx inner = XEXP (x, 0); + int offset = 0; + enum machine_mode inner_mode; + + /* We might see SUBREGs with vector mode registers inside.
*/ + if (GET_CODE (inner) == SUBREG + && (GET_MODE_SIZE (GET_MODE (inner)) + == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner)))) + && subreg_lowpart_p (inner)) + inner = SUBREG_REG (inner); + if (CONST_INT_P (inner)) + { + x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x))); + goto default_output; + } + inner_mode = GET_MODE (inner); + if (GET_CODE (inner) == SUBREG + && (GET_MODE_SIZE (GET_MODE (inner)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner)))) + && REG_P (SUBREG_REG (inner))) + { + offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)), + GET_MODE (SUBREG_REG (inner)), + SUBREG_BYTE (inner), + GET_MODE (inner)); + inner = SUBREG_REG (inner); + } + if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8) + abort (); + /* Floating point register pairs are always big endian; + general purpose registers are 64 bit wide. */ + regno = REGNO (inner); + regno = (HARD_REGNO_NREGS (regno, inner_mode) + - HARD_REGNO_NREGS (regno, mode)) + + offset; + x = inner; + goto reg; + } + case SIGN_EXTEND: + x = XEXP (x, 0); + goto reg; + /* FIXME: We need this on SHmedia32 because reload generates + some sign-extended HI or QI loads into DImode registers + but, because Pmode is SImode, the address ends up with a + subreg:SI of the DImode register. Maybe reload should be + fixed so as to apply alter_subreg to such loads? + NOTE(review): this FIXME appears to describe the SIGN_EXTEND + case above rather than the IF_THEN_ELSE case below - confirm. */ + case IF_THEN_ELSE: + gcc_assert (trapping_target_operand (x, VOIDmode)); + x = XEXP (XEXP (x, 2), 0); + goto default_output; + case SUBREG: + gcc_assert (SUBREG_BYTE (x) == 0 + && REG_P (SUBREG_REG (x))); + + x = SUBREG_REG (x); + /* Fall through.
*/ + + reg: + case REG: + regno += REGNO (x); + if (FP_REGISTER_P (regno) + && mode == V16SFmode) + fprintf ((stream), "mtrx%s", reg_names[regno] + 2); + else if (FP_REGISTER_P (REGNO (x)) + && mode == V4SFmode) + fprintf ((stream), "fv%s", reg_names[regno] + 2); + else if (REG_P (x) + && mode == V2SFmode) + fprintf ((stream), "fp%s", reg_names[regno] + 2); + else if (FP_REGISTER_P (REGNO (x)) + && GET_MODE_SIZE (mode) > 4) + fprintf ((stream), "d%s", reg_names[regno] + 1); + else + fputs (reg_names[regno], (stream)); + break; + + case MEM: + output_address (XEXP (x, 0)); + break; + + default: + if (TARGET_SH1) + fputc ('#', stream); + output_addr_const (stream, x); + break; + } + break; + } +} +/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P: return true when CODE is one of the punctuation characters . # @ , $ ' > and false otherwise. */ +static bool +sh_print_operand_punct_valid_p (unsigned char code) +{ + return (code == '.' || code == '#' || code == '@' || code == ',' + || code == '$' || code == '\'' || code == '>'); +} + +/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ + +static bool +sh_asm_output_addr_const_extra (FILE *file, rtx x) +{ + if (GET_CODE (x) == UNSPEC) + { + switch (XINT (x, 1)) + { + case UNSPEC_DATALABEL: + fputs ("datalabel ", file); + output_addr_const (file, XVECEXP (x, 0, 0)); + break; + case UNSPEC_PIC: + /* GLOBAL_OFFSET_TABLE or local symbols, no suffix.
*/ + output_addr_const (file, XVECEXP (x, 0, 0)); + break; + case UNSPEC_GOT: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@GOT", file); + break; + case UNSPEC_GOTOFF: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@GOTOFF", file); + break; + case UNSPEC_PLT: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@PLT", file); + break; + case UNSPEC_GOTPLT: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@GOTPLT", file); + break; + case UNSPEC_DTPOFF: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@DTPOFF", file); + break; + case UNSPEC_GOTTPOFF: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@GOTTPOFF", file); + break; + case UNSPEC_TPOFF: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@TPOFF", file); + break; + case UNSPEC_CALLER: + { + char name[32]; + /* LPCS stands for Label for PIC Call Site. */ + targetm.asm_out.generate_internal_label (name, "LPCS", + INTVAL (XVECEXP (x, 0, 0))); + assemble_name (file, name); + } + break; + case UNSPEC_EXTRACT_S16: + case UNSPEC_EXTRACT_U16: + { + rtx val, shift; + + val = XVECEXP (x, 0, 0); + shift = XVECEXP (x, 0, 1); + fputc ('(', file); + if (shift != const0_rtx) + fputc ('(', file); + if (GET_CODE (val) == CONST + || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ) + { + fputc ('(', file); + output_addr_const (file, val); + fputc (')', file); + } + else + output_addr_const (file, val); + if (shift != const0_rtx) + { + fputs (" >> ", file); + output_addr_const (file, shift); + fputc (')', file); + } + fputs (" & 65535)", file); + } + break; + case UNSPEC_SYMOFF: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputc ('-', file); + if (GET_CODE (XVECEXP (x, 0, 1)) == CONST) + { + fputc ('(', file); + output_addr_const (file, XVECEXP (x, 0, 1)); + fputc (')', file); + } + else + output_addr_const (file, XVECEXP (x, 0, 1)); + break; + case UNSPEC_PCREL_SYMOFF: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("-(", file); + output_addr_const (file, XVECEXP (x, 
0, 1)); + fputs ("-.)", file); + break; + default: + return false; + } + return true; + } + else + return false; +} + + +/* Encode symbol attributes of a SYMBOL_REF into its + SYMBOL_REF_FLAGS. */ +static void +sh_encode_section_info (tree decl, rtx rtl, int first) +{ + default_encode_section_info (decl, rtl, first); + + if (TREE_CODE (decl) == FUNCTION_DECL + && sh2a_function_vector_p (decl) && TARGET_SH2A) + SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION; +} + +/* Like force_operand, but guarantees that VALUE ends up in TARGET. */ +static void +force_into (rtx value, rtx target) +{ + value = force_operand (value, target); + if (! rtx_equal_p (value, target)) + emit_insn (gen_move_insn (target, value)); +} + +/* Emit code to perform a block move. Choose the best method. + + OPERANDS[0] is the destination. + OPERANDS[1] is the source. + OPERANDS[2] is the size. + OPERANDS[3] is the alignment safe to use. */ + +int +expand_block_move (rtx *operands) +{ + int align = INTVAL (operands[3]); + int constp = (CONST_INT_P (operands[2])); + int bytes = (constp ? INTVAL (operands[2]) : 0); + + if (! constp) + return 0; + + /* If we could use mov.l to move words and dest is word-aligned, we + can use movua.l for loads and still generate a relatively short + and efficient sequence. */ + if (TARGET_SH4A_ARCH && align < 4 + && MEM_ALIGN (operands[0]) >= 32 + && can_move_by_pieces (bytes, 32)) + { + rtx dest = copy_rtx (operands[0]); + rtx src = copy_rtx (operands[1]); + /* We could use different pseudos for each copied word, but + since movua can only load into r0, it's kind of + pointless. 
*/ + rtx temp = gen_reg_rtx (SImode); + rtx src_addr = copy_addr_to_reg (XEXP (src, 0)); + int copied = 0; + + while (copied + 4 <= bytes) + { + rtx to = adjust_address (dest, SImode, copied); + rtx from = adjust_automodify_address (src, BLKmode, + src_addr, copied); + + set_mem_size (from, GEN_INT (4)); + emit_insn (gen_movua (temp, from)); + emit_move_insn (src_addr, plus_constant (src_addr, 4)); + emit_move_insn (to, temp); + copied += 4; + } + + if (copied < bytes) + move_by_pieces (adjust_address (dest, BLKmode, copied), + adjust_automodify_address (src, BLKmode, + src_addr, copied), + bytes - copied, align, 0); + + return 1; + } + + /* If it isn't a constant number of bytes, or if it doesn't have 4 byte + alignment, or if it isn't a multiple of 4 bytes, then fail. */ + if (align < 4 || (bytes % 4 != 0)) + return 0; + + if (TARGET_HARD_SH4) + { + if (bytes < 12) + return 0; + else if (bytes == 12) + { + rtx func_addr_rtx = gen_reg_rtx (Pmode); + rtx r4 = gen_rtx_REG (SImode, 4); + rtx r5 = gen_rtx_REG (SImode, 5); + + function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC); + force_into (XEXP (operands[0], 0), r4); + force_into (XEXP (operands[1], 0), r5); + emit_insn (gen_block_move_real_i4 (func_addr_rtx)); + return 1; + } + else if (! optimize_size) + { + const char *entry_name; + rtx func_addr_rtx = gen_reg_rtx (Pmode); + int dwords; + rtx r4 = gen_rtx_REG (SImode, 4); + rtx r5 = gen_rtx_REG (SImode, 5); + rtx r6 = gen_rtx_REG (SImode, 6); + + entry_name = (bytes & 4 ? 
"__movmem_i4_odd" : "__movmem_i4_even"); + function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC); + force_into (XEXP (operands[0], 0), r4); + force_into (XEXP (operands[1], 0), r5); + + dwords = bytes >> 3; + emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1))); + emit_insn (gen_block_lump_real_i4 (func_addr_rtx)); + return 1; + } + else + return 0; + } + if (bytes < 64) + { + char entry[30]; + rtx func_addr_rtx = gen_reg_rtx (Pmode); + rtx r4 = gen_rtx_REG (SImode, 4); + rtx r5 = gen_rtx_REG (SImode, 5); + + sprintf (entry, "__movmemSI%d", bytes); + function_symbol (func_addr_rtx, entry, SFUNC_STATIC); + force_into (XEXP (operands[0], 0), r4); + force_into (XEXP (operands[1], 0), r5); + emit_insn (gen_block_move_real (func_addr_rtx)); + return 1; + } + + /* This is the same number of bytes as a memcpy call, but to a different + less common function name, so this will occasionally use more space. */ + if (! optimize_size) + { + rtx func_addr_rtx = gen_reg_rtx (Pmode); + int final_switch, while_loop; + rtx r4 = gen_rtx_REG (SImode, 4); + rtx r5 = gen_rtx_REG (SImode, 5); + rtx r6 = gen_rtx_REG (SImode, 6); + + function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC); + force_into (XEXP (operands[0], 0), r4); + force_into (XEXP (operands[1], 0), r5); + + /* r6 controls the size of the move. 16 is decremented from it + for each 64 bytes moved. Then the negative bit left over is used + as an index into a list of move instructions. e.g., a 72 byte move + would be set up with size(r6) = 14, for one iteration through the + big while loop, and a switch of -2 for the last part. */ + + final_switch = 16 - ((bytes / 4) % 16); + while_loop = ((bytes / 4) / 16 - 1) * 16; + emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch))); + emit_insn (gen_block_lump_real (func_addr_rtx)); + return 1; + } + + return 0; +} + +/* Prepare operands for a move define_expand; specifically, one of the + operands must be in a register. 
*/ + /* OPERANDS[0] is treated as the destination and OPERANDS[1] as the source of a move in MODE; either may be replaced. Always returns 0. */ +int +prepare_move_operands (rtx operands[], enum machine_mode mode) +{ + if ((mode == SImode || mode == DImode) + && flag_pic + && ! ((mode == Pmode || mode == ptr_mode) + && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE)) + { + rtx temp; + if (SYMBOLIC_CONST_P (operands[1])) + { + if (MEM_P (operands[0])) + operands[1] = force_reg (Pmode, operands[1]); + else if (TARGET_SHMEDIA + && GET_CODE (operands[1]) == LABEL_REF + && target_reg_operand (operands[0], mode)) + /* It's ok. */; + else + { + temp = (!can_create_pseudo_p () + ? operands[0] + : gen_reg_rtx (Pmode)); + operands[1] = legitimize_pic_address (operands[1], mode, temp); + } + } + else if (GET_CODE (operands[1]) == CONST + && GET_CODE (XEXP (operands[1], 0)) == PLUS + && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0))) + { + temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); + temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0), + mode, temp); + operands[1] = expand_binop (mode, add_optab, temp, + XEXP (XEXP (operands[1], 0), 1), + (!can_create_pseudo_p () + ? temp + : gen_reg_rtx (Pmode)), + 0, OPTAB_LIB_WIDEN); + } + } + + if (! reload_in_progress && ! reload_completed) + { + /* Copy the source to a register if both operands aren't registers. */ + if (! register_operand (operands[0], mode) + && ! sh_register_operand (operands[1], mode)) + operands[1] = copy_to_mode_reg (mode, operands[1]); + + if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode)) + { + /* This is like change_address_1 (operands[0], mode, 0, 1) , + except that we can't use that function because it is static. */ + rtx new_rtx = change_address (operands[0], mode, 0); + MEM_COPY_ATTRIBUTES (new_rtx, operands[0]); + operands[0] = new_rtx; + } + + /* This case can happen while generating code to move the result + of a library call to the target. Reject `st r0,@(rX,rY)' because + reload will fail to find a spill register for rX, since r0 is already + being used for the source. 
*/ + else if (TARGET_SH1 + && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0) + && MEM_P (operands[0]) + && GET_CODE (XEXP (operands[0], 0)) == PLUS + && REG_P (XEXP (XEXP (operands[0], 0), 1))) + operands[1] = copy_to_mode_reg (mode, operands[1]); + } + /* For pointer-mode moves, expand a TLS symbol source (optionally plus a constant OPC) with one sequence per TLS model. */ + if (mode == Pmode || mode == ptr_mode) + { + rtx op0, op1, opc; + enum tls_model tls_kind; + + op0 = operands[0]; + op1 = operands[1]; + if (GET_CODE (op1) == CONST + && GET_CODE (XEXP (op1, 0)) == PLUS + && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode) + != TLS_MODEL_NONE)) + { + opc = XEXP (XEXP (op1, 0), 1); + op1 = XEXP (XEXP (op1, 0), 0); + } + else + opc = NULL_RTX; + + if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE) + { + rtx tga_op1, tga_ret, tmp, tmp2; + + switch (tls_kind) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + tga_ret = gen_rtx_REG (Pmode, R0_REG); + emit_call_insn (gen_tls_global_dynamic (tga_ret, op1)); + op1 = tga_ret; + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + tga_ret = gen_rtx_REG (Pmode, R0_REG); + emit_call_insn (gen_tls_local_dynamic (tga_ret, op1)); + + tmp = gen_reg_rtx (Pmode); + emit_move_insn (tmp, tga_ret); + + if (register_operand (op0, Pmode)) + tmp2 = op0; + else + tmp2 = gen_reg_rtx (Pmode); + + emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp)); + op1 = tmp2; + break; + + case TLS_MODEL_INITIAL_EXEC: + if (! flag_pic) + { + /* Don't schedule insns for getting GOT address when + the first scheduling is enabled, to avoid spill + failures for R0. */ + if (flag_schedule_insns) + emit_insn (gen_blockage ()); + emit_insn (gen_GOTaddr2picreg ()); + emit_use (gen_rtx_REG (SImode, PIC_REG)); + if (flag_schedule_insns) + emit_insn (gen_blockage ()); + } + tga_op1 = !can_create_pseudo_p () ? 
op0 : gen_reg_rtx (Pmode); + tmp = gen_sym2GOTTPOFF (op1); + emit_insn (gen_tls_initial_exec (tga_op1, tmp)); + op1 = tga_op1; + break; + + case TLS_MODEL_LOCAL_EXEC: + tmp2 = gen_reg_rtx (Pmode); + emit_insn (gen_load_gbr (tmp2)); + tmp = gen_reg_rtx (Pmode); + emit_insn (gen_symTPOFF2reg (tmp, op1)); + + if (register_operand (op0, Pmode)) + op1 = op0; + else + op1 = gen_reg_rtx (Pmode); + + emit_insn (gen_addsi3 (op1, tmp, tmp2)); + break; + + default: + gcc_unreachable (); + } + if (opc) + emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc))); + operands[1] = op1; + } + } + + return 0; +} + /* Canonicalize the operands of a conditional branch in MODE. If COMPARISON is LAST_AND_UNUSED_RTX_CODE the code is taken from OPERANDS[0]; otherwise OPERANDS[4] is used as a scratch register. A constant OPERANDS[1] is swapped with a non-constant OPERANDS[2], comparisons against certain constants are rewritten into equivalent ones, and operands are forced into registers where the checks below require it. Return the comparison code to use. */ +enum rtx_code +prepare_cbranch_operands (rtx *operands, enum machine_mode mode, + enum rtx_code comparison) +{ + rtx op1; + rtx scratch = NULL_RTX; + + if (comparison == LAST_AND_UNUSED_RTX_CODE) + comparison = GET_CODE (operands[0]); + else + scratch = operands[4]; + if (CONST_INT_P (operands[1]) + && !CONST_INT_P (operands[2])) + { + rtx tmp = operands[1]; + + operands[1] = operands[2]; + operands[2] = tmp; + comparison = swap_condition (comparison); + } + if (CONST_INT_P (operands[2])) + { + HOST_WIDE_INT val = INTVAL (operands[2]); + if ((val == -1 || val == -0x81) + && (comparison == GT || comparison == LE)) + { + comparison = (comparison == GT) ? GE : LT; + operands[2] = gen_int_mode (val + 1, mode); + } + else if ((val == 1 || val == 0x80) + && (comparison == GE || comparison == LT)) + { + comparison = (comparison == GE) ? 
GT : LE; + operands[2] = gen_int_mode (val - 1, mode); + } + else if (val == 1 && (comparison == GEU || comparison == LTU)) + { + comparison = (comparison == GEU) ? NE : EQ; + operands[2] = CONST0_RTX (mode); + } + else if (val == 0x80 && (comparison == GEU || comparison == LTU)) + { + comparison = (comparison == GEU) ? GTU : LEU; + operands[2] = gen_int_mode (val - 1, mode); + } + else if (val == 0 && (comparison == GTU || comparison == LEU)) + comparison = (comparison == GTU) ? 
NE : EQ; + else if (mode == SImode + && ((val == 0x7fffffff + && (comparison == GTU || comparison == LEU)) + || ((unsigned HOST_WIDE_INT) val + == (unsigned HOST_WIDE_INT) 0x7fffffff + 1 + && (comparison == GEU || comparison == LTU)))) + { + comparison = (comparison == GTU || comparison == GEU) ? LT : GE; + operands[2] = CONST0_RTX (mode); + } + } + op1 = operands[1]; + if (can_create_pseudo_p ()) + operands[1] = force_reg (mode, op1); + /* When we are handling DImode comparisons, we want to keep constants so + that we can optimize the component comparisons; however, memory loads + are better issued as a whole so that they can be scheduled well. + SImode equality comparisons allow I08 constants, but only when they + compare r0. Hence, if operands[1] has to be loaded from somewhere else + into a register, that register might as well be r0, and we allow the + constant. If it is already in a register, this is likely to be + allocated to a different hard register, thus we load the constant into + a register unless it is zero. 
*/ + if (!REG_P (operands[2]) + && (!CONST_INT_P (operands[2]) + || (mode == SImode && operands[2] != CONST0_RTX (SImode) + && ((comparison != EQ && comparison != NE) + || (REG_P (op1) && REGNO (op1) != R0_REG) + || !satisfies_constraint_I08 (operands[2]))))) + { + if (scratch && GET_MODE (scratch) == mode) + { + emit_move_insn (scratch, operands[2]); + operands[2] = scratch; + } + else if (can_create_pseudo_p ()) + operands[2] = force_reg (mode, operands[2]); + } + return comparison; +} + /* Emit an SImode comparison of OPERANDS[1] with OPERANDS[2] that sets T_REG, followed by a conditional jump to OPERANDS[3]. The operands are first passed through prepare_cbranch_operands. NE, LT, LE, LTU and LEU are emitted as the reversed condition with a branch-on-false (the switch deliberately falls through to the empty default). A non-negative PROBABILITY is attached to the jump as a REG_BR_PROB note. */ +void +expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability) +{ + rtx (*branch_expander) (rtx) = gen_branch_true; + rtx jump; + + comparison = prepare_cbranch_operands (operands, SImode, comparison); + switch (comparison) + { + case NE: case LT: case LE: case LTU: case LEU: + comparison = reverse_condition (comparison); + branch_expander = gen_branch_false; + default: ; + } + emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG), + gen_rtx_fmt_ee (comparison, SImode, + operands[1], operands[2]))); + jump = emit_jump_insn (branch_expander (operands[3])); + if (probability >= 0) + add_reg_note (jump, REG_BR_PROB, GEN_INT (probability)); + +} + +/* ??? How should we distribute probabilities when more than one branch + is generated. So far we only have some ad-hoc observations: + - If the operands are random, they are likely to differ in both parts. + - If comparing items in a hash chain, the operands are random or equal; + operation should be EQ or NE. + - If items are searched in an ordered tree from the root, we can expect + the highpart to be unequal about half of the time; operation should be + an inequality comparison, operands non-constant, and overall probability + about 50%. Likewise for quicksort. + - Range checks will be often made against constants. 
Even if we assume for + simplicity an even distribution of the non-constant operand over a + sub-range here, the same probability could be generated with differently + wide sub-ranges - as long as the ratio of the part of the subrange that + is before the threshold to the part that comes after the threshold stays + the same. Thus, we can't really tell anything here; + assuming random distribution is at least simple. + */ + /* Expand a DImode conditional branch to OPERANDS[3] as up to three SImode branches emitted with expand_cbranchsi4: MSW_TAKEN branches to the target on the high words, MSW_SKIP branches on the high words to a new label placed after the sequence, and LSW_TAKEN branches to the target on the low words. OPERANDS[4] is the scratch register used after reload for a second operand that is not arith_reg_or_0_operand. With TARGET_CMPEQDI_T, EQ and NE are instead emitted as a single cmpeqdi_t followed by one branch. Return false if COMPARISON is not handled, true otherwise. */ +bool +expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) +{ + enum rtx_code msw_taken, msw_skip, lsw_taken; + rtx skip_label = NULL_RTX; + rtx op1h, op1l, op2h, op2l; + int num_branches; + int prob, rev_prob; + int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1; + rtx scratch = operands[4]; + + comparison = prepare_cbranch_operands (operands, DImode, comparison); + op1h = gen_highpart_mode (SImode, DImode, operands[1]); + op2h = gen_highpart_mode (SImode, DImode, operands[2]); + op1l = gen_lowpart (SImode, operands[1]); + op2l = gen_lowpart (SImode, operands[2]); + msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE; + prob = split_branch_probability; + rev_prob = REG_BR_PROB_BASE - prob; + switch (comparison) + { + /* ??? Should we use the cmpeqdi_t pattern for equality comparisons? + That costs 1 cycle more when the first branch can be predicted taken, + but saves us mispredicts because only one branch needs prediction. + It also enables generating the cmpeqdi_t-1 pattern. */ + case EQ: + if (TARGET_CMPEQDI_T) + { + emit_insn (gen_cmpeqdi_t (operands[1], operands[2])); + emit_jump_insn (gen_branch_true (operands[3])); + return true; + } + msw_skip = NE; + lsw_taken = EQ; + if (prob >= 0) + { + /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) . + */ + msw_skip_prob = rev_prob; + if (REG_BR_PROB_BASE <= 65535) + lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0; + else + { + gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64); + lsw_taken_prob + = (prob + ? 
(REG_BR_PROB_BASE + - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob + / ((HOST_WIDEST_INT) prob << 32))) + : 0); + } + } + break; + case NE: + if (TARGET_CMPEQDI_T) + { + emit_insn (gen_cmpeqdi_t (operands[1], operands[2])); + emit_jump_insn (gen_branch_false (operands[3])); + return true; + } + msw_taken = NE; + msw_taken_prob = prob; + lsw_taken = NE; + lsw_taken_prob = 0; + break; + case GTU: case GT: + msw_taken = comparison; + if (CONST_INT_P (op2l) && INTVAL (op2l) == -1) + break; + if (comparison != GTU || op2h != CONST0_RTX (SImode)) + msw_skip = swap_condition (msw_taken); + lsw_taken = GTU; + break; + case GEU: case GE: + if (op2l == CONST0_RTX (SImode)) + msw_taken = comparison; + else + { + msw_taken = comparison == GE ? GT : GTU; + msw_skip = swap_condition (msw_taken); + lsw_taken = GEU; + } + break; + case LTU: case LT: + msw_taken = comparison; + if (op2l == CONST0_RTX (SImode)) + break; + msw_skip = swap_condition (msw_taken); + lsw_taken = LTU; + break; + case LEU: case LE: + if (CONST_INT_P (op2l) && INTVAL (op2l) == -1) + msw_taken = comparison; + else + { + lsw_taken = LEU; + if (comparison == LE) + msw_taken = LT; + else if (op2h != CONST0_RTX (SImode)) + msw_taken = LTU; + else + { + msw_skip = swap_condition (LTU); + break; + } + msw_skip = swap_condition (msw_taken); + } + break; + default: return false; + } + num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE) + + (msw_skip != LAST_AND_UNUSED_RTX_CODE) + + (lsw_taken != LAST_AND_UNUSED_RTX_CODE)); + if (comparison != EQ && comparison != NE && num_branches > 1) + { + if (!CONSTANT_P (operands[2]) + && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U) + && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U)) + { + msw_taken_prob = prob / 2U; + msw_skip_prob + = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob); + lsw_taken_prob = prob; + } + else + { + msw_taken_prob = prob; + msw_skip_prob = REG_BR_PROB_BASE; + /* ??? 
If we have a constant op2h, should we use that when + calculating lsw_taken_prob? */ + lsw_taken_prob = prob; + } + } + operands[1] = op1h; + operands[2] = op2h; + operands[4] = NULL_RTX; + if (reload_completed + && ! arith_reg_or_0_operand (op2h, SImode) + && (true_regnum (op1h) || (comparison != EQ && comparison != NE)) + && (msw_taken != LAST_AND_UNUSED_RTX_CODE + || msw_skip != LAST_AND_UNUSED_RTX_CODE)) + { + emit_move_insn (scratch, operands[2]); + operands[2] = scratch; + } + if (msw_taken != LAST_AND_UNUSED_RTX_CODE) + expand_cbranchsi4 (operands, msw_taken, msw_taken_prob); + if (msw_skip != LAST_AND_UNUSED_RTX_CODE) + { + rtx taken_label = operands[3]; + + /* Operands were possibly modified, but msw_skip doesn't expect this. + Always use the original ones. */ + if (msw_taken != LAST_AND_UNUSED_RTX_CODE) + { + operands[1] = op1h; + operands[2] = op2h; + if (reload_completed + && ! arith_reg_or_0_operand (op2h, SImode) + && (true_regnum (op1h) || (comparison != EQ && comparison != NE))) + { + emit_move_insn (scratch, operands[2]); + operands[2] = scratch; + } + } + + operands[3] = skip_label = gen_label_rtx (); + expand_cbranchsi4 (operands, msw_skip, msw_skip_prob); + operands[3] = taken_label; + } + operands[1] = op1l; + operands[2] = op2l; + if (lsw_taken != LAST_AND_UNUSED_RTX_CODE) + { + if (reload_completed + && ! arith_reg_or_0_operand (op2l, SImode) + && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE))) + { + emit_move_insn (scratch, operands[2]); + operands[2] = scratch; + } + expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob); + } + if (msw_skip != LAST_AND_UNUSED_RTX_CODE) + emit_label (skip_label); + return true; +} + +/* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. 
*/ + +static void +sh_emit_set_t_insn (rtx insn, enum machine_mode mode) +{ + if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT) + { + insn = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, insn, + gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))); + (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn); + } + else + emit_insn (insn); +} + +/* Prepare the operands for an scc instruction; make sure that the + compare has been done and the result is in T_REG. */ +void +sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1) +{ + rtx t_reg = gen_rtx_REG (SImode, T_REG); + enum rtx_code oldcode = code; + enum machine_mode mode; + + /* First need a compare insn. */ + switch (code) + { + case NE: + /* It isn't possible to handle this case. */ + gcc_unreachable (); + case LT: + code = GT; + break; + case LE: + code = GE; + break; + case LTU: + code = GTU; + break; + case LEU: + code = GEU; + break; + default: + break; + } + if (code != oldcode) + { + rtx tmp = op0; + op0 = op1; + op1 = tmp; + } + + mode = GET_MODE (op0); + if (mode == VOIDmode) + mode = GET_MODE (op1); + + op0 = force_reg (mode, op0); + if ((code != EQ && code != NE + && (op1 != const0_rtx + || code == GTU || code == GEU || code == LTU || code == LEU)) + || (mode == DImode && op1 != const0_rtx) + || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) + op1 = force_reg (mode, op1); + + sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg, + gen_rtx_fmt_ee (code, SImode, op0, op1)), + mode); +} + /* SHmedia only (asserted). Emit a cstore4_media insn that computes CODE of OP0 and OP1 -- or the reversed condition for NE, GE, LE, ORDERED, GEU and LEU -- into a new SImode register, which is extended to DImode with extendsidi2 when MODE is DImode. Return a comparison of that register against zero: NE when CODE itself was stored, EQ when the reversed condition was stored. Return NULL_RTX for UNEQ, UNGE, UNGT, UNLE, UNLT and LTGT. NOTE(review): the first group builds the comparison rtx in SImode while the reversed group uses MODE; this looks inconsistent -- confirm which is intended. */ +rtx +sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code, + rtx op0, rtx op1) +{ + rtx target = gen_reg_rtx (SImode); + rtx tmp; + + gcc_assert (TARGET_SHMEDIA); + switch (code) + { + case EQ: + case GT: + case LT: + case UNORDERED: + case GTU: + case LTU: + tmp = gen_rtx_fmt_ee (code, SImode, op0, op1); + emit_insn (gen_cstore4_media (target, tmp, op0, op1)); + code = NE; + break; + + case NE: + case GE: + case LE: + case ORDERED: + case GEU: + case LEU: + tmp = gen_rtx_fmt_ee 
(reverse_condition (code), mode, op0, op1); + emit_insn (gen_cstore4_media (target, tmp, op0, op1)); + code = EQ; + break; + + case UNEQ: + case UNGE: + case UNGT: + case UNLE: + case UNLT: + case LTGT: + return NULL_RTX; + + default: + gcc_unreachable (); + } + + if (mode == DImode) + { + rtx t2 = gen_reg_rtx (DImode); + emit_insn (gen_extendsidi2 (t2, target)); + target = t2; + } + + return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx); +} + +/* Called from the md file, set up the operands of a compare instruction. */ + +void +sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode) +{ + enum rtx_code code = GET_CODE (operands[0]); + enum rtx_code branch_code; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + rtx insn, tem; + bool need_ccmpeq = false; + + if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT) + { + op0 = force_reg (mode, op0); + op1 = force_reg (mode, op1); + } + else + { + if (code != EQ || mode == DImode) + { + /* Force args into regs, since we can't use constants here. */ + op0 = force_reg (mode, op0); + if (op1 != const0_rtx || code == GTU || code == GEU) + op1 = force_reg (mode, op1); + } + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (code == LT + || (code == LE && TARGET_IEEE && TARGET_SH2E) + || (code == GE && !(TARGET_IEEE && TARGET_SH2E))) + { + tem = op0, op0 = op1, op1 = tem; + code = swap_condition (code); + } + + /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */ + if (code == GE) + { + gcc_assert (TARGET_IEEE && TARGET_SH2E); + need_ccmpeq = true; + code = GT; + } + + /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed + to EQ/GT respectively. 
*/ + gcc_assert (code == EQ || code == GT || code == NE || code == LE); + } + + switch (code) + { + case EQ: + case GT: + case GE: + case GTU: + case GEU: + branch_code = code; + break; + case NE: + case LT: + case LE: + case LTU: + case LEU: + branch_code = reverse_condition (code); + break; + default: + gcc_unreachable (); + } + + insn = gen_rtx_SET (VOIDmode, + gen_rtx_REG (SImode, T_REG), + gen_rtx_fmt_ee (branch_code, SImode, op0, op1)); + + sh_emit_set_t_insn (insn, mode); + if (need_ccmpeq) + sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode); + + if (branch_code == code) + emit_jump_insn (gen_branch_true (operands[3])); + else + emit_jump_insn (gen_branch_false (operands[3])); +} + /* Emit code that stores into OPERANDS[0] the result of comparing OPERANDS[2] with OPERANDS[3] in MODE, using the comparison code of OPERANDS[1]. The comparison is computed into T_REG by sh_emit_scc_to_t. NE, and floating-point GE without TARGET_IEEE, are computed as EQ and LT respectively and the result is then negated with movnegt. Floating-point GE with TARGET_IEEE is emitted as an EQ test that branches over a following GT test. */ +void +sh_emit_compare_and_set (rtx *operands, enum machine_mode mode) +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx op0 = operands[2]; + rtx op1 = operands[3]; + rtx lab = NULL_RTX; + bool invert = false; + rtx tem; + + op0 = force_reg (mode, op0); + if ((code != EQ && code != NE + && (op1 != const0_rtx + || code == GTU || code == GEU || code == LTU || code == LEU)) + || (mode == DImode && op1 != const0_rtx) + || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) + op1 = force_reg (mode, op1); + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (code == LT || code == LE) + { + code = swap_condition (code); + tem = op0, op0 = op1, op1 = tem; + } + if (code == GE) + { + if (TARGET_IEEE) + { + lab = gen_label_rtx (); + sh_emit_scc_to_t (EQ, op0, op1); + emit_jump_insn (gen_branch_true (lab)); + code = GT; + } + else + { + code = LT; + invert = true; + } + } + } + + if (code == NE) + { + code = EQ; + invert = true; + } + + sh_emit_scc_to_t (code, op0, op1); + if (lab) + emit_label (lab); + if (invert) + emit_insn (gen_movnegt (operands[0])); + else + emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG)); +} + +/* Functions to output assembly code. */ + +/* Return a sequence of instructions to perform DI or DF move. 
+ + Since the SH cannot move a DI or DF in one instruction, we have + to take care when we see overlapping source and dest registers. */ + +const char * +output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[], + enum machine_mode mode) +{ + rtx dst = operands[0]; + rtx src = operands[1]; + + if (MEM_P (dst) + && GET_CODE (XEXP (dst, 0)) == PRE_DEC) + return "mov.l %T1,%0\n\tmov.l %1,%0"; + + if (register_operand (dst, mode) + && register_operand (src, mode)) + { + if (REGNO (src) == MACH_REG) + return "sts mach,%S0\n\tsts macl,%R0"; + + /* When mov.d r1,r2 do r2->r3 then r1->r2; + when mov.d r1,r0 do r1->r0 then r2->r1. */ + + if (REGNO (src) + 1 == REGNO (dst)) + return "mov %T1,%T0\n\tmov %1,%0"; + else + return "mov %1,%0\n\tmov %T1,%T0"; + } + else if (CONST_INT_P (src)) + { + if (INTVAL (src) < 0) + output_asm_insn ("mov #-1,%S0", operands); + else + output_asm_insn ("mov #0,%S0", operands); + + return "mov %1,%R0"; + } + else if (MEM_P (src)) + { + int ptrreg = -1; + int dreg = REGNO (dst); + rtx inside = XEXP (src, 0); + + switch (GET_CODE (inside)) + { + case REG: + ptrreg = REGNO (inside); + break; + + case SUBREG: + ptrreg = subreg_regno (inside); + break; + + case PLUS: + ptrreg = REGNO (XEXP (inside, 0)); + /* ??? A r0+REG address shouldn't be possible here, because it isn't + an offsettable address. Unfortunately, offsettable addresses use + QImode to check the offset, and a QImode offsettable address + requires r0 for the other operand, which is not currently + supported, so we can't use the 'o' constraint. + Thus we must check for and handle r0+REG addresses here. + We punt for now, since this is likely very rare. */ + gcc_assert (!REG_P (XEXP (inside, 1))); + break; + + case LABEL_REF: + return "mov.l %1,%0\n\tmov.l %1+4,%T0"; + case POST_INC: + return "mov.l %1,%0\n\tmov.l %1,%T0"; + default: + gcc_unreachable (); + } + + /* Work out the safe way to copy. Copy into the second half first. 
*/ + if (dreg == ptrreg) + return "mov.l %T1,%T0\n\tmov.l %1,%0"; + } + + return "mov.l %1,%0\n\tmov.l %T1,%T0"; +} + +/* Print an instruction which would have gone into a delay slot after + another instruction, but couldn't because the other instruction expanded + into a sequence where putting the slot insn at the end wouldn't work. */ + +static void +print_slot (rtx insn) +{ + final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL); + + INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1; +} + /* Output the assembly for jump INSN to label OP when the target may be beyond the reach of a plain branch. The destination is loaded from a constant emitted right after the jump sequence: a .word displacement used with braf when TARGET_SH2 and the offset passes the 16-bit range check below, otherwise a .long (a label difference when flag_pic). The scratch register is the one set by an immediately preceding indirect_jump_scratch insn if there is one; otherwise r13 is used, saved before and reloaded after the jump template (in MACL on TARGET_SH5, on the stack elsewhere). Always returns the empty string, since everything is output directly. */ +const char * +output_far_jump (rtx insn, rtx op) +{ + struct { rtx lab, reg, op; } this_jmp; + rtx braf_base_lab = NULL_RTX; + const char *jump; + int far; + int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); + rtx prev; + + this_jmp.lab = gen_label_rtx (); + + if (TARGET_SH2 + && offset >= -32764 + && offset - get_attr_length (insn) <= 32766) + { + far = 0; + jump = "mov.w %O0,%1; braf %1"; + } + else + { + far = 1; + if (flag_pic) + { + if (TARGET_SH2) + jump = "mov.l %O0,%1; braf %1"; + else + jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1"; + } + else + jump = "mov.l %O0,%1; jmp @%1"; + } + /* If we have a scratch register available, use it. */ + if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn))) + && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) + { + this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0)); + if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2) + jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1"; + output_asm_insn (jump, &this_jmp.lab); + if (dbr_sequence_length ()) + print_slot (final_sequence); + else + output_asm_insn ("nop", 0); + } + else + { + /* Output the delay slot insn first if any. */ + if (dbr_sequence_length ()) + print_slot (final_sequence); + + this_jmp.reg = gen_rtx_REG (SImode, 13); + /* We must keep the stack aligned to 8-byte boundaries on SH5. 
+ Fortunately, MACL is fixed and call-clobbered, and we never + need its value across jumps, so save r13 in it instead of in + the stack. */ + if (TARGET_SH5) + output_asm_insn ("lds r13, macl", 0); + else + output_asm_insn ("mov.l r13,@-r15", 0); + output_asm_insn (jump, &this_jmp.lab); + if (TARGET_SH5) + output_asm_insn ("sts macl, r13", 0); + else + output_asm_insn ("mov.l @r15+,r13", 0); + } + if (far && flag_pic && TARGET_SH2) + { + braf_base_lab = gen_label_rtx (); + (*targetm.asm_out.internal_label) (asm_out_file, "L", + CODE_LABEL_NUMBER (braf_base_lab)); + } + if (far) + output_asm_insn (".align 2", 0); + (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab)); + this_jmp.op = op; + if (far && flag_pic) + { + if (TARGET_SH2) + this_jmp.lab = braf_base_lab; + output_asm_insn (".long %O2-%O0", &this_jmp.lab); + } + else + output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab); + return ""; +} + +/* Local label counter, used for constants in the pool and inside + pattern branches. */ + +static int lf = 100; + +/* Output code for ordinary branches. LOGIC nonzero selects a branch on true (bt), zero a branch on false (bf), to the label in OPERANDS[0]. The sequence is chosen from the length attribute of INSN: 2 is a plain conditional branch; 6 and 8 (unless TARGET_RELAX) branch around an unconditional bra with the inverted condition; 4 is a delayed conditional branch followed by a nop. */ + +const char * +output_branch (int logic, rtx insn, rtx *operands) +{ + switch (get_attr_length (insn)) + { + case 6: + /* This can happen if filling the delay slot has caused a forward + branch to exceed its range (we could reverse it, but only + when we know we won't overextend other branches; this should + best be handled by relaxation). + It can also happen when other condbranches hoist delay slot insn + from their destination, thus leading to code size increase. + But the branch will still be in the range -4092..+4098 bytes. */ + + if (! TARGET_RELAX) + { + int label = lf++; + /* The call to print_slot will clobber the operands. */ + rtx op0 = operands[0]; + + /* If the instruction in the delay slot is annulled (true), then + there is no delay slot where we can put it now. The only safe + place for it is after the label. final will do that by default. 
*/ + + if (final_sequence + && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)) + && get_attr_length (XVECEXP (final_sequence, 0, 1))) + { + asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t", + ASSEMBLER_DIALECT ? "/" : ".", label); + print_slot (final_sequence); + } + else + asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label); + + output_asm_insn ("bra\t%l0", &op0); + fprintf (asm_out_file, "\tnop\n"); + (*targetm.asm_out.internal_label) (asm_out_file, "LF", label); + + return ""; + } + /* When relaxing, handle this like a short branch. The linker + will fix it up if it still doesn't fit after relaxation. */ + case 2: + return logic ? "bt%.\t%l0" : "bf%.\t%l0"; + + /* These are for SH2e, in which we have to account for the + extra nop because of the hardware bug in annulled branches. */ + case 8: + if (! TARGET_RELAX) + { + int label = lf++; + + gcc_assert (!final_sequence + || !(INSN_ANNULLED_BRANCH_P + (XVECEXP (final_sequence, 0, 0)))); + /* NOTE(review): unlike the templates in case 6, this one has no leading tab. */ + asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n", + logic ? "f" : "t", + ASSEMBLER_DIALECT ? "/" : ".", label); + fprintf (asm_out_file, "\tnop\n"); + output_asm_insn ("bra\t%l0", operands); + fprintf (asm_out_file, "\tnop\n"); + (*targetm.asm_out.internal_label) (asm_out_file, "LF", label); + + return ""; + } + /* When relaxing, fall through. */ + case 4: + { + char buffer[10]; + + sprintf (buffer, "b%s%ss\t%%l0", + logic ? "t" : "f", + ASSEMBLER_DIALECT ? "/" : "."); + output_asm_insn (buffer, &operands[0]); + return "nop"; + } + + default: + /* There should be no longer branches now - that would + indicate that something has destroyed the branches set + up in machine_dependent_reorg. */ + gcc_unreachable (); + } +} + +/* Output a code sequence for INSN using TEMPL with OPERANDS; but before, + fill in operands 9 as a label to the successor insn. + We try to use jump threading where possible. 
+ IF CODE matches the comparison in the IF_THEN_ELSE of a following jump, + we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means + follow jmp and bt, if the address is in range. */ +const char * +output_branchy_insn (enum rtx_code code, const char *templ, + rtx insn, rtx *operands) +{ + rtx next_insn = NEXT_INSN (insn); + + if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn)) + { + rtx src = SET_SRC (PATTERN (next_insn)); + if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code) + { + /* Following branch not taken */ + operands[9] = gen_label_rtx (); + emit_label_after (operands[9], next_insn); + INSN_ADDRESSES_NEW (operands[9], + INSN_ADDRESSES (INSN_UID (next_insn)) + + get_attr_length (next_insn)); + return templ; + } + else + { + int offset = (branch_dest (next_insn) + - INSN_ADDRESSES (INSN_UID (next_insn)) + 4); + if (offset >= -252 && offset <= 258) + { + if (GET_CODE (src) == IF_THEN_ELSE) + /* branch_true */ + src = XEXP (src, 1); + operands[9] = src; + return templ; + } + } + } + operands[9] = gen_label_rtx (); + emit_label_after (operands[9], insn); + INSN_ADDRESSES_NEW (operands[9], + INSN_ADDRESSES (INSN_UID (insn)) + + get_attr_length (insn)); + return templ; +} + +const char * +output_ieee_ccmpeq (rtx insn, rtx *operands) +{ + return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0", + insn, operands); +} + +/* Output the start of the assembler file. */ + +static void +sh_file_start (void) +{ + default_file_start (); + +#ifdef SYMBIAN + /* Declare the .directive section before it is used. */ + fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file); + fputs ("\t.asciz \"##\\n\"\n", asm_out_file); +#endif + + if (TARGET_ELF) + /* We need to show the text section with the proper + attributes as in TEXT_SECTION_ASM_OP, before dwarf2out + emits it without attributes in TEXT_SECTION_ASM_OP, else GAS + will complain. 
We can teach GAS specifically about the + default attributes for our choice of text section, but + then we would have to change GAS again if/when we change + the text section name. */ + fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP); + else + /* Switch to the data section so that the coffsem symbol + isn't in the text section. */ + switch_to_section (data_section); + + if (TARGET_LITTLE_ENDIAN) + fputs ("\t.little\n", asm_out_file); + + if (!TARGET_ELF) + { + if (TARGET_SHCOMPACT) + fputs ("\t.mode\tSHcompact\n", asm_out_file); + else if (TARGET_SHMEDIA) + fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n", + TARGET_SHMEDIA64 ? 64 : 32); + } +} + +/* Check if PAT includes UNSPEC_CALLER unspec pattern. */ + +static bool +unspec_caller_rtx_p (rtx pat) +{ + rtx base, offset; + int i; + + split_const (pat, &base, &offset); + if (GET_CODE (base) == UNSPEC) + { + if (XINT (base, 1) == UNSPEC_CALLER) + return true; + for (i = 0; i < XVECLEN (base, 0); i++) + if (unspec_caller_rtx_p (XVECEXP (base, 0, i))) + return true; + } + return false; +} + +/* Indicate that INSN cannot be duplicated. This is true for insn + that generates a unique label. */ + +static bool +sh_cannot_copy_insn_p (rtx insn) +{ + rtx pat; + + if (!reload_completed || !flag_pic) + return false; + + if (!NONJUMP_INSN_P (insn)) + return false; + if (asm_noperands (insn) >= 0) + return false; + + pat = PATTERN (insn); + if (GET_CODE (pat) != SET) + return false; + pat = SET_SRC (pat); + + if (unspec_caller_rtx_p (pat)) + return true; + + return false; +} + +/* Actual number of instructions used to make a shift by N. */ +static const char ashiftrt_insns[] = + { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2}; + +/* Left shift and logical right shift are the same. */ +static const char shift_insns[] = + { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; + +/* Individual shift amounts needed to get the above length sequences. 
+ One bit right shifts clobber the T bit, so when possible, put one bit + shifts in the middle of the sequence, so the ends are eligible for + branch delay slots. */ +static const short shift_amounts[32][5] = { + {0}, {1}, {2}, {2, 1}, + {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2}, + {8}, {8, 1}, {8, 2}, {8, 1, 2}, + {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8}, + {16}, {16, 1}, {16, 2}, {16, 1, 2}, + {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, + {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, + {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; + +/* Likewise, but for shift amounts < 16, up to three highmost bits + might be clobbered. This is typically used when combined with some + kind of sign or zero extension. */ + +static const char ext_shift_insns[] = + { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; + +static const short ext_shift_amounts[32][4] = { + {0}, {1}, {2}, {2, 1}, + {2, 2}, {2, 1, 2}, {8, -2}, {8, -1}, + {8}, {8, 1}, {8, 2}, {8, 1, 2}, + {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1}, + {16}, {16, 1}, {16, 2}, {16, 1, 2}, + {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, + {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, + {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; + +/* Assuming we have a value that has been sign-extended by at least one bit, + can we use the ext_shift_amounts with the last shift turned to an arithmetic shift + to shift it by N without data loss, and quicker than by other means? */ +#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15) + +/* This is used in length attributes in sh.md to help compute the length + of arbitrary constant shift instructions. 
*/ + +int +shift_insns_rtx (rtx insn) +{ + rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + int shift_count = INTVAL (XEXP (set_src, 1)) & 31; + enum rtx_code shift_code = GET_CODE (set_src); + + switch (shift_code) + { + case ASHIFTRT: + return ashiftrt_insns[shift_count]; + case LSHIFTRT: + case ASHIFT: + return shift_insns[shift_count]; + default: + gcc_unreachable (); + } +} + +/* Return the cost of a shift. */ + +static inline int +shiftcosts (rtx x) +{ + int value; + + if (TARGET_SHMEDIA) + return 1; + + if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) + { + if (GET_MODE (x) == DImode + && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) == 1) + return 2; + + /* Everything else is invalid, because there is no pattern for it. */ + return MAX_COST; + } + /* If shift by a non constant, then this will be expensive. */ + if (!CONST_INT_P (XEXP (x, 1))) + return SH_DYNAMIC_SHIFT_COST; + + /* Otherwise, return the true cost in instructions. Cope with out of range + shift counts more or less arbitrarily. */ + value = INTVAL (XEXP (x, 1)) & 31; + + if (GET_CODE (x) == ASHIFTRT) + { + int cost = ashiftrt_insns[value]; + /* If SH3, then we put the constant in a reg and use shad. */ + if (cost > 1 + SH_DYNAMIC_SHIFT_COST) + cost = 1 + SH_DYNAMIC_SHIFT_COST; + return cost; + } + else + return shift_insns[value]; +} + +/* Return the cost of an AND operation. */ + +static inline int +andcosts (rtx x) +{ + int i; + + /* Anding with a register is a single cycle and instruction. */ + if (!CONST_INT_P (XEXP (x, 1))) + return 1; + + i = INTVAL (XEXP (x, 1)); + + if (TARGET_SHMEDIA) + { + if (satisfies_constraint_I10 (XEXP (x, 1)) + || satisfies_constraint_J16 (XEXP (x, 1))) + return 1; + else + return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size); + } + + /* These constants are single cycle extu.[bw] instructions. 
*/ + if (i == 0xff || i == 0xffff) + return 1; + /* Constants that can be used in an and immediate instruction in a single + cycle, but this requires r0, so make it a little more expensive. */ + if (CONST_OK_FOR_K08 (i)) + return 2; + /* Constants that can be loaded with a mov immediate and an and. + This case is probably unnecessary. */ + if (CONST_OK_FOR_I08 (i)) + return 2; + /* Any other constants requires a 2 cycle pc-relative load plus an and. + This case is probably unnecessary. */ + return 3; +} + +/* Return the cost of an addition or a subtraction. */ + +static inline int +addsubcosts (rtx x) +{ + /* Adding a register is a single cycle insn. */ + if (REG_P (XEXP (x, 1)) + || GET_CODE (XEXP (x, 1)) == SUBREG) + return 1; + + /* Likewise for small constants. */ + if (CONST_INT_P (XEXP (x, 1)) + && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1)))) + return 1; + + if (TARGET_SHMEDIA) + switch (GET_CODE (XEXP (x, 1))) + { + case CONST: + case LABEL_REF: + case SYMBOL_REF: + return TARGET_SHMEDIA64 ? 5 : 3; + + case CONST_INT: + if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)))) + return 2; + else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16)) + return 3; + else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16)) + return 4; + + /* Fall through. */ + default: + return 5; + } + + /* Any other constant requires a 2 cycle pc-relative load plus an + addition. */ + return 3; +} + +/* Return the cost of a multiply. */ +static inline int +multcosts (rtx x ATTRIBUTE_UNUSED) +{ + if (sh_multcost >= 0) + return sh_multcost; + if (TARGET_SHMEDIA) + /* ??? We have a mul insn, but it has a latency of three, and doesn't + accept constants. Ideally, we would use a cost of one or two and + add the cost of the operand, but disregard the latter when inside loops + and loop invariant code motion is still to follow. + Using a multiply first and splitting it later if it's a loss + doesn't work because of different sign / zero extension semantics + of multiplies vs. shifts. 
*/ + return optimize_size ? 2 : 3; + + if (TARGET_SH2) + { + /* We have a mul insn, so we can never take more than the mul and the + read of the mac reg, but count more because of the latency and extra + reg usage. */ + if (optimize_size) + return 2; + return 3; + } + + /* If we're aiming at small code, then just count the number of + insns in a multiply call sequence. */ + if (optimize_size) + return 5; + + /* Otherwise count all the insns in the routine we'd be calling too. */ + return 20; +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +sh_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) +{ + switch (code) + { + case CONST_INT: + if (TARGET_SHMEDIA) + { + if (INTVAL (x) == 0) + *total = 0; + else if (outer_code == AND && and_operand ((x), DImode)) + *total = 0; + else if ((outer_code == IOR || outer_code == XOR + || outer_code == PLUS) + && CONST_OK_FOR_I10 (INTVAL (x))) + *total = 0; + else if (CONST_OK_FOR_I16 (INTVAL (x))) + *total = COSTS_N_INSNS (outer_code != SET); + else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16)) + *total = COSTS_N_INSNS ((outer_code != SET) + 1); + else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16)) + *total = COSTS_N_INSNS ((outer_code != SET) + 2); + else + *total = COSTS_N_INSNS ((outer_code != SET) + 3); + return true; + } + if (CONST_OK_FOR_I08 (INTVAL (x))) + *total = 0; + else if ((outer_code == AND || outer_code == IOR || outer_code == XOR) + && CONST_OK_FOR_K08 (INTVAL (x))) + *total = 1; + /* prepare_cmp_insn will force costly constants int registers before + the cbranch[sd]i4 patterns can see them, so preserve potentially + interesting ones not covered by I08 above. 
*/ + else if (outer_code == COMPARE + && ((unsigned HOST_WIDE_INT) INTVAL (x) + == (unsigned HOST_WIDE_INT) 0x7fffffff + 1 + || INTVAL (x) == 0x7fffffff + || INTVAL (x) == 0x80 || INTVAL (x) == -0x81)) + *total = 1; + else + *total = 8; + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + if (TARGET_SHMEDIA64) + *total = COSTS_N_INSNS (4); + else if (TARGET_SHMEDIA32) + *total = COSTS_N_INSNS (2); + else + *total = 5; + return true; + + case CONST_DOUBLE: + if (TARGET_SHMEDIA) + *total = COSTS_N_INSNS (4); + /* prepare_cmp_insn will force costly constants int registers before + the cbranchdi4 pattern can see them, so preserve potentially + interesting ones. */ + else if (outer_code == COMPARE && GET_MODE (x) == DImode) + *total = 1; + else + *total = 10; + return true; + case CONST_VECTOR: + if (x == CONST0_RTX (GET_MODE (x))) + *total = 0; + else if (sh_1el_vec (x, VOIDmode)) + *total = outer_code != SET; + if (sh_rep_vec (x, VOIDmode)) + *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4 + + (outer_code != SET)); + *total = COSTS_N_INSNS (3) + (outer_code != SET); + return true; + + case PLUS: + case MINUS: + *total = COSTS_N_INSNS (addsubcosts (x)); + return true; + + case AND: + *total = COSTS_N_INSNS (andcosts (x)); + return true; + + case MULT: + *total = COSTS_N_INSNS (multcosts (x)); + return true; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + *total = COSTS_N_INSNS (shiftcosts (x)); + return true; + + case DIV: + case UDIV: + case MOD: + case UMOD: + *total = COSTS_N_INSNS (20); + return true; + + case PARALLEL: + if (sh_1el_vec (x, VOIDmode)) + *total = outer_code != SET; + if (sh_rep_vec (x, VOIDmode)) + *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4 + + (outer_code != SET)); + *total = COSTS_N_INSNS (3) + (outer_code != SET); + return true; + + case FLOAT: + case FIX: + *total = 100; + return true; + + default: + return false; + } +} + +/* Compute the cost of an address. 
For the SH, all valid addresses are + the same cost. Use a slightly higher cost for reg + reg addressing, + since it increases pressure on r0. */ + +static int +sh_address_cost (rtx X, + bool speed ATTRIBUTE_UNUSED) +{ + return (GET_CODE (X) == PLUS + && ! CONSTANT_P (XEXP (X, 1)) + && ! TARGET_SHMEDIA ? 1 : 0); +} + +/* Code to expand a shift. */ + +void +gen_ashift (int type, int n, rtx reg) +{ + /* Negative values here come from the shift_amounts array. */ + if (n < 0) + { + if (type == ASHIFT) + type = LSHIFTRT; + else + type = ASHIFT; + n = -n; + } + + switch (type) + { + case ASHIFTRT: + emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n))); + break; + case LSHIFTRT: + if (n == 1) + emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n))); + else + emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n))); + break; + case ASHIFT: + emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n))); + break; + } +} + +/* Same for HImode */ + +void +gen_ashift_hi (int type, int n, rtx reg) +{ + /* Negative values here come from the shift_amounts array. */ + if (n < 0) + { + if (type == ASHIFT) + type = LSHIFTRT; + else + type = ASHIFT; + n = -n; + } + + switch (type) + { + case ASHIFTRT: + case LSHIFTRT: + /* We don't have HImode right shift operations because using the + ordinary 32 bit shift instructions for that doesn't generate proper + zero/sign extension. + gen_ashift_hi is only called in contexts where we know that the + sign extension works out correctly. */ + { + int offset = 0; + if (GET_CODE (reg) == SUBREG) + { + offset = SUBREG_BYTE (reg); + reg = SUBREG_REG (reg); + } + gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset)); + break; + } + case ASHIFT: + emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n))); + break; + } +} + +/* Output RTL to split a constant shift into its component SH constant + shift instructions. */ + +void +gen_shifty_op (int code, rtx *operands) +{ + int value = INTVAL (operands[2]); + int max, i; + + /* Truncate the shift count in case it is out of bounds. 
*/ + value = value & 31; + + if (value == 31) + { + if (code == LSHIFTRT) + { + emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); + emit_insn (gen_movt (operands[0])); + return; + } + else if (code == ASHIFT) + { + /* There is a two instruction sequence for 31 bit left shifts, + but it requires r0. */ + if (REG_P (operands[0]) && REGNO (operands[0]) == 0) + { + emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); + emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); + return; + } + } + } + else if (value == 0) + { + /* This can happen even when optimizing, if there were subregs before + reload. Don't output a nop here, as this is never optimized away; + use a no-op move instead. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0])); + return; + } + + max = shift_insns[value]; + for (i = 0; i < max; i++) + gen_ashift (code, shift_amounts[value][i], operands[0]); +} + +/* Same as above, but optimized for values where the topmost bits don't + matter. */ + +void +gen_shifty_hi_op (int code, rtx *operands) +{ + int value = INTVAL (operands[2]); + int max, i; + void (*gen_fun) (int, int, rtx); + + /* This operation is used by and_shl for SImode values with a few + high bits known to be cleared. */ + value &= 31; + if (value == 0) + { + emit_insn (gen_nop ()); + return; + } + + gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift; + if (code == ASHIFT) + { + max = ext_shift_insns[value]; + for (i = 0; i < max; i++) + gen_fun (code, ext_shift_amounts[value][i], operands[0]); + } + else + /* When shifting right, emit the shifts in reverse order, so that + solitary negative values come first. */ + for (i = ext_shift_insns[value] - 1; i >= 0; i--) + gen_fun (code, ext_shift_amounts[value][i], operands[0]); +} + +/* Output RTL for an arithmetic right shift. */ + +/* ??? Rewrite to use super-optimizer sequences. 
*/ + +int +expand_ashiftrt (rtx *operands) +{ + rtx wrk; + char func[18]; + int value; + + if (TARGET_SH3) + { + if (!CONST_INT_P (operands[2])) + { + rtx count = copy_to_mode_reg (SImode, operands[2]); + emit_insn (gen_negsi2 (count, count)); + emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); + return 1; + } + else if (ashiftrt_insns[INTVAL (operands[2]) & 31] + > 1 + SH_DYNAMIC_SHIFT_COST) + { + rtx count + = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31))); + emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); + return 1; + } + } + if (!CONST_INT_P (operands[2])) + return 0; + + value = INTVAL (operands[2]) & 31; + + if (value == 31) + { + /* If we are called from abs expansion, arrange things so that we + we can use a single MT instruction that doesn't clobber the source, + if LICM can hoist out the load of the constant zero. */ + if (currently_expanding_to_rtl) + { + emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)), + operands[1])); + emit_insn (gen_mov_neg_si_t (operands[0])); + return 1; + } + emit_insn (gen_ashrsi2_31 (operands[0], operands[1])); + return 1; + } + else if (value >= 16 && value <= 19) + { + wrk = gen_reg_rtx (SImode); + emit_insn (gen_ashrsi2_16 (wrk, operands[1])); + value -= 16; + while (value--) + gen_ashift (ASHIFTRT, 1, wrk); + emit_move_insn (operands[0], wrk); + return 1; + } + /* Expand a short sequence inline, longer call a magic routine. */ + else if (value <= 5) + { + wrk = gen_reg_rtx (SImode); + emit_move_insn (wrk, operands[1]); + while (value--) + gen_ashift (ASHIFTRT, 1, wrk); + emit_move_insn (operands[0], wrk); + return 1; + } + + wrk = gen_reg_rtx (Pmode); + + /* Load the value into an arg reg and call a helper. 
*/ + emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); + sprintf (func, "__ashiftrt_r4_%d", value); + function_symbol (wrk, func, SFUNC_STATIC); + emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk)); + emit_move_insn (operands[0], gen_rtx_REG (SImode, 4)); + return 1; +} + +int +sh_dynamicalize_shift_p (rtx count) +{ + return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST; +} + +/* Try to find a good way to implement the combiner pattern + [(set (match_operand:SI 0 "register_operand" "r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) . + LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3. + return 0 for simple right / left or left/right shift combination. + return 1 for a combination of shifts with zero_extend. + return 2 for a combination of shifts with an AND that needs r0. + return 3 for a combination of shifts with an AND that needs an extra + scratch register, when the three highmost bits of the AND mask are clear. + return 4 for a combination of shifts with an AND that needs an extra + scratch register, when any of the three highmost bits of the AND mask + is set. + If ATTRP is set, store an initial right shift width in ATTRP[0], + and the instruction length in ATTRP[1] . These values are not valid + when returning 0. + When ATTRP is set and returning 1, ATTRP[2] gets set to the index into + shift_amounts for the last shift value that is to be used before the + sign extend. 
*/ +int +shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp) +{ + unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2; + int left = INTVAL (left_rtx), right; + int best = 0; + int cost, best_cost = 10000; + int best_right = 0, best_len = 0; + int i; + int can_ext; + + if (left < 0 || left > 31) + return 0; + if (CONST_INT_P (mask_rtx)) + mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left; + else + mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left; + /* Can this be expressed as a right shift / left shift pair? */ + lsb = ((mask ^ (mask - 1)) >> 1) + 1; + right = exact_log2 (lsb); + mask2 = ~(mask + lsb - 1); + lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1; + /* mask has no zeroes but trailing zeroes <==> ! mask2 */ + if (! mask2) + best_cost = shift_insns[right] + shift_insns[right + left]; + /* mask has no trailing zeroes <==> ! right */ + else if (! right && mask2 == ~(lsb2 - 1)) + { + int late_right = exact_log2 (lsb2); + best_cost = shift_insns[left + late_right] + shift_insns[late_right]; + } + /* Try to use zero extend. */ + if (mask2 == ~(lsb2 - 1)) + { + int width, first; + + for (width = 8; width <= 16; width += 8) + { + /* Can we zero-extend right away? */ + if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width) + { + cost + = 1 + ext_shift_insns[right] + ext_shift_insns[left + right]; + if (cost < best_cost) + { + best = 1; + best_cost = cost; + best_right = right; + best_len = cost; + if (attrp) + attrp[2] = -1; + } + continue; + } + /* ??? Could try to put zero extend into initial right shift, + or even shift a bit left before the right shift. */ + /* Determine value of first part of left shift, to get to the + zero extend cut-off point. 
*/ + first = width - exact_log2 (lsb2) + right; + if (first >= 0 && right + left - first >= 0) + { + cost = ext_shift_insns[right] + ext_shift_insns[first] + 1 + + ext_shift_insns[right + left - first]; + if (cost < best_cost) + { + best = 1; + best_cost = cost; + best_right = right; + best_len = cost; + if (attrp) + attrp[2] = first; + } + } + } + } + /* Try to use r0 AND pattern */ + for (i = 0; i <= 2; i++) + { + if (i > right) + break; + if (! CONST_OK_FOR_K08 (mask >> i)) + continue; + cost = (i != 0) + 2 + ext_shift_insns[left + i]; + if (cost < best_cost) + { + best = 2; + best_cost = cost; + best_right = i; + best_len = cost - 1; + } + } + /* Try to use a scratch register to hold the AND operand. */ + can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0; + for (i = 0; i <= 2; i++) + { + if (i > right) + break; + cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3) + + (can_ext ? ext_shift_insns : shift_insns)[left + i]; + if (cost < best_cost) + { + best = 4 - can_ext; + best_cost = cost; + best_right = i; + best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i); + } + } + + if (attrp) + { + attrp[0] = best_right; + attrp[1] = best_len; + } + return best; +} + +/* This is used in length attributes of the unnamed instructions + corresponding to shl_and_kind return values of 1 and 2. */ +int +shl_and_length (rtx insn) +{ + rtx set_src, left_rtx, mask_rtx; + int attributes[3]; + + set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + left_rtx = XEXP (XEXP (set_src, 0), 1); + mask_rtx = XEXP (set_src, 1); + shl_and_kind (left_rtx, mask_rtx, attributes); + return attributes[1]; +} + +/* This is used in length attribute of the and_shl_scratch instruction. 
*/
+
+int
+shl_and_scr_length (rtx insn)
+{
+  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+  int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
+  rtx op = XEXP (set_src, 0);
+  len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
+  op = XEXP (XEXP (op, 0), 0);
+  return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
+}
+
+/* Generate rtl for instructions for which shl_and_kind advised a particular
+   method of generating them, i.e. returned nonzero.
+   DEST receives (SOURCE << LEFT_RTX) & MASK_RTX.  Return 0 if insns were
+   emitted, and -1 if shl_and_kind returned 0, in which case nothing is
+   emitted.  */
+
+int
+gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
+{
+  int attributes[3];
+  unsigned HOST_WIDE_INT mask;
+  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
+  int right, total_shift;
+  void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
+
+  /* shl_and_kind does not necessarily fill in ATTRIBUTES when it returns
+     0, so bail out before reading them or shifting by a value derived
+     from them.  */
+  if (kind == 0)
+    return -1;
+
+  right = attributes[0];
+  total_shift = INTVAL (left_rtx) + right;
+  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
+  switch (kind)
+    {
+    default:
+      return -1;
+    case 1:
+      {
+        /* Shifts combined with a zero extend.  FIRST is the left shift to
+           do before the zero extend, or negative to extend right away.  */
+        int first = attributes[2];
+        rtx operands[3];
+
+        if (first < 0)
+          {
+            emit_insn ((mask << right) <= 0xff
+                       ? gen_zero_extendqisi2 (dest,
+                                               gen_lowpart (QImode, source))
+                       : gen_zero_extendhisi2 (dest,
+                                               gen_lowpart (HImode, source)));
+            source = dest;
+          }
+        if (source != dest)
+          emit_insn (gen_movsi (dest, source));
+        operands[0] = dest;
+        if (right)
+          {
+            operands[2] = GEN_INT (right);
+            gen_shifty_hi_op (LSHIFTRT, operands);
+          }
+        if (first > 0)
+          {
+            operands[2] = GEN_INT (first);
+            gen_shifty_hi_op (ASHIFT, operands);
+            total_shift -= first;
+            mask <<= first;
+          }
+        if (first >= 0)
+          emit_insn (mask <= 0xff
+                     ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
+                     : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+        if (total_shift > 0)
+          {
+            operands[2] = GEN_INT (total_shift);
+            gen_shifty_hi_op (ASHIFT, operands);
+          }
+        break;
+      }
+    case 4:
+      shift_gen_fun = gen_shifty_op;
+      /* Fall through.  */
+    case 3:
+      /* If the topmost bit that matters is set, set the topmost bits
+         that don't matter.  This way, we might be able to get a shorter
+         signed constant.  The all-ones value is built as an unsigned
+         quantity because left-shifting a negative value is undefined.  */
+      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
+        mask |= ~(unsigned HOST_WIDE_INT) 0 << (31 - total_shift);
+      /* Fall through.  */
+    case 2:
+      /* Don't expand fine-grained when combining, because that will
+         make the pattern fail.  */
+      if (currently_expanding_to_rtl
+          || reload_in_progress || reload_completed)
+        {
+          rtx operands[3];
+
+          /* Cases 3 and 4 should be handled by this split
+             only while combining  */
+          gcc_assert (kind <= 2);
+          if (right)
+            {
+              emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
+              source = dest;
+            }
+          emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
+          if (total_shift)
+            {
+              operands[0] = dest;
+              operands[1] = dest;
+              operands[2] = GEN_INT (total_shift);
+              shift_gen_fun (ASHIFT, operands);
+            }
+          break;
+        }
+      else
+        {
+          int neg = 0;
+          if (kind != 4 && total_shift < 16)
+            {
+              neg = -ext_shift_amounts[total_shift][1];
+              if (neg > 0)
+                neg -= ext_shift_amounts[total_shift][2];
+              else
+                neg = 0;
+            }
+          emit_insn (gen_and_shl_scratch (dest, source,
+                                          GEN_INT (right),
+                                          GEN_INT (mask),
+                                          GEN_INT (total_shift + neg),
+                                          GEN_INT (neg)));
+          emit_insn (gen_movsi (dest, dest));
+          break;
+        }
+    }
+  return 0;
+}
+
+/* Try to find a good way to implement the combiner pattern
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+                                    (match_operand:SI 2 "const_int_operand" "n")
+                         (match_operand:SI 3 "const_int_operand" "n")
+                         (const_int 0)))
+   (clobber (reg:SI T_REG))]
+  LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
+  return 0 for simple left / right shift combination.
+  return 1 for left shift / 8 bit sign extend / left shift.
+  return 2 for left shift / 16 bit sign extend / left shift.
+  return 3 for left shift / 8 bit sign extend / shift / sign extend.
+  return 4 for left shift / 16 bit sign extend / shift / sign extend.
+  return 5 for left shift / 16 bit sign extend / right shift
+  return 6 for < 8 bit sign extend / left shift.
+ return 7 for < 8 bit sign extend / left shift / single right shift. + If COSTP is nonzero, assign the calculated cost to *COSTP. */ + +int +shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp) +{ + int left, size, insize, ext; + int cost = 0, best_cost; + int kind; + + left = INTVAL (left_rtx); + size = INTVAL (size_rtx); + /* INSIZE is the width of the source field: SIZE bits are significant once that field has been shifted left by LEFT. */ insize = size - left; + gcc_assert (insize > 0); + /* Default to left / right shift. */ + kind = 0; + best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size]; + if (size <= 16) + { + /* 16 bit shift / sign extend / 16 bit shift */ + cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size]; + /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden below, by alternative 3 or something even better. */ + if (cost < best_cost) + { + kind = 5; + best_cost = cost; + } + } + /* Try a plain sign extend between two shifts. EXT is the extension width: 16, then 8, for as long as it is at least INSIZE. */ + for (ext = 16; ext >= insize; ext -= 8) + { + if (ext <= size) + { + cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext]; + if (cost < best_cost) + { + /* Kind 1 for an 8 bit extend, kind 2 for a 16 bit extend. */ kind = ext / (unsigned) 8; + best_cost = cost; + } + } + /* Check if we can do a sloppy shift with a final signed shift restoring the sign. */ + if (EXT_SHIFT_SIGNED (size - ext)) + cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1; + /* If not, maybe it's still cheaper to do the second shift sloppy, and do a final sign extend? */ + else if (size <= 16) + cost = ext_shift_insns[ext - insize] + 1 + + ext_shift_insns[size > ext ? size - ext : ext - size] + 1; + else + continue; + if (cost < best_cost) + { + /* Kind 3 for an 8 bit extend, kind 4 for a 16 bit extend. */ kind = ext / (unsigned) 8 + 2; + best_cost = cost; + } + } + /* Check if we can sign extend in r0 */ + if (insize < 8) + { + cost = 3 + shift_insns[left]; + if (cost < best_cost) + { + kind = 6; + best_cost = cost; + } + /* Try the same with a final signed shift.
*/ + if (left < 31) + { + cost = 3 + ext_shift_insns[left + 1] + 1; + if (cost < best_cost) + { + kind = 7; + best_cost = cost; + } + } + } + if (TARGET_SH3) + { + /* Try to use a dynamic shift. */ + cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST; + if (cost < best_cost) + { + kind = 0; + best_cost = cost; + } + } + /* NOTE(review): COST holds the most recently computed candidate, which is not necessarily BEST_COST -- confirm that callers want this value. */ if (costp) + *costp = cost; + return kind; +} + +/* Function to be used in the length attribute of the instructions implementing this pattern. The first element of INSN's pattern vector is read as a SET; the shift count and the extension size are taken from its source and the cost reported by shl_sext_kind is returned. */ + +int +shl_sext_length (rtx insn) +{ + rtx set_src, left_rtx, size_rtx; + int cost; + + set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + left_rtx = XEXP (XEXP (set_src, 0), 1); + size_rtx = XEXP (set_src, 1); + shl_sext_kind (left_rtx, size_rtx, &cost); + return cost; +} + +/* Generate rtl for this pattern: emit insns that leave in DEST the value of SOURCE shifted left by LEFT_RTX and sign extended from SIZE_RTX bits, using the strategy selected by shl_sext_kind. Return 0 on success, or -1 when the selected kind is not one of 1..7 and therefore is not expanded here. */ + +int +gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source) +{ + int kind; + int left, size, insize, cost; + rtx operands[3]; + + kind = shl_sext_kind (left_rtx, size_rtx, &cost); + left = INTVAL (left_rtx); + size = INTVAL (size_rtx); + insize = size - left; + switch (kind) + { + case 1: + case 2: + case 3: + case 4: + { + /* Odd kinds extend from 8 bits, even kinds from 16 bits. */ int ext = kind & 1 ? 8 : 16; + int shift2 = size - ext; + + /* Don't expand fine-grained when combining, because that will make the pattern fail. */ + if (! currently_expanding_to_rtl + && ! reload_in_progress && ! reload_completed) + { + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + emit_insn (gen_movsi (dest, source)); + break; + } + if (dest != source) + emit_insn (gen_movsi (dest, source)); + operands[0] = dest; + if (ext - insize) + { + operands[2] = GEN_INT (ext - insize); + gen_shifty_hi_op (ASHIFT, operands); + } + emit_insn (kind & 1 + ?
gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) + : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); + if (kind <= 2) + { + if (shift2) + { + operands[2] = GEN_INT (shift2); + gen_shifty_op (ASHIFT, operands); + } + } + else + { + if (shift2 > 0) + { + if (EXT_SHIFT_SIGNED (shift2)) + { + /* Shift one bit too far, then restore the sign with a single arithmetic right shift. */ operands[2] = GEN_INT (shift2 + 1); + gen_shifty_op (ASHIFT, operands); + operands[2] = const1_rtx; + gen_shifty_op (ASHIFTRT, operands); + break; + } + operands[2] = GEN_INT (shift2); + gen_shifty_hi_op (ASHIFT, operands); + } + else if (shift2) + { + operands[2] = GEN_INT (-shift2); + gen_shifty_hi_op (LSHIFTRT, operands); + } + emit_insn (size <= 8 + ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) + : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); + } + break; + } + case 5: + { + int i = 16 - size; + if (! currently_expanding_to_rtl + && ! reload_in_progress && ! reload_completed) + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + else + { + operands[0] = dest; + operands[2] = GEN_INT (16 - insize); + gen_shifty_hi_op (ASHIFT, operands); + emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); + } + /* Don't use gen_ashrsi3 because it generates new pseudos. */ + while (--i >= 0) + gen_ashift (ASHIFTRT, 1, dest); + break; + } + case 6: + case 7: + /* Don't expand fine-grained when combining, because that will make the pattern fail. */ + if (! currently_expanding_to_rtl + && ! reload_in_progress && ! reload_completed) + { + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + emit_insn (gen_movsi (dest, source)); + break; + } + /* Sign extend the low INSIZE bits of SOURCE: mask them, flip the sign bit, then subtract the sign bit's weight. The addend is written as -(1 << n) rather than -1 << n because left shifting a negative value is undefined in ISO C; the value is the same. */ emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1))); + emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1)))); + emit_insn (gen_addsi3 (dest, dest, GEN_INT (-(1 << (insize - 1))))); + operands[0] = dest; + operands[2] = kind == 7 ?
GEN_INT (left + 1) : left_rtx; + gen_shifty_op (ASHIFT, operands); + if (kind == 7) + emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx)); + break; + default: + return -1; + } + return 0; +} + +/* Prefix a symbol_ref name with "datalabel". A LABEL_REF is wrapped in a CONST of an UNSPEC_DATALABEL. For a SYMBOL_REF the name string is replaced in place by the shared identifier copy and SYM itself is returned. NOTE(review): no prefix is added on the SYMBOL_REF path in the code below -- confirm that the first sentence is still accurate. */ + +rtx +gen_datalabel_ref (rtx sym) +{ + const char *str; + + if (GET_CODE (sym) == LABEL_REF) + return gen_rtx_CONST (GET_MODE (sym), + gen_rtx_UNSPEC (GET_MODE (sym), + gen_rtvec (1, sym), + UNSPEC_DATALABEL)); + + gcc_assert (GET_CODE (sym) == SYMBOL_REF); + + str = XSTR (sym, 0); + /* Share all SYMBOL_REF strings with the same value - that is important for cse. */ + str = IDENTIFIER_POINTER (get_identifier (str)); + XSTR (sym, 0) = str; + + return sym; +} + + +static alloc_pool label_ref_list_pool; + +/* Node of a singly linked list of label rtxes. */ typedef struct label_ref_list_d +{ + rtx label; + struct label_ref_list_d *next; +} *label_ref_list_t; + +/* The SH cannot load a large constant into a register, constants have to + come from a pc relative load. The reference of a pc relative load + instruction must be less than 1k in front of the instruction. This + means that we often have to dump a constant inside a function, and + generate code to branch around it. + + It is important to minimize this, since the branches will slow things + down and make things bigger. + + Worst case code looks like: + + mov.l L1,rn + bra L2 + nop + align + L1: .long value + L2: + .. + + mov.l L3,rn + bra L4 + nop + align + L3: .long value + L4: + .. + + We fix this by performing a scan before scheduling, which notices which + instructions need to have their operands fetched from the constant table + and builds the table. + + The algorithm is: + + scan, find an instruction which needs a pcrel move. Look forward, find the + last barrier which is within MAX_COUNT bytes of the requirement. + If there isn't one, make one. Process all the instructions between + the find and the barrier.
+ + In the above example, we can tell that L3 is within 1k of L1, so + the first move can be shrunk from the 3 insn+constant sequence into + just 1 insn, and the constant moved to L3 to make: + + mov.l L1,rn + .. + mov.l L3,rn + bra L4 + nop + align + L3:.long value + L4:.long value + + Then the second move becomes the target for the shortening process. */ + +typedef struct +{ + rtx value; /* Value in table. */ + rtx label; /* Label of value; further labels for the same value are chained through LABEL_REFS. */ + label_ref_list_t wend; /* End of window. */ + enum machine_mode mode; /* Mode of value. */ + + /* True if this constant is accessed as part of a post-increment sequence. Note that HImode constants are never accessed in this way. */ + bool part_of_sequence_p; +} pool_node; + +/* The maximum number of constants that can fit into one pool, since constants in the range 0..510 are at least 2 bytes long, and in the range from there to 1018 at least 4 bytes. */ + +#define MAX_POOL_SIZE 372 +/* The pool being collected: the first POOL_SIZE entries of POOL_VECTOR are in use. POOL_WINDOW_LABEL is the label most recently created by add_constant and POOL_WINDOW_LAST the index of the entry it used last. */ static pool_node pool_vector[MAX_POOL_SIZE]; +static int pool_size; +static rtx pool_window_label; +static int pool_window_last; + +static int max_labelno_before_reorg; + +/* ??? If we need a constant in HImode which is the truncated value of a constant we need in SImode, we could combine the two entries thus saving two bytes. Is this common enough to be worth the effort of implementing it? */ + +/* ??? This stuff should be done at the same time that we shorten branches. As it is now, we must assume that all branches are the maximum size, and this causes us to almost always output constant pools sooner than necessary. */ + +/* Add a constant to the pool and return its label. X is the constant and MODE its mode. If LAST_VALUE is nonzero and equal to the value of the pool entry just before the one used for X, X is treated as a continuation of that entry: no label is created and 0 is returned. */ + +static rtx +add_constant (rtx x, enum machine_mode mode, rtx last_value) +{ + int i; + rtx lab, new_rtx; + label_ref_list_t ref, newref; + + /* First see if we've already got it.
*/ + for (i = 0; i < pool_size; i++) + { + if (x->code == pool_vector[i].value->code + && mode == pool_vector[i].mode) + { + if (x->code == CODE_LABEL) + { + if (XINT (x, 3) != XINT (pool_vector[i].value, 3)) + continue; + } + if (rtx_equal_p (x, pool_vector[i].value)) + { + lab = new_rtx = 0; + if (! last_value + || ! i + || ! rtx_equal_p (last_value, pool_vector[i-1].value)) + { + /* Give the existing entry one more label, chaining the old ones behind it. */ new_rtx = gen_label_rtx (); + LABEL_REFS (new_rtx) = pool_vector[i].label; + pool_vector[i].label = lab = new_rtx; + } + /* A new label was made: push the previous window label onto the window-end list of the entry that was used last. */ if (lab && pool_window_label) + { + newref = (label_ref_list_t) pool_alloc (label_ref_list_pool); + newref->label = pool_window_label; + ref = pool_vector[pool_window_last].wend; + newref->next = ref; + pool_vector[pool_window_last].wend = newref; + } + if (new_rtx) + pool_window_label = new_rtx; + pool_window_last = i; + return lab; + } + } + } + + /* Need a new one. NOTE(review): nothing here checks POOL_SIZE against MAX_POOL_SIZE, and pool_vector[pool_size - 1] is read even when POOL_SIZE is 0 if LAST_VALUE is nonzero -- presumably the callers guarantee that neither happens; confirm. */ + pool_vector[pool_size].value = x; + if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value)) + { + lab = 0; + pool_vector[pool_size - 1].part_of_sequence_p = true; + } + else + lab = gen_label_rtx (); + pool_vector[pool_size].mode = mode; + pool_vector[pool_size].label = lab; + pool_vector[pool_size].wend = NULL; + pool_vector[pool_size].part_of_sequence_p = (lab == 0); + /* Same window bookkeeping as for an existing entry above. */ if (lab && pool_window_label) + { + newref = (label_ref_list_t) pool_alloc (label_ref_list_pool); + newref->label = pool_window_label; + ref = pool_vector[pool_window_last].wend; + newref->next = ref; + pool_vector[pool_window_last].wend = newref; + } + if (lab) + pool_window_label = lab; + pool_window_last = pool_size; + pool_size++; + return lab; +} + +/* Output the literal table. START, if nonzero, is the first instruction + this table is needed for, and also indicates that there is at least one + casesi_worker_2 instruction; We have to emit the operand3 labels from + these insns at a 4-byte aligned position. BARRIER is the barrier + after which we are to place the table.
*/ + +static void +dump_table (rtx start, rtx barrier) +{ + rtx scan = barrier; + int i; + int need_align = 1; + rtx lab; + label_ref_list_t ref; + int have_df = 0; + + /* Do two passes, first time dump out the HI sized constants. While doing so, note whether any DFmode constant is present. */ + + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + if (p->mode == HImode) + { + if (need_align) + { + scan = emit_insn_after (gen_align_2 (), scan); + need_align = 0; + } + for (lab = p->label; lab; lab = LABEL_REFS (lab)) + scan = emit_label_after (lab, scan); + scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx), + scan); + for (ref = p->wend; ref; ref = ref->next) + { + lab = ref->label; + scan = emit_insn_after (gen_consttable_window_end (lab), scan); + } + } + else if (p->mode == DFmode) + have_df = 1; + } + + need_align = 1; + + /* Emit the operand 3 labels of all casesi_worker_2 insns between START and BARRIER at a 4-byte aligned position. */ if (start) + { + scan = emit_insn_after (gen_align_4 (), scan); + need_align = 0; + for (; start != barrier; start = NEXT_INSN (start)) + if (NONJUMP_INSN_P (start) + && recog_memoized (start) == CODE_FOR_casesi_worker_2) + { + rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0)); + rtx lab = XEXP (XVECEXP (src, 0, 3), 0); + + scan = emit_label_after (lab, scan); + } + } + /* Variant used when a DFmode constant is present and both TARGET_FMOVD and TARGET_ALIGN_DOUBLE hold. ALIGN_INSN remembers an alignment insn emitted in front of a DFmode constant; a later 4-byte constant that is not part of a sequence is emitted in front of that insn, which is then deleted. */ if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df) + { + rtx align_insn = NULL_RTX; + + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan); + need_align = 0; + + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + switch (p->mode) + { + case HImode: + break; + case SImode: + case SFmode: + if (align_insn && !p->part_of_sequence_p) + { + for (lab = p->label; lab; lab = LABEL_REFS (lab)) + emit_label_before (lab, align_insn); + emit_insn_before (gen_consttable_4 (p->value, const0_rtx), + align_insn); + for (ref = p->wend; ref; ref = ref->next) + { + lab = ref->label; + emit_insn_before (gen_consttable_window_end (lab), + align_insn); + } + delete_insn (align_insn); + align_insn = NULL_RTX; + continue; + } + else + { +
for (lab = p->label; lab; lab = LABEL_REFS (lab)) + scan = emit_label_after (lab, scan); + scan = emit_insn_after (gen_consttable_4 (p->value, + const0_rtx), scan); + /* Each 4-byte constant toggles whether the next DFmode constant needs an alignment insn. */ need_align = ! need_align; + } + break; + case DFmode: + if (need_align) + { + scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan); + align_insn = scan; + need_align = 0; + } + /* Fall through. */ case DImode: + for (lab = p->label; lab; lab = LABEL_REFS (lab)) + scan = emit_label_after (lab, scan); + scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx), + scan); + break; + default: + gcc_unreachable (); + } + + if (p->mode != HImode) + { + for (ref = p->wend; ref; ref = ref->next) + { + lab = ref->label; + scan = emit_insn_after (gen_consttable_window_end (lab), + scan); + } + } + } + + /* Everything has been emitted; this makes the generic loop below a no-op. */ pool_size = 0; + } + + /* Generic case: emit the remaining (non-HImode) constants, preceded by a label and a 4-byte alignment if none has been emitted yet. */ for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + switch (p->mode) + { + case HImode: + break; + case SImode: + case SFmode: + if (need_align) + { + need_align = 0; + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (gen_align_4 (), scan); + } + for (lab = p->label; lab; lab = LABEL_REFS (lab)) + scan = emit_label_after (lab, scan); + scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx), + scan); + break; + case DFmode: + case DImode: + if (need_align) + { + need_align = 0; + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (gen_align_4 (), scan); + } + for (lab = p->label; lab; lab = LABEL_REFS (lab)) + scan = emit_label_after (lab, scan); + scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx), + scan); + break; + default: + gcc_unreachable (); + } + + if (p->mode != HImode) + { + for (ref = p->wend; ref; ref = ref->next) + { + lab = ref->label; + scan = emit_insn_after (gen_consttable_window_end (lab), scan); + } + } + } + + /* Terminate the table and reset the pool state for the next table. */ scan = emit_insn_after (gen_consttable_end (), scan); + scan = emit_barrier_after (scan); + pool_size = 0; + pool_window_label = NULL_RTX; + pool_window_last = 0; +} + +/*
Return nonzero if constant would be an ok source for a + mov.w instead of a mov.l. */ + +static int +hi_const (rtx src) +{ + return (CONST_INT_P (src) + && INTVAL (src) >= -32768 + && INTVAL (src) <= 32767); +} + +/* The first operand of the vector in the source of the mova insn MOVA. */ #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0) + +/* Nonzero if the insn is a move instruction which needs to be fixed. */ + +/* ??? For DImode/DFmode moves, we don't need to fix it if each half of the CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't need to fix it if the input value is CONST_OK_FOR_I08. */ + +static int +broken_move (rtx insn) +{ + if (NONJUMP_INSN_P (insn)) + { + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET + /* We can load any 8-bit value if we don't care what the high order bits end up as. */ + && GET_MODE (SET_DEST (pat)) != QImode + && (CONSTANT_P (SET_SRC (pat)) + /* Match mova_const. */ + || (GET_CODE (SET_SRC (pat)) == UNSPEC + && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA + && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST)) + && ! (TARGET_SH2E + && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE + && (fp_zero_operand (SET_SRC (pat)) + || fp_one_operand (SET_SRC (pat))) + /* In general we don't know the current setting of fpscr, so disable fldi. There is an exception if this was a register-register move before reload - and hence it was ascertained that we have single precision setting - and in a post-reload optimization we changed this to do a constant load. In that case we don't have an r0 clobber, hence we must use fldi. */ + && (TARGET_FMOVD + || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0)) + == SCRATCH)) + && REG_P (SET_DEST (pat)) + && FP_REGISTER_P (REGNO (SET_DEST (pat)))) + && ! (TARGET_SH2A + && GET_MODE (SET_DEST (pat)) == SImode + && (satisfies_constraint_I20 (SET_SRC (pat)) + || satisfies_constraint_I28 (SET_SRC (pat)))) + && !
satisfies_constraint_I08 (SET_SRC (pat))) + return 1; + } + + return 0; +} + +/* Nonzero if INSN is a mova: a plain SET whose source is an UNSPEC_MOVA wrapping a LABEL_REF. */ static int +mova_p (rtx insn) +{ + return (NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) == SET + && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC + && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA + /* Don't match mova_const. */ + && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF); +} + +/* Fix up a mova from a switch that went out of range. Without PIC the source of MOVA is replaced by the label reference itself. With PIC the next casesi_worker_1 insn is rewritten as a casesi_worker_2 that refers to a fresh label, and the source of MOVA becomes a CONST of an UNSPEC_SYMOFF of the original target and that label. In both cases the insn codes are reset to -1. */ +static void +fixup_mova (rtx mova) +{ + PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode); + if (! flag_pic) + { + SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova); + INSN_CODE (mova) = -1; + } + else + { + rtx worker = mova; + rtx lab = gen_label_rtx (); + rtx wpat, wpat0, wpat1, wsrc, target, base, diff; + + /* Find the casesi_worker_1 insn after MOVA; no label or jump may come in between. */ do + { + worker = NEXT_INSN (worker); + gcc_assert (worker + && !LABEL_P (worker) + && !JUMP_P (worker)); + } while (NOTE_P (worker) + || recog_memoized (worker) != CODE_FOR_casesi_worker_1); + wpat = PATTERN (worker); + wpat0 = XVECEXP (wpat, 0, 0); + wpat1 = XVECEXP (wpat, 0, 1); + wsrc = SET_SRC (wpat0); + PATTERN (worker) = (gen_casesi_worker_2 + (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1), + XEXP (XVECEXP (wsrc, 0, 2), 0), lab, + XEXP (wpat1, 0))); + INSN_CODE (worker) = -1; + target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); + base = gen_rtx_LABEL_REF (Pmode, lab); + diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF); + SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff); + INSN_CODE (mova) = -1; + } +} + +/* NEW_MOVA is a mova we've just encountered while scanning forward. Update *num_mova, and check if the new mova is not nested within the first one. Return 0 if *first_mova was replaced, 1 if new_mova was replaced, 2 if new_mova has been assigned to *first_mova, -1 otherwise. */ +static int +untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova) +{ + int n_addr = 0; /* Initialization to shut up spurious warning. */ + int f_target, n_target = 0; /* Likewise.
*/ + + if (optimize) + { + /* If NEW_MOVA has no address yet, it will be handled later. */ + if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova)) + return -1; + + n_addr = INSN_ADDRESSES (INSN_UID (new_mova)); + n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0))); + if (n_addr > n_target || n_addr + 1022 < n_target) + { + /* Change the mova into a load. broken_move will then return true for it. */ + fixup_mova (new_mova); + return 1; + } + } + if (!(*num_mova)++) + { + *first_mova = new_mova; + return 2; + } + if (!optimize + || ((f_target + = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0)))) + >= n_target)) + return -1; + + /* The first mova's target lies before the new one's. Keep only one of the two: undo the increment above and turn the mova whose label is farther from its insn into a load. */ (*num_mova)--; + if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova)) + > n_target - n_addr) + { + fixup_mova (*first_mova); + return 0; + } + else + { + fixup_mova (new_mova); + return 1; + } +} + +/* Find the last barrier from insn FROM which is close enough to hold the constant pool. If we can't find one, then create one near the end of the range. NUM_MOVA is the number of mova insns pending on entry and MOVA the first of them; both are updated through untangle_mova while scanning. */ + +static rtx +find_barrier (int num_mova, rtx mova, rtx from) +{ + int count_si = 0; + int count_hi = 0; + int found_hi = 0; + int found_si = 0; + int found_di = 0; + int hi_align = 2; + int si_align = 2; + int leading_mova = num_mova; + rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0; + int si_limit; + int hi_limit; + rtx orig = from; + rtx last_got = NULL_RTX; + rtx last_symoff = NULL_RTX; + + /* For HImode: range is 510, add 4 because pc counts from address of + second instruction after this one, subtract 2 for the jump instruction + that we may need to emit before the table, subtract 2 for the instruction + that fills the jump delay slot (in very rare cases, reorg will take an + instruction from after the constant pool or will leave the delay slot + empty). This gives 510.
+ For SImode: range is 1020, add 4 because pc counts from address of + second instruction after this one, subtract 2 in case pc is 2 byte + aligned, subtract 2 for the jump instruction that we may need to emit + before the table, subtract 2 for the instruction that fills the jump + delay slot. This gives 1018. */ + + /* The branch will always be shortened now that the reference address for forward branches is the successor address, thus we need no longer make adjustments to the [sh]i_limit for -O0. */ + + si_limit = 1018; + hi_limit = 510; + + /* Scan forward. COUNT_SI and COUNT_HI only start advancing once FOUND_SI and FOUND_HI, respectively, are nonzero. */ while (from && count_si < si_limit && count_hi < hi_limit) + { + int inc = get_attr_length (from); + int new_align = 1; + + /* If this is a label that existed at the time of the compute_alignments call, determine the alignment. N.B. When find_barrier recurses for an out-of-reach mova, we might see labels at the start of previously inserted constant tables. */ + if (LABEL_P (from) + && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg) + { + if (optimize) + new_align = 1 << label_to_alignment (from); + else if (BARRIER_P (prev_nonnote_insn (from))) + new_align = 1 << barrier_align (from); + else + new_align = 1; + inc = 0; + } + /* In case we are scanning a constant table because of recursion, check for explicit alignments. If the table is long, we might be forced to emit the new table in front of it; the length of the alignment might be the last straw. */ + else if (NONJUMP_INSN_P (from) + && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE + && XINT (PATTERN (from), 1) == UNSPECV_ALIGN) + new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0)); + /* When we find the end of a constant table, paste the new constant + at the end. That is better than putting it in front because + this way, we don't need extra alignment for adding a 4-byte-aligned + mov(a) label to a 2/4 or 8/4 byte aligned table.
*/ + else if (NONJUMP_INSN_P (from) + && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE + && XINT (PATTERN (from), 1) == UNSPECV_CONST_END) + return from; + + if (BARRIER_P (from)) + { + rtx next; + + found_barrier = from; + + /* If we are at the end of the function, or in front of an alignment instruction, we need not insert an extra alignment. We prefer this kind of barrier. */ + if (barrier_align (from) > 2) + good_barrier = from; + + /* If we are at the end of a hot/cold block, dump the constants here. */ + next = NEXT_INSN (from); + if (next + && NOTE_P (next) + && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS) + break; + } + + if (broken_move (from)) + { + rtx pat, src, dst; + enum machine_mode mode; + + pat = PATTERN (from); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + src = SET_SRC (pat); + dst = SET_DEST (pat); + mode = GET_MODE (dst); + + /* The GOT pc-relative setup comes as a pair of mova .L8,r0 / mov.l .L8,r12 instructions (plus add r0,r12). Remember if we see one without the other: LAST_GOT is set by the first of a pair and cleared again by the second. */ + if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0))) + last_got = last_got ? NULL_RTX : from; + else if (PIC_ADDR_P (src)) + last_got = last_got ? NULL_RTX : from; + + /* We must explicitly check the mode, because sometimes the front end will generate code to load unsigned constants into HImode targets without properly sign extending them. */ + if (mode == HImode + || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG)) + { + found_hi += 2; + /* We put the short constants before the long constants, so we must count the length of short constants in the range for the long constants. */ + /* ??? This isn't optimal, but is easy to do. */ + si_limit -= 2; + } + else + { + /* We dump DF/DI constants before SF/SI ones, because + the limit is the same, but the alignment requirements + are higher.
We may waste up to 4 additional bytes + for alignment, and the DF/DI constant may have + another SF/SI constant placed before it. */ + if (TARGET_SHCOMPACT + && ! found_di + && (mode == DFmode || mode == DImode)) + { + found_di = 1; + si_limit -= 8; + } + while (si_align > 2 && found_si + si_align - 2 > count_si) + si_align >>= 1; + if (found_si > count_si) + count_si = found_si; + found_si += GET_MODE_SIZE (mode); + if (num_mova) + si_limit -= GET_MODE_SIZE (mode); + } + } + + if (mova_p (from)) + { + switch (untangle_mova (&num_mova, &mova, from)) + { + /* FROM itself was turned into a load. */ case 1: + if (flag_pic) + { + rtx src = SET_SRC (PATTERN (from)); + if (GET_CODE (src) == CONST + && GET_CODE (XEXP (src, 0)) == UNSPEC + && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF) + last_symoff = from; + } + break; + /* The first mova was turned into a load; start over from it. */ case 0: return find_barrier (0, 0, mova); + /* FROM became the first pending mova. */ case 2: + { + leading_mova = 0; + barrier_before_mova + = good_barrier ? good_barrier : found_barrier; + } + /* Fall through. */ default: break; + } + if (found_si > count_si) + count_si = found_si; + } + else if (JUMP_TABLE_DATA_P (from)) + { + if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode) + || (num_mova + && (prev_nonnote_insn (from) + == XEXP (MOVA_LABELREF (mova), 0)))) + num_mova--; + if (barrier_align (next_real_insn (from)) == align_jumps_log) + { + /* We have just passed the barrier in front of the ADDR_DIFF_VEC, which is stored in found_barrier. Since the ADDR_DIFF_VEC is accessed as data, just like our pool constants, this is a good opportunity to accommodate what we have gathered so far. If we waited any longer, we could end up at a barrier in front of code, which gives worse cache usage for separated instruction / data caches. */ + good_barrier = found_barrier; + break; + } + else + { + rtx body = PATTERN (from); + inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body)); + } + } + /* For the SH1, we generate alignments even after jumps-around-jumps. */ + else if (JUMP_P (from) + && ! TARGET_SH2 + && !
optimize_size) + new_align = 4; + + /* There is a possibility that a bf is transformed into a bf/s by the delay slot scheduler. */ + if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from) + && get_attr_type (from) == TYPE_CBRANCH + && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE) + inc += 2; + + if (found_si) + { + count_si += inc; + if (new_align > si_align) + { + si_limit -= (count_si - 1) & (new_align - si_align); + si_align = new_align; + } + count_si = (count_si + new_align - 1) & -new_align; + } + if (found_hi) + { + count_hi += inc; + if (new_align > hi_align) + { + hi_limit -= (count_hi - 1) & (new_align - hi_align); + hi_align = new_align; + } + count_hi = (count_hi + new_align - 1) & -new_align; + } + from = NEXT_INSN (from); + } + + if (num_mova) + { + if (leading_mova) + { + /* Try as we might, the leading mova is out of range. Change it into a load (which will become a pcload) and retry. */ + fixup_mova (mova); + return find_barrier (0, 0, mova); + } + else + { + /* Insert the constant pool table before the mova instruction, to prevent the mova label reference from going out of range. */ + from = mova; + good_barrier = found_barrier = barrier_before_mova; + } + } + + if (found_barrier) + { + if (good_barrier && next_real_insn (found_barrier)) + found_barrier = good_barrier; + } + else + { + /* We didn't find a barrier in time to dump our stuff, so we'll make one. */ + rtx label = gen_label_rtx (); + + /* Don't emit a constant table in the middle of insns for casesi_worker_2. This is a bit overkill but is enough because casesi_worker_2 wouldn't appear so frequently. */ + if (last_symoff) + from = last_symoff; + + /* If we exceeded the range, then we must back up over the last + instruction we looked at. Otherwise, we just need to undo the + NEXT_INSN at the end of the loop.
*/ + if (PREV_INSN (from) != orig + && (count_hi > hi_limit || count_si > si_limit)) + from = PREV_INSN (PREV_INSN (from)); + else + from = PREV_INSN (from); + + /* Don't emit a constant table in the middle of global pointer setting, since that would move the addressing base GOT into another table. We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_ in the pool anyway, so just move up the whole constant pool. */ + if (last_got) + from = PREV_INSN (last_got); + + /* Don't insert the constant pool table at the position which may be the landing pad. */ + if (flag_exceptions + && CALL_P (from) + && find_reg_note (from, REG_EH_REGION, NULL_RTX)) + from = PREV_INSN (from); + + /* Walk back to be just before any jump or label. Putting it before a label reduces the number of times the branch around the constant pool table will be hit. Putting it before a jump makes it more likely that the bra delay slot will be filled. */ + while (NOTE_P (from) || JUMP_P (from) + || LABEL_P (from)) + from = PREV_INSN (from); + + /* Emit a jump to LABEL, a barrier and then LABEL itself after FROM; the new barrier is what gets returned. */ from = emit_jump_insn_after (gen_jump (label), from); + JUMP_LABEL (from) = label; + LABEL_NUSES (label) = 1; + found_barrier = emit_barrier_after (from); + emit_label_after (label, found_barrier); + } + + return found_barrier; +} + +/* If the instruction INSN is implemented by a special function, and we can + positively find the register that is used to call the sfunc, and this + register is not used anywhere else in this instruction - except as the + destination of a set, return this register; else, return 0.
*/ +rtx +sfunc_uses_reg (rtx insn) +{ + int i; + rtx pattern, part, reg_part, reg; + + if (!NONJUMP_INSN_P (insn)) + return 0; + pattern = PATTERN (insn); + if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC) + return 0; + + /* Look for a USE of an SImode operand among elements 1 and up. The loop runs downwards, so the matching USE with the lowest index is the one kept. */ for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--) + { + part = XVECEXP (pattern, 0, i); + if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode) + reg_part = part; + } + if (! reg_part) + return 0; + reg = XEXP (reg_part, 0); + /* Give up if REG is mentioned anywhere else, ignoring CLOBBERs and the destination of a SET whose destination is a register. */ for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--) + { + part = XVECEXP (pattern, 0, i); + if (part == reg_part || GET_CODE (part) == CLOBBER) + continue; + if (reg_mentioned_p (reg, ((GET_CODE (part) == SET + && REG_P (SET_DEST (part))) + ? SET_SRC (part) : part))) + return 0; + } + return reg; +} + +/* See if the only way in which INSN uses REG is by calling it, or by setting it while calling it. Set *SET to a SET rtx if the register is set by INSN. Return 0 if INSN uses REG only in that way and nonzero otherwise; an insn that is not a call and does not set REG is always reported as nonzero. */ + +static int +noncall_uses_reg (rtx reg, rtx insn, rtx *set) +{ + rtx pattern, reg2; + + *set = NULL_RTX; + + reg2 = sfunc_uses_reg (insn); + if (reg2 && REGNO (reg2) == REGNO (reg)) + { + pattern = single_set (insn); + if (pattern + && REG_P (SET_DEST (pattern)) + && REGNO (reg) == REGNO (SET_DEST (pattern))) + *set = pattern; + return 0; + } + if (!CALL_P (insn)) + { + /* We don't use rtx_equal_p because we don't care if the mode is + different.
*/ + pattern = single_set (insn); + if (pattern + && REG_P (SET_DEST (pattern)) + && REGNO (reg) == REGNO (SET_DEST (pattern))) + { + rtx par, part; + int i; + + *set = pattern; + par = PATTERN (insn); + if (GET_CODE (par) == PARALLEL) + for (i = XVECLEN (par, 0) - 1; i >= 0; i--) + { + part = XVECEXP (par, 0, i); + if (GET_CODE (part) != SET && reg_mentioned_p (reg, part)) + return 1; + } + return reg_mentioned_p (reg, SET_SRC (pattern)); + } + + return 1; + } + + /* INSN is a call insn from here on. */ pattern = PATTERN (insn); + + if (GET_CODE (pattern) == PARALLEL) + { + int i; + + for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--) + if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i))) + return 1; + pattern = XVECEXP (pattern, 0, 0); + } + + if (GET_CODE (pattern) == SET) + { + if (reg_mentioned_p (reg, SET_DEST (pattern))) + { + /* We don't use rtx_equal_p, because we don't care if the mode is different. */ + if (!REG_P (SET_DEST (pattern)) + || REGNO (reg) != REGNO (SET_DEST (pattern))) + return 1; + + *set = pattern; + } + + pattern = SET_SRC (pattern); + } + + /* What remains must be a CALL through a MEM whose address is exactly REG. */ if (GET_CODE (pattern) != CALL + || !MEM_P (XEXP (pattern, 0)) + || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0))) + return 1; + + return 0; +} + +/* Given X, a pattern of an insn or a part of it, return a mask of used general registers. Bits 0..15 mean that the respective registers are used as inputs in the instruction. Bits 16..31 mean that the registers 0..15, respectively, are used as outputs, or are clobbered. IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */ +int +regs_used (rtx x, int is_dest) +{ + enum rtx_code code; + const char *fmt; + int i, used = 0; + + if (!
x) + return used; + code = GET_CODE (x); + switch (code) + { + case REG: + if (REGNO (x) < 16) + return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) + << (REGNO (x) + is_dest)); + return 0; + case SUBREG: + { + rtx y = SUBREG_REG (x); + + if (!REG_P (y)) + break; + if (REGNO (y) < 16) + return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) + << (REGNO (y) + + subreg_regno_offset (REGNO (y), + GET_MODE (y), + SUBREG_BYTE (x), + GET_MODE (x)) + is_dest)); + return 0; + } + case SET: + return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16); + case RETURN: + /* If there was a return value, it must have been indicated with USE. The mask sets bits 8..23: registers 8..15 as inputs and registers 0..7 as outputs / clobbered. */ + return 0x00ffff00; + case CLOBBER: + /* NOTE(review): the header comment gives 16 as the IS_DEST value for destinations, but 1 is used here, which shifts a register's bit by one position instead of sixteen -- confirm intent. */ is_dest = 1; + break; + case MEM: + is_dest = 0; + break; + case CALL: + /* Bits 4..7 and 16..23: registers 4..7 as inputs, registers 0..7 as outputs / clobbered. */ used |= 0x00ff00f0; + break; + default: + break; + } + + /* Recurse into all rtx ('e') and rtx vector ('E') operands. */ fmt = GET_RTX_FORMAT (code); + + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + used |= regs_used (XVECEXP (x, i, j), is_dest); + } + else if (fmt[i] == 'e') + used |= regs_used (XEXP (x, i), is_dest); + } + return used; +} + +/* Create an instruction that prevents redirection of a conditional branch to the destination of the JUMP with address ADDR. If the branch needs to be implemented as an indirect jump, try to find a scratch register for it. If NEED_BLOCK is 0, don't do anything unless we need a scratch register. If any preceding insn that doesn't fit into a delay slot is good enough, pass 1. Pass 2 if a definite blocking insn is needed. -1 is used internally to avoid deep recursion. If a blocking instruction is made or recognized, return it. */ + +static rtx +gen_block_redirect (rtx jump, int addr, int need_block) +{ + int dead = 0; + rtx prev = prev_nonnote_insn (jump); + rtx dest; + + /* First, check if we already have an instruction that satisfies our need. */ + if (prev && NONJUMP_INSN_P (prev) && !
INSN_DELETED_P (prev)) + { + if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) + return prev; + if (GET_CODE (PATTERN (prev)) == USE + || GET_CODE (PATTERN (prev)) == CLOBBER + || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) + prev = jump; + /* Clearing bit 0 turns NEED_BLOCK == 1 into 0 and leaves a negative value negative. */ else if ((need_block &= ~1) < 0) + return prev; + else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect) + need_block = 0; + } + if (GET_CODE (PATTERN (jump)) == RETURN) + { + if (! need_block) + return prev; + /* Reorg even does nasty things with return insns that cause branches to go out of range - see find_end_label and callers. */ + return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump); + } + /* We can't use JUMP_LABEL here because it might be undefined when not optimizing. */ + dest = XEXP (SET_SRC (PATTERN (jump)), 0); + /* If the branch is out of range, try to find a scratch register for it. The unsigned comparison is true exactly when the displacement from ADDR to DEST lies outside the range -4092 .. 4098. */ + if (optimize + && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092 + > 4092 + 4098)) + { + rtx scan; + /* Don't look for the stack pointer as a scratch register, it would cause trouble if an interrupt occurred. */ + unsigned attempt = 0x7fff, used; + int jump_left = flag_expensive_optimizations + 1; + + /* It is likely that the most recent eligible instruction is wanted for + the delay slot. Therefore, find out which registers it uses, and + try to avoid using them.
*/ + + for (scan = jump; (scan = PREV_INSN (scan)); ) + { + enum rtx_code code; + + if (INSN_DELETED_P (scan)) + continue; + code = GET_CODE (scan); + if (code == CODE_LABEL || code == JUMP_INSN) + break; + if (code == INSN + && GET_CODE (PATTERN (scan)) != USE + && GET_CODE (PATTERN (scan)) != CLOBBER + && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES) + { + attempt &= ~regs_used (PATTERN (scan), 0); + break; + } + } + /* Scan forward from the jump target, accumulating in DEAD the registers that are marked as written or clobbered without having been marked as read so far. Stop as soon as one of the registers in ATTEMPT qualifies. */ for (used = dead = 0, scan = JUMP_LABEL (jump); + (scan = NEXT_INSN (scan)); ) + { + enum rtx_code code; + + if (INSN_DELETED_P (scan)) + continue; + code = GET_CODE (scan); + if (INSN_P (scan)) + { + used |= regs_used (PATTERN (scan), 0); + if (code == CALL_INSN) + used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0); + dead |= (used >> 16) & ~used; + if (dead & attempt) + { + dead &= attempt; + break; + } + if (code == JUMP_INSN) + { + /* Follow at most JUMP_LEFT simple jumps. */ if (jump_left-- && simplejump_p (scan)) + scan = JUMP_LABEL (scan); + else + break; + } + } + } + /* Mask out the stack pointer again, in case it was the only 'free' register we have found. */ + dead &= 0x7fff; + } + /* If the immediate destination is still in range, check for possible + threading with a jump beyond the delay slot insn. + Don't check if we are called recursively; the jump has been or will be + checked in a different invocation then.
*/ + + else if (optimize && need_block >= 0) + { + rtx next = next_active_insn (next_active_insn (dest)); + if (next && JUMP_P (next) + && GET_CODE (PATTERN (next)) == SET + && recog_memoized (next) == CODE_FOR_jump_compact) + { + dest = JUMP_LABEL (next); + if (dest + && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092 + > 4092 + 4098)) + gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1); + } + } + + if (dead) + { + /* Use the lowest-numbered register whose bit is set in DEAD. */ rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead)); + + /* It would be nice if we could convert the jump into an indirect jump / far branch right now, and thus exposing all constituent instructions to further optimization. However, reorg uses simplejump_p to determine if there is an unconditional jump where it should try to schedule instructions from the target of the branch; simplejump_p fails for indirect jumps even if they have a JUMP_LABEL. */ + rtx insn = emit_insn_before (gen_indirect_jump_scratch + (reg, GEN_INT (unspec_bbr_uid++)), + jump); + /* ??? We would like this to have the scope of the jump, but that scope will change when a delay slot insn of an inner scope is added. Hence, after delay slot scheduling, we'll have to expect NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and the jump. */ + + INSN_LOCATOR (insn) = INSN_LOCATOR (jump); + INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch; + return insn; + } + else if (need_block) + /* We can't use JUMP_LABEL here because it might be undefined when not optimizing. */ + return emit_insn_before (gen_block_branch_redirect + (GEN_INT (unspec_bbr_uid++)), + jump); + return prev; +} + +#define CONDJUMP_MIN -252 +#define CONDJUMP_MAX 262 +struct far_branch +{ + /* A label (to be placed) in front of the jump + that jumps to our ultimate destination. */ + rtx near_label; + /* Where we are going to insert it if we cannot move the jump any farther, + or the jump itself if we have picked up an existing jump.
*/ + rtx insert_place; + /* The ultimate destination. */ + rtx far_label; + struct far_branch *prev; + /* If the branch has already been created, its address; + else the address of its first prospective user. */ + int address; +}; + +static void gen_far_branch (struct far_branch *); +enum mdep_reorg_phase_e mdep_reorg_phase; +static void +gen_far_branch (struct far_branch *bp) +{ + rtx insn = bp->insert_place; + rtx jump; + rtx label = gen_label_rtx (); + int ok; + + emit_label_after (label, insn); + if (bp->far_label) + { + jump = emit_jump_insn_after (gen_jump (bp->far_label), insn); + LABEL_NUSES (bp->far_label)++; + } + else + jump = emit_jump_insn_after (gen_return (), insn); + /* Emit a barrier so that reorg knows that any following instructions + are not reachable via a fall-through path. + But don't do this when not optimizing, since we wouldn't suppress the + alignment for the barrier then, and could end up with out-of-range + pc-relative loads. */ + if (optimize) + emit_barrier_after (jump); + emit_label_after (bp->near_label, insn); + JUMP_LABEL (jump) = bp->far_label; + ok = invert_jump (insn, label, 1); + gcc_assert (ok); + + /* If we are branching around a jump (rather than a return), prevent + reorg from using an insn from the jump target as the delay slot insn - + when reorg did this, it pessimized code (we rather hide the delay slot) + and it could cause branches to go out of range. */ + if (bp->far_label) + (emit_insn_after + (gen_stuff_delay_slot + (GEN_INT (unspec_bbr_uid++), + GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)), + insn)); + /* Prevent reorg from undoing our splits. */ + gen_block_redirect (jump, bp->address += 2, 2); +} + +/* Fix up ADDR_DIFF_VECs. 
*/ +void +fixup_addr_diff_vecs (rtx first) +{ + rtx insn; + + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx vec_lab, pat, prev, prevpat, x, braf_label; + + if (!JUMP_P (insn) + || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) + continue; + pat = PATTERN (insn); + vec_lab = XEXP (XEXP (pat, 0), 0); + + /* Search backwards from the table label for the matching casesi_jump_2; the loop ends with PREV null if it runs off the start of the insn chain without finding one. */ + for (prev = vec_lab; prev; prev = PREV_INSN (prev)) + { + if (!JUMP_P (prev)) + continue; + prevpat = PATTERN (prev); + if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2) + continue; + x = XVECEXP (prevpat, 0, 1); + if (GET_CODE (x) != USE) + continue; + x = XEXP (x, 0); + if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab) + break; + } + /* FIXME: This is a bug in the optimizer, but it seems harmless + to just avoid panicking. */ + if (!prev) + continue; + + /* Emit the reference label of the braf where it belongs, right after + the casesi_jump_2 (i.e. braf). */ + braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0); + emit_label_after (braf_label, prev); + + /* Fix up the ADDR_DIFF_VEC to be relative + to the reference address of the braf. */ + XEXP (XEXP (pat, 0), 0) = braf_label; + } +} + +/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following + a barrier. Return the base 2 logarithm of the desired alignment. */ +int +barrier_align (rtx barrier_or_label) +{ + rtx next = next_real_insn (barrier_or_label), pat, prev; + int slot, credit, jump_to_next = 0; + + if (! next) + return 0; + + pat = PATTERN (next); + + if (GET_CODE (pat) == ADDR_DIFF_VEC) + return 2; + + if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN) + /* This is a barrier in front of a constant table. */ + return 0; + + prev = prev_real_insn (barrier_or_label); + if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC) + { + pat = PATTERN (prev); + /* If this is a very small table, we want to keep the alignment after + the table to the minimum for proper code alignment. 
*/ + return ((optimize_size + || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat)) + <= (unsigned) 1 << (CACHE_LOG - 2))) + ? 1 << TARGET_SHMEDIA : align_jumps_log); + } + + if (optimize_size) + return 0; + + if (! TARGET_SH2 || ! optimize) + return align_jumps_log; + + /* When fixing up pcloads, a constant table might be inserted just before + the basic block that ends with the barrier. Thus, we can't trust the + instruction lengths before that. */ + if (mdep_reorg_phase > SH_FIXUP_PCLOAD) + { + /* Check if there is an immediately preceding branch to the insn beyond + the barrier. We must weigh the cost of discarding useful information + from the current cache line when executing this branch and there is + an alignment, against that of fetching unneeded insns in front of the + branch target when there is no alignment. */ + + /* There are two delay_slot cases to consider. One is the simple case + where the preceding branch is to the insn beyond the barrier (simple + delay slot filling), and the other is where the preceding branch has + a delay slot that is a duplicate of the insn after the barrier + (fill_eager_delay_slots) and the branch is to the insn after the insn + after the barrier. */ + + /* PREV is presumed to be the JUMP_INSN for the barrier under + investigation. Skip to the insn before it. */ + prev = prev_real_insn (prev); + + for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2; + credit >= 0 && prev && NONJUMP_INSN_P (prev); + prev = prev_real_insn (prev)) + { + jump_to_next = 0; + if (GET_CODE (PATTERN (prev)) == USE + || GET_CODE (PATTERN (prev)) == CLOBBER) + continue; + if (GET_CODE (PATTERN (prev)) == SEQUENCE) + { + prev = XVECEXP (PATTERN (prev), 0, 1); + if (INSN_UID (prev) == INSN_UID (next)) + { + /* Delay slot was filled with insn at jump target. 
*/ + jump_to_next = 1; + continue; + } + } + + if (slot && + get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) + slot = 0; + credit -= get_attr_length (prev); + } + if (prev + && JUMP_P (prev) + && JUMP_LABEL (prev)) + { + rtx x; + if (jump_to_next + || next_real_insn (JUMP_LABEL (prev)) == next + /* If relax_delay_slots() decides NEXT was redundant + with some previous instruction, it will have + redirected PREV's jump to the following insn. */ + || JUMP_LABEL (prev) == next_nonnote_insn (next) + /* There is no upper bound on redundant instructions + that might have been skipped, but we must not put an + alignment where none had been before. */ + || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))), + (INSN_P (x) + && (INSN_CODE (x) == CODE_FOR_block_branch_redirect + || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch + || INSN_CODE (x) == CODE_FOR_stuff_delay_slot)))) + { + rtx pat = PATTERN (prev); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0)) + return 0; + } + } + } + + return align_jumps_log; +} + +/* If we are inside a phony loop, almost any kind of label can turn up as the + first one in the loop. Aligning a braf label causes incorrect switch + destination addresses; we can detect braf labels because they are + followed by a BARRIER. + Applying loop alignment to small constant or switch tables is a waste + of space, so we suppress this too. */ +int +sh_loop_align (rtx label) +{ + rtx next = label; + + do + next = next_nonnote_insn (next); + while (next && LABEL_P (next)); + + if (! next + || ! INSN_P (next) + || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC + || recog_memoized (next) == CODE_FOR_consttable_2) + return 0; + + return align_loops_log; +} + +/* Do a final pass over the function, just before delayed branch + scheduling. 
*/ + +static void +sh_reorg (void) +{ + rtx first, insn, mova = NULL_RTX; + int num_mova; + rtx r0_rtx = gen_rtx_REG (Pmode, 0); + rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx); + + first = get_insns (); + max_labelno_before_reorg = max_label_num (); + + /* We must split call insns before introducing `mova's. If we're + optimizing, they'll have already been split. Otherwise, make + sure we don't split them too late. */ + if (! optimize) + split_all_insns_noflow (); + + if (TARGET_SHMEDIA) + return; + + /* If relaxing, generate pseudo-ops to associate function calls with + the symbols they call. It does no harm to not generate these + pseudo-ops. However, when we can generate them, it enables the + linker to potentially relax the jsr to a bsr, and eliminate the + register load and, possibly, the constant pool entry. */ + + mdep_reorg_phase = SH_INSERT_USES_LABELS; + if (TARGET_RELAX) + { + /* Remove all REG_LABEL_OPERAND notes. We want to use them for our + own purposes. This works because none of the remaining passes + need to look at them. + + ??? But it may break in the future. We should use a machine + dependent REG_NOTE, or some other approach entirely. */ + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + if (INSN_P (insn)) + { + rtx note; + + while ((note = find_reg_note (insn, REG_LABEL_OPERAND, + NULL_RTX)) != 0) + remove_note (insn, note); + } + } + + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx pattern, reg, link, set, scan, dies, label; + int rescan = 0, foundinsn = 0; + + if (CALL_P (insn)) + { + pattern = PATTERN (insn); + + if (GET_CODE (pattern) == PARALLEL) + pattern = XVECEXP (pattern, 0, 0); + if (GET_CODE (pattern) == SET) + pattern = SET_SRC (pattern); + + if (GET_CODE (pattern) != CALL + || !MEM_P (XEXP (pattern, 0))) + continue; + + reg = XEXP (XEXP (pattern, 0), 0); + } + else + { + reg = sfunc_uses_reg (insn); + if (! 
reg) + continue; + } + + if (!REG_P (reg)) + continue; + + /* Try scanning backward to find where the register is set. */ + link = NULL; + for (scan = PREV_INSN (insn); + scan && !LABEL_P (scan); + scan = PREV_INSN (scan)) + { + if (! INSN_P (scan)) + continue; + + if (! reg_mentioned_p (reg, scan)) + continue; + + if (noncall_uses_reg (reg, scan, &set)) + break; + + if (set) + { + link = scan; + break; + } + } + + if (! link) + continue; + + /* The register is set at LINK. */ + + /* We can only optimize the function call if the register is + being set to a symbol. In theory, we could sometimes + optimize calls to a constant location, but the assembler + and linker do not support that at present. */ + if (GET_CODE (SET_SRC (set)) != SYMBOL_REF + && GET_CODE (SET_SRC (set)) != LABEL_REF) + continue; + + /* Scan forward from LINK to the place where REG dies, and + make sure that the only insns which use REG are + themselves function calls. */ + + /* ??? This doesn't work for call targets that were allocated + by reload, since there may not be a REG_DEAD note for the + register. */ + + dies = NULL_RTX; + for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan)) + { + rtx scanset; + + /* Don't try to trace forward past a CODE_LABEL if we haven't + seen INSN yet. Ordinarily, we will only find the setting insn + if it is in the same basic block. However, + cross-jumping can insert code labels in between the load and + the call, and can result in situations where a single call + insn may have two targets depending on where we came from. */ + + if (LABEL_P (scan) && ! foundinsn) + break; + + if (! INSN_P (scan)) + continue; + + /* Don't try to trace forward past a JUMP. To optimize + safely, we would have to check that all the + instructions at the jump destination did not use REG. */ + + if (JUMP_P (scan)) + break; + + if (! 
reg_mentioned_p (reg, scan)) + continue; + + if (noncall_uses_reg (reg, scan, &scanset)) + break; + + if (scan == insn) + foundinsn = 1; + + if (scan != insn + && (CALL_P (scan) || sfunc_uses_reg (scan))) + { + /* There is a function call to this register other + than the one we are checking. If we optimize + this call, we need to rescan again below. */ + rescan = 1; + } + + /* ??? We shouldn't have to worry about SCANSET here. + We should just be able to check for a REG_DEAD note + on a function call. However, the REG_DEAD notes are + apparently not dependable around libcalls; c-torture + execute/920501-2 is a test case. If SCANSET is set, + then this insn sets the register, so it must have + died earlier. Unfortunately, this will only handle + the cases in which the register is, in fact, set in a + later insn. */ + + /* ??? We shouldn't have to use FOUNDINSN here. + This dates back to when we used LOG_LINKS to find + the most recent insn which sets the register. */ + + if (foundinsn + && (scanset + || find_reg_note (scan, REG_DEAD, reg))) + { + dies = scan; + break; + } + } + + if (! dies) + { + /* Either there was a branch, or some insn used REG + other than as a function call address. */ + continue; + } + + /* Create a code label, and put it in a REG_LABEL_OPERAND note + on the insn which sets the register, and on each call insn + which uses the register. In final_prescan_insn we look for + the REG_LABEL_OPERAND notes, and output the appropriate label + or pseudo-op. 
*/ + + label = gen_label_rtx (); + add_reg_note (link, REG_LABEL_OPERAND, label); + add_reg_note (insn, REG_LABEL_OPERAND, label); + if (rescan) + { + scan = link; + do + { + rtx reg2; + + scan = NEXT_INSN (scan); + if (scan != insn + && ((CALL_P (scan) + && reg_mentioned_p (reg, scan)) + || ((reg2 = sfunc_uses_reg (scan)) + && REGNO (reg2) == REGNO (reg)))) + add_reg_note (scan, REG_LABEL_OPERAND, label); + } + while (scan != dies); + } + } + } + + if (TARGET_SH2) + fixup_addr_diff_vecs (first); + + if (optimize) + { + mdep_reorg_phase = SH_SHORTEN_BRANCHES0; + shorten_branches (first); + } + + /* Scan the function looking for move instructions which have to be + changed to pc-relative loads and insert the literal tables. */ + label_ref_list_pool = create_alloc_pool ("label references list", + sizeof (struct label_ref_list_d), + 30); + mdep_reorg_phase = SH_FIXUP_PCLOAD; + for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn)) + { + if (mova_p (insn)) + { + /* ??? basic block reordering can move a switch table dispatch + below the switch table. Check if that has happened. + We only have the addresses available when optimizing; but then, + this check shouldn't be needed when not optimizing. */ + if (!untangle_mova (&num_mova, &mova, insn)) + { + insn = mova; + num_mova = 0; + } + } + else if (JUMP_P (insn) + && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC + && num_mova + /* ??? loop invariant motion can also move a mova out of a + loop. Since loop does this code motion anyway, maybe we + should wrap UNSPEC_MOVA into a CONST, so that reload can + move it back. */ + && ((num_mova > 1 + && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode) + || (prev_nonnote_insn (insn) + == XEXP (MOVA_LABELREF (mova), 0)))) + { + rtx scan; + int total; + + num_mova--; + + /* Some code might have been inserted between the mova and + its ADDR_DIFF_VEC. Check if the mova is still in range. 
*/ + for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan)) + total += get_attr_length (scan); + + /* range of mova is 1020, add 4 because pc counts from address of + second instruction after this one, subtract 2 in case pc is 2 + byte aligned. Possible alignment needed for the ADDR_DIFF_VEC + cancels out with alignment effects of the mova itself. */ + if (total > 1022) + { + /* Change the mova into a load, and restart scanning + there. broken_move will then return true for mova. */ + fixup_mova (mova); + insn = mova; + } + } + if (broken_move (insn) + || (NONJUMP_INSN_P (insn) + && recog_memoized (insn) == CODE_FOR_casesi_worker_2)) + { + rtx scan; + /* Scan ahead looking for a barrier to stick the constant table + behind. */ + rtx barrier = find_barrier (num_mova, mova, insn); + rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL; + int need_aligned_label = 0; + + if (num_mova && ! mova_p (mova)) + { + /* find_barrier had to change the first mova into a + pcload; thus, we have to start with this new pcload. */ + insn = mova; + num_mova = 0; + } + /* Now find all the moves between the points and modify them. 
*/ + for (scan = insn; scan != barrier; scan = NEXT_INSN (scan)) + { + if (LABEL_P (scan)) + last_float = 0; + if (NONJUMP_INSN_P (scan) + && recog_memoized (scan) == CODE_FOR_casesi_worker_2) + need_aligned_label = 1; + if (broken_move (scan)) + { + rtx *patp = &PATTERN (scan), pat = *patp; + rtx src, dst; + rtx lab; + rtx newsrc; + enum machine_mode mode; + + if (GET_CODE (pat) == PARALLEL) + patp = &XVECEXP (pat, 0, 0), pat = *patp; + src = SET_SRC (pat); + dst = SET_DEST (pat); + mode = GET_MODE (dst); + + if (mode == SImode && hi_const (src) + && REGNO (dst) != FPUL_REG) + { + int offset = 0; + + mode = HImode; + while (GET_CODE (dst) == SUBREG) + { + offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)), + GET_MODE (SUBREG_REG (dst)), + SUBREG_BYTE (dst), + GET_MODE (dst)); + dst = SUBREG_REG (dst); + } + dst = gen_rtx_REG (HImode, REGNO (dst) + offset); + } + if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst))) + { + /* This must be an insn that clobbers r0. */ + rtx *clobberp = &XVECEXP (PATTERN (scan), 0, + XVECLEN (PATTERN (scan), 0) + - 1); + rtx clobber = *clobberp; + + gcc_assert (GET_CODE (clobber) == CLOBBER + && rtx_equal_p (XEXP (clobber, 0), r0_rtx)); + + if (last_float + && reg_set_between_p (r0_rtx, last_float_move, scan)) + last_float = 0; + if (last_float + && TARGET_SHCOMPACT + && GET_MODE_SIZE (mode) != 4 + && GET_MODE_SIZE (GET_MODE (last_float)) == 4) + last_float = 0; + lab = add_constant (src, mode, last_float); + if (lab) + emit_insn_before (gen_mova (lab), scan); + else + { + /* There will be a REG_UNUSED note for r0 on + LAST_FLOAT_MOVE; we have to change it to REG_INC, + lest reorg:mark_target_live_regs will not + consider r0 to be used, and we end up with delay + slot insn in front of SCAN that clobbers r0. */ + rtx note + = find_regno_note (last_float_move, REG_UNUSED, 0); + + /* If we are not optimizing, then there may not be + a note. 
*/ + if (note) + PUT_REG_NOTE_KIND (note, REG_INC); + + *last_float_addr = r0_inc_rtx; + } + last_float_move = scan; + last_float = src; + newsrc = gen_const_mem (mode, + (((TARGET_SH4 && ! TARGET_FMOVD) + || REGNO (dst) == FPUL_REG) + ? r0_inc_rtx + : r0_rtx)); + last_float_addr = &XEXP (newsrc, 0); + + /* Remove the clobber of r0. */ + *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber), + gen_rtx_SCRATCH (Pmode)); + } + /* This is a mova needing a label. Create it. */ + else if (GET_CODE (src) == UNSPEC + && XINT (src, 1) == UNSPEC_MOVA + && GET_CODE (XVECEXP (src, 0, 0)) == CONST) + { + lab = add_constant (XVECEXP (src, 0, 0), mode, 0); + newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); + newsrc = gen_rtx_UNSPEC (SImode, + gen_rtvec (1, newsrc), + UNSPEC_MOVA); + } + else + { + lab = add_constant (src, mode, 0); + newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); + newsrc = gen_const_mem (mode, newsrc); + } + *patp = gen_rtx_SET (VOIDmode, dst, newsrc); + INSN_CODE (scan) = -1; + } + } + dump_table (need_aligned_label ? insn : 0, barrier); + insn = barrier; + } + } + free_alloc_pool (label_ref_list_pool); + for (insn = first; insn; insn = NEXT_INSN (insn)) + PUT_MODE (insn, VOIDmode); + + mdep_reorg_phase = SH_SHORTEN_BRANCHES1; + INSN_ADDRESSES_FREE (); + split_branches (first); + + /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it + also has an effect on the register that holds the address of the sfunc. + Insert an extra dummy insn in front of each sfunc that pretends to + use this register. */ + if (flag_delayed_branch) + { + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx reg = sfunc_uses_reg (insn); + + if (! reg) + continue; + emit_insn_before (gen_use_sfunc_addr (reg), insn); + } + } +#if 0 + /* fpscr is not actually a user variable, but we pretend it is for the + sake of the previous optimization passes, since we want it handled like + one. 
However, we don't have any debugging information for it, so turn + it into a non-user variable now. */ + if (TARGET_SH4) + REG_USERVAR_P (get_fpscr_rtx ()) = 0; +#endif + mdep_reorg_phase = SH_AFTER_MDEP_REORG; +} + +int +get_dest_uid (rtx label, int max_uid) +{ + rtx dest = next_real_insn (label); + int dest_uid; + if (! dest) + /* This can happen for an undefined label. */ + return 0; + dest_uid = INSN_UID (dest); + /* If this is a newly created branch redirection blocking instruction, + we cannot index the branch_uid or insn_addresses arrays with its + uid. But then, we won't need to, because the actual destination is + the following branch. */ + while (dest_uid >= max_uid) + { + dest = NEXT_INSN (dest); + dest_uid = INSN_UID (dest); + } + if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN) + return 0; + return dest_uid; +} + +/* Split condbranches that are out of range. Also add clobbers for + scratch registers that are needed in far jumps. + We do this before delay slot scheduling, so that it can take our + newly created instructions into account. It also allows us to + find branches with common targets more easily. */ + +static void +split_branches (rtx first) +{ + rtx insn; + struct far_branch **uid_branch, *far_branch_list = 0; + int max_uid = get_max_uid (); + int ok; + + /* Find out which branches are out of range. */ + shorten_branches (first); + + uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch); + memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch); + + for (insn = first; insn; insn = NEXT_INSN (insn)) + if (! INSN_P (insn)) + continue; + else if (INSN_DELETED_P (insn)) + { + /* Shorten_branches would split this instruction again, + so transform it into a note. 
*/ + SET_INSN_DELETED (insn); + } + else if (JUMP_P (insn) + /* Don't mess with ADDR_DIFF_VEC */ + && (GET_CODE (PATTERN (insn)) == SET + || GET_CODE (PATTERN (insn)) == RETURN)) + { + enum attr_type type = get_attr_type (insn); + if (type == TYPE_CBRANCH) + { + rtx next, beyond; + + if (get_attr_length (insn) > 4) + { + rtx src = SET_SRC (PATTERN (insn)); + rtx olabel = XEXP (XEXP (src, 1), 0); + int addr = INSN_ADDRESSES (INSN_UID (insn)); + rtx label = 0; + int dest_uid = get_dest_uid (olabel, max_uid); + struct far_branch *bp = uid_branch[dest_uid]; + + /* redirect_jump needs a valid JUMP_LABEL, and it might delete + the label if the LABEL_NUSES count drops to zero. There is + always a jump_optimize pass that sets these values, but it + proceeds to delete unreferenced code, and then if not + optimizing, to un-delete the deleted instructions, thus + leaving labels with too low uses counts. */ + if (! optimize) + { + JUMP_LABEL (insn) = olabel; + LABEL_NUSES (olabel)++; + } + if (! bp) + { + bp = (struct far_branch *) alloca (sizeof *bp); + uid_branch[dest_uid] = bp; + bp->prev = far_branch_list; + far_branch_list = bp; + bp->far_label + = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0); + LABEL_NUSES (bp->far_label)++; + } + else + { + label = bp->near_label; + if (! label && bp->address - addr >= CONDJUMP_MIN) + { + rtx block = bp->insert_place; + + if (GET_CODE (PATTERN (block)) == RETURN) + block = PREV_INSN (block); + else + block = gen_block_redirect (block, + bp->address, 2); + label = emit_label_after (gen_label_rtx (), + PREV_INSN (block)); + bp->near_label = label; + } + else if (label && ! NEXT_INSN (label)) + { + if (addr + 2 - bp->address <= CONDJUMP_MAX) + bp->insert_place = insn; + else + gen_far_branch (bp); + } + } + if (! 
label + || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN)) + { + bp->near_label = label = gen_label_rtx (); + bp->insert_place = insn; + bp->address = addr; + } + ok = redirect_jump (insn, label, 0); + gcc_assert (ok); + } + else + { + /* get_attr_length (insn) == 2 */ + /* Check if we have a pattern where reorg wants to redirect + the branch to a label from an unconditional branch that + is too far away. */ + /* We can't use JUMP_LABEL here because it might be undefined + when not optimizing. */ + /* A syntax error might cause beyond to be NULL_RTX. */ + beyond + = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), + 0)); + + if (beyond + && (JUMP_P (beyond) + || ((beyond = next_active_insn (beyond)) + && JUMP_P (beyond))) + && GET_CODE (PATTERN (beyond)) == SET + && recog_memoized (beyond) == CODE_FOR_jump_compact + && ((INSN_ADDRESSES + (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))) + - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252) + > 252 + 258 + 2)) + gen_block_redirect (beyond, + INSN_ADDRESSES (INSN_UID (beyond)), 1); + } + + next = next_active_insn (insn); + + if (next + && (JUMP_P (next) + || ((next = next_active_insn (next)) + && JUMP_P (next))) + && GET_CODE (PATTERN (next)) == SET + && recog_memoized (next) == CODE_FOR_jump_compact + && ((INSN_ADDRESSES + (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))) + - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252) + > 252 + 258 + 2)) + gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1); + } + else if (type == TYPE_JUMP || type == TYPE_RETURN) + { + int addr = INSN_ADDRESSES (INSN_UID (insn)); + rtx far_label = 0; + int dest_uid = 0; + struct far_branch *bp; + + if (type == TYPE_JUMP) + { + far_label = XEXP (SET_SRC (PATTERN (insn)), 0); + dest_uid = get_dest_uid (far_label, max_uid); + if (! dest_uid) + { + /* Parse errors can lead to labels outside + the insn stream. */ + if (! NEXT_INSN (far_label)) + continue; + + if (! 
optimize) + { + JUMP_LABEL (insn) = far_label; + LABEL_NUSES (far_label)++; + } + redirect_jump (insn, NULL_RTX, 1); + far_label = 0; + } + } + bp = uid_branch[dest_uid]; + if (! bp) + { + bp = (struct far_branch *) alloca (sizeof *bp); + uid_branch[dest_uid] = bp; + bp->prev = far_branch_list; + far_branch_list = bp; + bp->near_label = 0; + bp->far_label = far_label; + if (far_label) + LABEL_NUSES (far_label)++; + } + else if (bp->near_label && ! NEXT_INSN (bp->near_label)) + if (addr - bp->address <= CONDJUMP_MAX) + emit_label_after (bp->near_label, PREV_INSN (insn)); + else + { + gen_far_branch (bp); + bp->near_label = 0; + } + else + bp->near_label = 0; + bp->address = addr; + bp->insert_place = insn; + if (! far_label) + emit_insn_before (gen_block_branch_redirect (const0_rtx), insn); + else + gen_block_redirect (insn, addr, bp->near_label ? 2 : 0); + } + } + /* Generate all pending far branches, + and free our references to the far labels. */ + while (far_branch_list) + { + if (far_branch_list->near_label + && ! NEXT_INSN (far_branch_list->near_label)) + gen_far_branch (far_branch_list); + if (optimize + && far_branch_list->far_label + && ! --LABEL_NUSES (far_branch_list->far_label)) + delete_insn (far_branch_list->far_label); + far_branch_list = far_branch_list->prev; + } + + /* Instruction length information is no longer valid due to the new + instructions that have been generated. */ + init_insn_lengths (); +} + +/* Dump out instruction addresses, which is useful for debugging the + constant pool table stuff. + + If relaxing, output the label and pseudo-ops used to link together + calls and the instruction which set the registers. */ + +/* ??? The addresses printed by this routine for insns are nonsense for + insns which are inside of a sequence where none of the inner insns have + variable length. This is because the second pass of shorten_branches + does not bother to update them. 
*/ + +void +final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED, + int noperands ATTRIBUTE_UNUSED) +{ + if (TARGET_DUMPISIZE) + fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn))); + + if (TARGET_RELAX) + { + rtx note; + + note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX); + if (note) + { + rtx pattern; + + pattern = PATTERN (insn); + if (GET_CODE (pattern) == PARALLEL) + pattern = XVECEXP (pattern, 0, 0); + switch (GET_CODE (pattern)) + { + case SET: + if (GET_CODE (SET_SRC (pattern)) != CALL + && get_attr_type (insn) != TYPE_SFUNC) + { + targetm.asm_out.internal_label + (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0))); + break; + } + /* else FALLTHROUGH */ + case CALL: + asm_fprintf (asm_out_file, "\t.uses %LL%d\n", + CODE_LABEL_NUMBER (XEXP (note, 0))); + break; + + default: + gcc_unreachable (); + } + } + } +} + +/* Dump out any constants accumulated in the final pass. These will + only be labels. */ + +const char * +output_jump_label_table (void) +{ + int i; + + if (pool_size) + { + fprintf (asm_out_file, "\t.align 2\n"); + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + (*targetm.asm_out.internal_label) (asm_out_file, "L", + CODE_LABEL_NUMBER (p->label)); + output_asm_insn (".long %O0", &p->value); + } + pool_size = 0; + } + + return ""; +} + +/* A full frame looks like: + + arg-5 + arg-4 + [ if current_function_anonymous_args + arg-3 + arg-2 + arg-1 + arg-0 ] + saved-fp + saved-r10 + saved-r11 + saved-r12 + saved-pr + local-n + .. + local-1 + local-0 <- fp points here. */ + +/* Number of bytes pushed for anonymous args, used to pass information + between expand_prologue and expand_epilogue. */ + +/* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be + adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's + for an epilogue and a negative value means that it's for a sibcall + epilogue. 
If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of + all the registers that are about to be restored, and hence dead. */ + +static void +output_stack_adjust (int size, rtx reg, int epilogue_p, + HARD_REG_SET *live_regs_mask, bool frame_p) +{ + rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn; + if (size) + { + HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT; + +/* This test is bogus, as output_stack_adjust is used to re-align the + stack. */ +#if 0 + gcc_assert (!(size % align)); +#endif + + if (CONST_OK_FOR_ADD (size)) + emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size))); + /* Try to do it with two partial adjustments; however, we must make + sure that the stack is properly aligned at all times, in case + an interrupt occurs between the two partial adjustments. */ + else if (CONST_OK_FOR_ADD (size / 2 & -align) + && CONST_OK_FOR_ADD (size - (size / 2 & -align))) + { + emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align))); + emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align)))); + } + else + { + rtx const_reg; + rtx insn; + int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1); + int i; + + /* If TEMP is invalid, we could temporarily save a general + register to MACL. However, there is currently no need + to handle this case, so just die when we see it. */ + if (epilogue_p < 0 + || current_function_interrupt + || ! call_really_used_regs[temp] || fixed_regs[temp]) + temp = -1; + if (temp < 0 && ! 
current_function_interrupt + && (TARGET_SHMEDIA || epilogue_p >= 0)) + { + HARD_REG_SET temps; + COPY_HARD_REG_SET (temps, call_used_reg_set); + AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set); + if (epilogue_p > 0) + { + int nreg = 0; + if (crtl->return_rtx) + { + enum machine_mode mode; + mode = GET_MODE (crtl->return_rtx); + if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG) + nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode); + } + for (i = 0; i < nreg; i++) + CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i); + if (crtl->calls_eh_return) + { + CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO); + for (i = 0; i <= 3; i++) + CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i)); + } + } + if (TARGET_SHMEDIA && epilogue_p < 0) + for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++) + CLEAR_HARD_REG_BIT (temps, i); + if (epilogue_p <= 0) + { + for (i = FIRST_PARM_REG; + i < FIRST_PARM_REG + NPARM_REGS (SImode); i++) + CLEAR_HARD_REG_BIT (temps, i); + if (cfun->static_chain_decl != NULL) + CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM); + } + temp = scavenge_reg (&temps); + } + if (temp < 0 && live_regs_mask) + { + HARD_REG_SET temps; + + COPY_HARD_REG_SET (temps, *live_regs_mask); + CLEAR_HARD_REG_BIT (temps, REGNO (reg)); + temp = scavenge_reg (&temps); + } + if (temp < 0) + { + rtx adj_reg, tmp_reg, mem; + + /* If we reached here, the most likely case is the (sibcall) + epilogue for non SHmedia. Put a special push/pop sequence + for such case as the last resort. This looks lengthy but + would not be problem because it seems to be very + rare. */ + + gcc_assert (!TARGET_SHMEDIA && epilogue_p); + + + /* ??? There is still the slight possibility that r4 or + r5 have been reserved as fixed registers or assigned + as global registers, and they change during an + interrupt. There are possible ways to handle this: + + - If we are adjusting the frame pointer (r14), we can do + with a single temp register and an ordinary push / pop + on the stack. 
+ - Grab any call-used or call-saved registers (i.e. not + fixed or globals) for the temps we need. We might + also grab r14 if we are adjusting the stack pointer. + If we can't find enough available registers, issue + a diagnostic and die - the user must have reserved + way too many registers. + But since all this is rather unlikely to happen and + would require extra testing, we just die if r4 / r5 + are not available. */ + gcc_assert (!fixed_regs[4] && !fixed_regs[5] + && !global_regs[4] && !global_regs[5]); + + adj_reg = gen_rtx_REG (GET_MODE (reg), 4); + tmp_reg = gen_rtx_REG (GET_MODE (reg), 5); + emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg); + emit_insn (GEN_MOV (adj_reg, GEN_INT (size))); + emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg)); + mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg)); + emit_move_insn (mem, tmp_reg); + emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg)); + mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg)); + emit_move_insn (mem, tmp_reg); + emit_move_insn (reg, adj_reg); + mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg)); + emit_move_insn (adj_reg, mem); + mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg)); + emit_move_insn (tmp_reg, mem); + /* Tell flow the insns that pop r4/r5 aren't dead. */ + emit_use (tmp_reg); + emit_use (adj_reg); + return; + } + const_reg = gen_rtx_REG (GET_MODE (reg), temp); + + /* If SIZE is negative, subtract the positive value. + This sometimes allows a constant pool entry to be shared + between prologue and epilogue code. */ + if (size < 0) + { + emit_insn (GEN_MOV (const_reg, GEN_INT (-size))); + insn = emit_fn (GEN_SUB3 (reg, reg, const_reg)); + } + else + { + emit_insn (GEN_MOV (const_reg, GEN_INT (size))); + insn = emit_fn (GEN_ADD3 (reg, reg, const_reg)); + } + if (! 
epilogue_p) + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, reg, + gen_rtx_PLUS (SImode, reg, + GEN_INT (size)))); + } + } +} + +static rtx +frame_insn (rtx x) +{ + x = emit_insn (x); + RTX_FRAME_RELATED_P (x) = 1; + return x; +} + +/* Output RTL to push register RN onto the stack. */ + +static rtx +push (int rn) +{ + rtx x; + if (rn == FPUL_REG) + x = gen_push_fpul (); + else if (rn == FPSCR_REG) + x = gen_push_fpscr (); + else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE + && FP_OR_XD_REGISTER_P (rn)) + { + if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1) + return NULL_RTX; + x = gen_push_4 (gen_rtx_REG (DFmode, rn)); + } + else if (TARGET_SH2E && FP_REGISTER_P (rn)) + x = gen_push_e (gen_rtx_REG (SFmode, rn)); + else + x = gen_push (gen_rtx_REG (SImode, rn)); + + x = frame_insn (x); + add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM)); + return x; +} + +/* Output RTL to pop register RN from the stack. */ + +static void +pop (int rn) +{ + rtx x; + if (rn == FPUL_REG) + x = gen_pop_fpul (); + else if (rn == FPSCR_REG) + x = gen_pop_fpscr (); + else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE + && FP_OR_XD_REGISTER_P (rn)) + { + if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1) + return; + x = gen_pop_4 (gen_rtx_REG (DFmode, rn)); + } + else if (TARGET_SH2E && FP_REGISTER_P (rn)) + x = gen_pop_e (gen_rtx_REG (SFmode, rn)); + else + x = gen_pop (gen_rtx_REG (SImode, rn)); + + x = emit_insn (x); + add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM)); +} + +/* Generate code to push the regs specified in the mask. */ + +static void +push_regs (HARD_REG_SET *mask, int interrupt_handler) +{ + int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0; + int skip_fpscr = 0; + + /* Push PR last; this gives better latencies after the prologue, and + candidates for the return delay slot when there are no general + registers pushed. 
*/ + for (; i < FIRST_PSEUDO_REGISTER; i++) + { + /* If this is an interrupt handler, and the SZ bit varies, + and we have to push any floating point register, we need + to switch to the correct precision first. */ + if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD + && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS])) + { + HARD_REG_SET unsaved; + + push (FPSCR_REG); + COMPL_HARD_REG_SET (unsaved, *mask); + fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved); + skip_fpscr = 1; + } + if (i != PR_REG + && (i != FPSCR_REG || ! skip_fpscr) + && TEST_HARD_REG_BIT (*mask, i)) + { + /* If the ISR has RESBANK attribute assigned, don't push any of + the following registers - R0-R14, MACH, MACL and GBR. */ + if (! (sh_cfun_resbank_handler_p () + && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG) + || i == MACH_REG + || i == MACL_REG + || i == GBR_REG))) + push (i); + } + } + + /* Push banked registers last to improve delay slot opportunities. */ + if (interrupt_handler) + { + bool use_movml = false; + + if (TARGET_SH2A) + { + unsigned int count = 0; + + for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) + if (TEST_HARD_REG_BIT (*mask, i)) + count++; + else + break; + + /* Use movml when all banked registers are pushed. */ + if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1) + use_movml = true; + } + + if (use_movml) + { + rtx x, mem, reg, set; + rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); + + /* We must avoid scheduling multiple store insn with another + insns. 
*/ + emit_insn (gen_blockage ()); + x = gen_movml_push_banked (sp_reg); + x = frame_insn (x); + for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) + { + mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4)); + reg = gen_rtx_REG (SImode, i); + add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg)); + } + + set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32)); + add_reg_note (x, REG_CFA_ADJUST_CFA, set); + emit_insn (gen_blockage ()); + } + else + for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) + if (TEST_HARD_REG_BIT (*mask, i)) + push (i); + } + + /* Don't push PR register for an ISR with RESBANK attribute assigned. */ + if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ()) + push (PR_REG); +} + +/* Calculate how much extra space is needed to save all callee-saved + target registers. + LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */ + +static int +shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask) +{ + int reg; + int stack_space = 0; + int interrupt_handler = sh_cfun_interrupt_handler_p (); + + for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--) + if ((! call_really_used_regs[reg] || interrupt_handler) + && ! TEST_HARD_REG_BIT (*live_regs_mask, reg)) + /* Leave space to save this target register on the stack, + in case target register allocation wants to use it. */ + stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg)); + return stack_space; +} + +/* Decide whether we should reserve space for callee-save target registers, + in case target register allocation wants to use them. REGS_SAVED is + the space, in bytes, that is already required for register saves. + LIVE_REGS_MASK is the register mask calculated by calc_live_regs. 
*/ + +static int +shmedia_reserve_space_for_target_registers_p (int regs_saved, + HARD_REG_SET *live_regs_mask) +{ + if (optimize_size) + return 0; + return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved; +} + +/* Decide how much space to reserve for callee-save target registers + in case target register allocation wants to use them. + LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */ + +static int +shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask) +{ + if (shmedia_space_reserved_for_target_registers) + return shmedia_target_regs_stack_space (live_regs_mask); + else + return 0; +} + +/* Work out the registers which need to be saved, both as a mask and a + count of saved words. Return the count. + + If doing a pragma interrupt function, then push all regs used by the + function, and if we call another function (we can tell by looking at PR), + make sure that all the regs it clobbers are safe too. */ + +static int +calc_live_regs (HARD_REG_SET *live_regs_mask) +{ + unsigned int reg; + int count; + tree attrs; + bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler; + bool nosave_low_regs; + int pr_live, has_call; + + attrs = DECL_ATTRIBUTES (current_function_decl); + interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p (); + trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE; + interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler; + nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE; + + CLEAR_HARD_REG_SET (*live_regs_mask); + if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler + && df_regs_ever_live_p (FPSCR_REG)) + target_flags &= ~MASK_FPU_SINGLE; + /* If we can save a lot of saves by switching to double mode, do that. 
*/ + else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE) + for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2) + if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1) + && (! call_really_used_regs[reg] + || interrupt_handler) + && ++count > 2) + { + target_flags &= ~MASK_FPU_SINGLE; + break; + } + /* PR_MEDIA_REG is a general purpose register, thus global_alloc already + knows how to use it. That means the pseudo originally allocated for + the initial value can become the PR_MEDIA_REG hard register, as seen for + execute/20010122-1.c:test9. */ + if (TARGET_SHMEDIA) + /* ??? this function is called from initial_elimination_offset, hence we + can't use the result of sh_media_register_for_return here. */ + pr_live = sh_pr_n_sets (); + else + { + rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG); + pr_live = (pr_initial + ? (!REG_P (pr_initial) + || REGNO (pr_initial) != (PR_REG)) + : df_regs_ever_live_p (PR_REG)); + /* For Shcompact, if not optimizing, we end up with a memory reference + using the return address pointer for __builtin_return_address even + though there is no actual need to put the PR register on the stack. */ + pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM); + } + /* Force PR to be live if the prologue has to call the SHmedia + argument decoder or register saver. */ + if (TARGET_SHCOMPACT + && ((crtl->args.info.call_cookie + & ~ CALL_COOKIE_RET_TRAMP (1)) + || crtl->saves_all_registers)) + pr_live = 1; + has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live; + for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; ) + { + if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG) + ? pr_live + : interrupt_handler + ? (/* Need to save all the regs ever live. */ + (df_regs_ever_live_p (reg) + || (call_really_used_regs[reg] + && (! 
fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG + || reg == PIC_OFFSET_TABLE_REGNUM) + && has_call) + || (TARGET_SHMEDIA && has_call + && REGISTER_NATURAL_MODE (reg) == SImode + && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg)))) + && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM + && reg != RETURN_ADDRESS_POINTER_REGNUM + && reg != T_REG && reg != GBR_REG + /* Push fpscr only on targets which have FPU */ + && (reg != FPSCR_REG || TARGET_FPU_ANY)) + : (/* Only push those regs which are used and need to be saved. */ + (TARGET_SHCOMPACT + && flag_pic + && crtl->args.info.call_cookie + && reg == PIC_OFFSET_TABLE_REGNUM) + || (df_regs_ever_live_p (reg) + && ((!call_really_used_regs[reg] + && !(reg != PIC_OFFSET_TABLE_REGNUM + && fixed_regs[reg] && call_used_regs[reg])) + || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY))) + || (crtl->calls_eh_return + && (reg == EH_RETURN_DATA_REGNO (0) + || reg == EH_RETURN_DATA_REGNO (1) + || reg == EH_RETURN_DATA_REGNO (2) + || reg == EH_RETURN_DATA_REGNO (3))) + || ((reg == MACL_REG || reg == MACH_REG) + && df_regs_ever_live_p (reg) + && sh_cfun_attr_renesas_p ()) + )) + { + SET_HARD_REG_BIT (*live_regs_mask, reg); + count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg)); + + if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD + && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT) + { + if (FP_REGISTER_P (reg)) + { + if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1)) + { + SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1)); + count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1)); + } + } + else if (XD_REGISTER_P (reg)) + { + /* Must switch to double mode to access these registers. */ + target_flags &= ~MASK_FPU_SINGLE; + } + } + } + if (nosave_low_regs && reg == R8_REG) + break; + } + /* If we have a target register optimization pass after prologue / epilogue + threading, we need to assume all target registers will be live even if + they aren't now. 
*/ + if (flag_branch_target_load_optimize2 + && TARGET_SAVE_ALL_TARGET_REGS + && shmedia_space_reserved_for_target_registers) + for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--) + if ((! call_really_used_regs[reg] || interrupt_handler) + && ! TEST_HARD_REG_BIT (*live_regs_mask, reg)) + { + SET_HARD_REG_BIT (*live_regs_mask, reg); + count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg)); + } + /* If this is an interrupt handler, we don't have any call-clobbered + registers we can conveniently use for target register save/restore. + Make sure we save at least one general purpose register when we need + to save target registers. */ + if (interrupt_handler + && hard_reg_set_intersect_p (*live_regs_mask, + reg_class_contents[TARGET_REGS]) + && ! hard_reg_set_intersect_p (*live_regs_mask, + reg_class_contents[GENERAL_REGS])) + { + SET_HARD_REG_BIT (*live_regs_mask, R0_REG); + count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG)); + } + + return count; +} + +/* Code to generate prologue and epilogue sequences */ + +/* PUSHED is the number of bytes that are being pushed on the + stack for register saves. Return the frame size, padded + appropriately so that the stack stays properly aligned. */ +static HOST_WIDE_INT +rounded_frame_size (int pushed) +{ + HOST_WIDE_INT size = get_frame_size (); + HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT; + + if (ACCUMULATE_OUTGOING_ARGS) + size += crtl->outgoing_args_size; + + return ((size + pushed + align - 1) & -align) - pushed; +} + +/* Choose a call-clobbered target-branch register that remains + unchanged along the whole function. We set it up as the return + value in the prologue. */ +int +sh_media_register_for_return (void) +{ + int regno; + int tr0_used; + + if (! 
current_function_is_leaf) + return -1; + if (lookup_attribute ("interrupt_handler", + DECL_ATTRIBUTES (current_function_decl))) + return -1; + if (sh_cfun_interrupt_handler_p ()) + return -1; + + tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM); + + for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++) + if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno)) + return regno; + + return -1; +} + +/* The maximum registers we need to save are: + - 62 general purpose registers (r15 is stack pointer, r63 is zero) + - 32 floating point registers (for each pair, we save none, + one single precision value, or a double precision value). + - 8 target registers + - add 1 entry for a delimiter. */ +#define MAX_SAVED_REGS (62+32+8) + +typedef struct save_entry_s +{ + unsigned char reg; + unsigned char mode; + short offset; +} save_entry; + +#define MAX_TEMPS 4 + +/* There will be a delimiter entry with VOIDmode both at the start and the + end of a filled in schedule. The end delimiter has the offset of the + save with the smallest (i.e. most negative) offset. */ +typedef struct save_schedule_s +{ + save_entry entries[MAX_SAVED_REGS + 2]; + int temps[MAX_TEMPS+1]; +} save_schedule; + +/* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero, + use reverse order. Returns the last entry written to (not counting + the delimiter). OFFSET_BASE is a number to be added to all offset + entries. */ + +static save_entry * +sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule, + int offset_base) +{ + int align, i; + save_entry *entry = schedule->entries; + int tmpx = 0; + int offset; + + if (! current_function_interrupt) + for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++) + if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG + && ! FUNCTION_ARG_REGNO_P (i) + && i != FIRST_RET_REG + && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM) + && ! 
(crtl->calls_eh_return + && (i == EH_RETURN_STACKADJ_REGNO + || ((unsigned) i >= EH_RETURN_DATA_REGNO (0) + && (unsigned) i <= EH_RETURN_DATA_REGNO (3))))) + schedule->temps[tmpx++] = i; + entry->reg = -1; + entry->mode = VOIDmode; + entry->offset = offset_base; + entry++; + /* We loop twice: first, we save 8-byte aligned registers in the + higher addresses, that are known to be aligned. Then, we + proceed to saving 32-bit registers that don't need 8-byte + alignment. + If this is an interrupt function, all registers that need saving + need to be saved in full. moreover, we need to postpone saving + target registers till we have saved some general purpose registers + we can then use as scratch registers. */ + offset = offset_base; + for (align = 1; align >= 0; align--) + { + for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--) + if (TEST_HARD_REG_BIT (*live_regs_mask, i)) + { + enum machine_mode mode = REGISTER_NATURAL_MODE (i); + int reg = i; + + if (current_function_interrupt) + { + if (TARGET_REGISTER_P (i)) + continue; + if (GENERAL_REGISTER_P (i)) + mode = DImode; + } + if (mode == SFmode && (i % 2) == 1 + && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i) + && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1)))) + { + mode = DFmode; + i--; + reg--; + } + + /* If we're doing the aligned pass and this is not aligned, + or we're doing the unaligned pass and this is aligned, + skip it. 
*/ + if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0) + != align) + continue; + + if (current_function_interrupt + && GENERAL_REGISTER_P (i) + && tmpx < MAX_TEMPS) + schedule->temps[tmpx++] = i; + + offset -= GET_MODE_SIZE (mode); + entry->reg = i; + entry->mode = mode; + entry->offset = offset; + entry++; + } + if (align && current_function_interrupt) + for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--) + if (TEST_HARD_REG_BIT (*live_regs_mask, i)) + { + offset -= GET_MODE_SIZE (DImode); + entry->reg = i; + entry->mode = DImode; + entry->offset = offset; + entry++; + } + } + entry->reg = -1; + entry->mode = VOIDmode; + entry->offset = offset; + schedule->temps[tmpx] = -1; + return entry - 1; +} + +void +sh_expand_prologue (void) +{ + HARD_REG_SET live_regs_mask; + int d, i; + int d_rounding = 0; + int save_flags = target_flags; + int pretend_args; + int stack_usage; + tree sp_switch_attr + = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)); + + current_function_interrupt = sh_cfun_interrupt_handler_p (); + + /* We have pretend args if we had an object sent partially in registers + and partially on the stack, e.g. a large structure. */ + pretend_args = crtl->args.pretend_args_size; + if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl) + && (NPARM_REGS(SImode) + > crtl->args.info.arg_count[(int) SH_ARG_INT])) + pretend_args = 0; + + output_stack_adjust (-pretend_args + - crtl->args.info.stack_regs * 8, + stack_pointer_rtx, 0, NULL, true); + stack_usage = pretend_args + crtl->args.info.stack_regs * 8; + + if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie) + /* We're going to use the PIC register to load the address of the + incoming-argument decoder and/or of the return trampoline from + the GOT, so make sure the PIC register is preserved and + initialized. 
*/ + df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); + + if (TARGET_SHCOMPACT + && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1))) + { + int reg; + + /* First, make all registers with incoming arguments that will + be pushed onto the stack live, so that register renaming + doesn't overwrite them. */ + for (reg = 0; reg < NPARM_REGS (SImode); reg++) + if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie) + >= NPARM_REGS (SImode) - reg) + for (; reg < NPARM_REGS (SImode); reg++) + emit_insn (gen_shcompact_preserve_incoming_args + (gen_rtx_REG (SImode, FIRST_PARM_REG + reg))); + else if (CALL_COOKIE_INT_REG_GET + (crtl->args.info.call_cookie, reg) == 1) + emit_insn (gen_shcompact_preserve_incoming_args + (gen_rtx_REG (SImode, FIRST_PARM_REG + reg))); + + emit_move_insn (gen_rtx_REG (Pmode, MACL_REG), + stack_pointer_rtx); + emit_move_insn (gen_rtx_REG (SImode, R0_REG), + GEN_INT (crtl->args.info.call_cookie)); + emit_move_insn (gen_rtx_REG (SImode, MACH_REG), + gen_rtx_REG (SImode, R0_REG)); + } + else if (TARGET_SHMEDIA) + { + int tr = sh_media_register_for_return (); + + if (tr >= 0) + emit_move_insn (gen_rtx_REG (DImode, tr), + gen_rtx_REG (DImode, PR_MEDIA_REG)); + } + + /* Emit the code for SETUP_VARARGS. */ + if (cfun->stdarg) + { + if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)) + { + /* Push arg regs as if they'd been provided by caller in stack. */ + for (i = 0; i < NPARM_REGS(SImode); i++) + { + int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1; + + if (i >= (NPARM_REGS(SImode) + - crtl->args.info.arg_count[(int) SH_ARG_INT] + )) + break; + push (rn); + stack_usage += GET_MODE_SIZE (SImode); + } + } + } + + /* If we're supposed to switch stacks at function entry, do so now. */ + if (sp_switch_attr) + { + rtx lab, newsrc; + /* The argument specifies a variable holding the address of the + stack the interrupt function should switch to/from at entry/exit. 
*/ + tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr)); + const char *s + = ggc_strdup (TREE_STRING_POINTER (arg)); + rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s); + + lab = add_constant (sp_switch, SImode, 0); + newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); + newsrc = gen_const_mem (SImode, newsrc); + + emit_insn (gen_sp_switch_1 (newsrc)); + } + + d = calc_live_regs (&live_regs_mask); + /* ??? Maybe we could save some switching if we can move a mode switch + that already happens to be at the function start into the prologue. */ + if (target_flags != save_flags && ! current_function_interrupt) + emit_insn (gen_toggle_sz ()); + + if (TARGET_SH5) + { + int offset_base, offset; + rtx r0 = NULL_RTX; + int offset_in_r0 = -1; + int sp_in_r0 = 0; + int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask); + int total_size, save_size; + save_schedule schedule; + save_entry *entry; + int *tmp_pnt; + + if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG] + && ! current_function_interrupt) + r0 = gen_rtx_REG (Pmode, R0_REG); + + /* D is the actual number of bytes that we need for saving registers, + however, in initial_elimination_offset we have committed to using + an additional TREGS_SPACE amount of bytes - in order to keep both + addresses to arguments supplied by the caller and local variables + valid, we must keep this gap. Place it between the incoming + arguments and the actually saved registers in a bid to optimize + locality of reference. */ + total_size = d + tregs_space; + total_size += rounded_frame_size (total_size); + save_size = total_size - rounded_frame_size (d); + if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT)) + d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT) + - save_size % (STACK_BOUNDARY / BITS_PER_UNIT)); + + /* If adjusting the stack in a single step costs nothing extra, do so. + I.e. 
either if a single addi is enough, or we need a movi anyway, + and we don't exceed the maximum offset range (the test for the + latter is conservative for simplicity). */ + if (TARGET_SHMEDIA + && (CONST_OK_FOR_I10 (-total_size) + || (! CONST_OK_FOR_I10 (-(save_size + d_rounding)) + && total_size <= 2044))) + d_rounding = total_size - save_size; + + offset_base = d + d_rounding; + + output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx, + 0, NULL, true); + stack_usage += save_size + d_rounding; + + sh5_schedule_saves (&live_regs_mask, &schedule, offset_base); + tmp_pnt = schedule.temps; + for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++) + { + enum machine_mode mode = (enum machine_mode) entry->mode; + unsigned int reg = entry->reg; + rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX; + rtx orig_reg_rtx; + + offset = entry->offset; + + reg_rtx = gen_rtx_REG (mode, reg); + + mem_rtx = gen_frame_mem (mode, + gen_rtx_PLUS (Pmode, + stack_pointer_rtx, + GEN_INT (offset))); + + if (!memory_address_p (mode, XEXP (mem_rtx, 0))) + { + gcc_assert (r0); + mem_rtx = NULL_RTX; + } + + if (HAVE_PRE_DECREMENT + && (offset_in_r0 - offset == GET_MODE_SIZE (mode) + || mem_rtx == NULL_RTX + || reg == PR_REG || SPECIAL_REGISTER_P (reg))) + { + pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0)); + + if (!memory_address_p (mode, XEXP (pre_dec, 0))) + pre_dec = NULL_RTX; + else + { + mem_rtx = NULL_RTX; + offset += GET_MODE_SIZE (mode); + } + } + + if (mem_rtx != NULL_RTX) + goto addr_ok; + + if (offset_in_r0 == -1) + { + emit_move_insn (r0, GEN_INT (offset)); + offset_in_r0 = offset; + } + else if (offset != offset_in_r0) + { + emit_move_insn (r0, + gen_rtx_PLUS + (Pmode, r0, + GEN_INT (offset - offset_in_r0))); + offset_in_r0 += offset - offset_in_r0; + } + + if (pre_dec != NULL_RTX) + { + if (! 
sp_in_r0) + { + emit_move_insn (r0, + gen_rtx_PLUS + (Pmode, r0, stack_pointer_rtx)); + sp_in_r0 = 1; + } + + offset -= GET_MODE_SIZE (mode); + offset_in_r0 -= GET_MODE_SIZE (mode); + + mem_rtx = pre_dec; + } + else if (sp_in_r0) + mem_rtx = gen_frame_mem (mode, r0); + else + mem_rtx = gen_frame_mem (mode, + gen_rtx_PLUS (Pmode, + stack_pointer_rtx, + r0)); + + /* We must not use an r0-based address for target-branch + registers or for special registers without pre-dec + memory addresses, since we store their values in r0 + first. */ + gcc_assert (!TARGET_REGISTER_P (reg) + && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg)) + || mem_rtx == pre_dec)); + + addr_ok: + orig_reg_rtx = reg_rtx; + if (TARGET_REGISTER_P (reg) + || ((reg == PR_REG || SPECIAL_REGISTER_P (reg)) + && mem_rtx != pre_dec)) + { + rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt); + + emit_move_insn (tmp_reg, reg_rtx); + + if (REGNO (tmp_reg) == R0_REG) + { + offset_in_r0 = -1; + sp_in_r0 = 0; + gcc_assert (!refers_to_regno_p + (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0)); + } + + if (*++tmp_pnt <= 0) + tmp_pnt = schedule.temps; + + reg_rtx = tmp_reg; + } + { + rtx insn; + + /* Mark as interesting for dwarf cfi generator */ + insn = emit_move_insn (mem_rtx, reg_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + /* If we use an intermediate register for the save, we can't + describe this exactly in cfi as a copy of the to-be-saved + register into the temporary register and then the temporary + register on the stack, because the temporary register can + have a different natural size than the to-be-saved register. + Thus, we gloss over the intermediate copy and pretend we do + a direct save from the to-be-saved register. 
*/ + if (REGNO (reg_rtx) != reg) + { + rtx set; + + set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); + } + + if (TARGET_SHCOMPACT && (offset_in_r0 != -1)) + { + rtx reg_rtx = gen_rtx_REG (mode, reg); + rtx set; + rtx mem_rtx = gen_frame_mem (mode, + gen_rtx_PLUS (Pmode, + stack_pointer_rtx, + GEN_INT (offset))); + + set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); + } + } + } + + gcc_assert (entry->offset == d_rounding); + } + else + { + push_regs (&live_regs_mask, current_function_interrupt); + stack_usage += d; + } + + if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) + emit_insn (gen_GOTaddr2picreg ()); + + if (SHMEDIA_REGS_STACK_ADJUST ()) + { + /* This must NOT go through the PLT, otherwise mach and macl + may be clobbered. */ + function_symbol (gen_rtx_REG (Pmode, R0_REG), + (TARGET_FPU_ANY + ? "__GCC_push_shmedia_regs" + : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT); + emit_insn (gen_shmedia_save_restore_regs_compact + (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ()))); + } + + if (target_flags != save_flags && ! current_function_interrupt) + emit_insn (gen_toggle_sz ()); + + target_flags = save_flags; + + output_stack_adjust (-rounded_frame_size (d) + d_rounding, + stack_pointer_rtx, 0, NULL, true); + stack_usage += rounded_frame_size (d) - d_rounding; + + if (frame_pointer_needed) + frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx)); + + if (TARGET_SHCOMPACT + && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1))) + { + /* This must NOT go through the PLT, otherwise mach and macl + may be clobbered. 
*/ + function_symbol (gen_rtx_REG (Pmode, R0_REG), + "__GCC_shcompact_incoming_args", SFUNC_GOT); + emit_insn (gen_shcompact_incoming_args ()); + } + + if (flag_stack_usage) + current_function_static_stack_size = stack_usage; +} + +void +sh_expand_epilogue (bool sibcall_p) +{ + HARD_REG_SET live_regs_mask; + int d, i; + int d_rounding = 0; + + int save_flags = target_flags; + int frame_size, save_size; + int fpscr_deferred = 0; + int e = sibcall_p ? -1 : 1; + + d = calc_live_regs (&live_regs_mask); + + save_size = d; + frame_size = rounded_frame_size (d); + + if (TARGET_SH5) + { + int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask); + int total_size; + if (d % (STACK_BOUNDARY / BITS_PER_UNIT)) + d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT) + - d % (STACK_BOUNDARY / BITS_PER_UNIT)); + + total_size = d + tregs_space; + total_size += rounded_frame_size (total_size); + save_size = total_size - frame_size; + + /* If adjusting the stack in a single step costs nothing extra, do so. + I.e. either if a single addi is enough, or we need a movi anyway, + and we don't exceed the maximum offset range (the test for the + latter is conservative for simplicity). */ + if (TARGET_SHMEDIA + && ! frame_pointer_needed + && (CONST_OK_FOR_I10 (total_size) + || (! CONST_OK_FOR_I10 (save_size + d_rounding) + && total_size <= 2044))) + d_rounding = frame_size; + + frame_size -= d_rounding; + } + + if (frame_pointer_needed) + { + /* We must avoid scheduling the epilogue with previous basic blocks. + See PR/18032 and PR/40313. */ + emit_insn (gen_blockage ()); + output_stack_adjust (frame_size, hard_frame_pointer_rtx, e, + &live_regs_mask, false); + + /* We must avoid moving the stack pointer adjustment past code + which reads from the local frame, else an interrupt could + occur after the SP adjustment and clobber data in the local + frame. 
*/ + emit_insn (gen_blockage ()); + emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx)); + } + else if (frame_size) + { + /* We must avoid moving the stack pointer adjustment past code + which reads from the local frame, else an interrupt could + occur after the SP adjustment and clobber data in the local + frame. */ + emit_insn (gen_blockage ()); + output_stack_adjust (frame_size, stack_pointer_rtx, e, + &live_regs_mask, false); + } + + if (SHMEDIA_REGS_STACK_ADJUST ()) + { + function_symbol (gen_rtx_REG (Pmode, R0_REG), + (TARGET_FPU_ANY + ? "__GCC_pop_shmedia_regs" + : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT); + /* This must NOT go through the PLT, otherwise mach and macl + may be clobbered. */ + emit_insn (gen_shmedia_save_restore_regs_compact + (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ()))); + } + + /* Pop all the registers. */ + + if (target_flags != save_flags && ! current_function_interrupt) + emit_insn (gen_toggle_sz ()); + if (TARGET_SH5) + { + int offset_base, offset; + int offset_in_r0 = -1; + int sp_in_r0 = 0; + rtx r0 = gen_rtx_REG (Pmode, R0_REG); + save_schedule schedule; + save_entry *entry; + int *tmp_pnt; + + entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding); + offset_base = -entry[1].offset + d_rounding; + tmp_pnt = schedule.temps; + for (; entry->mode != VOIDmode; entry--) + { + enum machine_mode mode = (enum machine_mode) entry->mode; + int reg = entry->reg; + rtx reg_rtx, mem_rtx, post_inc = NULL_RTX; + + offset = offset_base + entry->offset; + reg_rtx = gen_rtx_REG (mode, reg); + + mem_rtx = gen_frame_mem (mode, + gen_rtx_PLUS (Pmode, + stack_pointer_rtx, + GEN_INT (offset))); + + if (!memory_address_p (mode, XEXP (mem_rtx, 0))) + mem_rtx = NULL_RTX; + + if (HAVE_POST_INCREMENT + && (offset == offset_in_r0 + || (offset + GET_MODE_SIZE (mode) != d + d_rounding + && mem_rtx == NULL_RTX) + || reg == PR_REG || SPECIAL_REGISTER_P (reg))) + { + post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0)); + + if 
(!memory_address_p (mode, XEXP (post_inc, 0))) + post_inc = NULL_RTX; + else + mem_rtx = NULL_RTX; + } + + if (mem_rtx != NULL_RTX) + goto addr_ok; + + if (offset_in_r0 == -1) + { + emit_move_insn (r0, GEN_INT (offset)); + offset_in_r0 = offset; + } + else if (offset != offset_in_r0) + { + emit_move_insn (r0, + gen_rtx_PLUS + (Pmode, r0, + GEN_INT (offset - offset_in_r0))); + offset_in_r0 += offset - offset_in_r0; + } + + if (post_inc != NULL_RTX) + { + if (! sp_in_r0) + { + emit_move_insn (r0, + gen_rtx_PLUS + (Pmode, r0, stack_pointer_rtx)); + sp_in_r0 = 1; + } + + mem_rtx = post_inc; + + offset_in_r0 += GET_MODE_SIZE (mode); + } + else if (sp_in_r0) + mem_rtx = gen_frame_mem (mode, r0); + else + mem_rtx = gen_frame_mem (mode, + gen_rtx_PLUS (Pmode, + stack_pointer_rtx, + r0)); + + gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg)) + || mem_rtx == post_inc); + + addr_ok: + if ((reg == PR_REG || SPECIAL_REGISTER_P (reg)) + && mem_rtx != post_inc) + { + emit_move_insn (r0, mem_rtx); + mem_rtx = r0; + } + else if (TARGET_REGISTER_P (reg)) + { + rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt); + + /* Give the scheduler a bit of freedom by using up to + MAX_TEMPS registers in a round-robin fashion. */ + emit_move_insn (tmp_reg, mem_rtx); + mem_rtx = tmp_reg; + if (*++tmp_pnt < 0) + tmp_pnt = schedule.temps; + } + + emit_move_insn (reg_rtx, mem_rtx); + } + + gcc_assert (entry->offset + offset_base == d + d_rounding); + } + else /* ! TARGET_SH5 */ + { + int last_reg; + + save_size = 0; + /* For an ISR with RESBANK attribute assigned, don't pop PR + register. */ + if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG) + && !sh_cfun_resbank_handler_p ()) + { + if (!frame_pointer_needed) + emit_insn (gen_blockage ()); + pop (PR_REG); + } + + /* Banked registers are popped first to avoid being scheduled in the + delay slot. RTE switches banks before the ds instruction. 
*/ + if (current_function_interrupt) + { + bool use_movml = false; + + if (TARGET_SH2A) + { + unsigned int count = 0; + + for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) + if (TEST_HARD_REG_BIT (live_regs_mask, i)) + count++; + else + break; + + /* Use movml when all banked registers are popped. */ + if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1) + use_movml = true; + } + + if (use_movml) + { + rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); + + /* We must avoid scheduling the multiple-load insn together with + other insns, so it is bracketed by blockages. */ + emit_insn (gen_blockage ()); + emit_insn (gen_movml_pop_banked (sp_reg)); + emit_insn (gen_blockage ()); + } + else + for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--) + if (TEST_HARD_REG_BIT (live_regs_mask, i)) + pop (i); + + last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1; + } + else + last_reg = FIRST_PSEUDO_REGISTER; + + for (i = 0; i < last_reg; i++) + { + int j = (FIRST_PSEUDO_REGISTER - 1) - i; + /* Registers are visited from the highest number downwards. In an + interrupt handler with TARGET_FMOVD and live DF_REGS, FPSCR is not + popped in its own iteration; it is popped at the end of the + FIRST_FP_REG iteration instead. */ + if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD + && hard_reg_set_intersect_p (live_regs_mask, + reg_class_contents[DF_REGS])) + fpscr_deferred = 1; + /* For an ISR with RESBANK attribute assigned, don't pop + the following registers: R0-R14, MACH, MACL and GBR. */ + else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j) + && ! (sh_cfun_resbank_handler_p () + && ((j >= FIRST_GENERAL_REG + && j < LAST_GENERAL_REG) + || j == MACH_REG + || j == MACL_REG + || j == GBR_REG))) + pop (j); + + if (j == FIRST_FP_REG && fpscr_deferred) + pop (FPSCR_REG); + } + } + if (target_flags != save_flags && ! current_function_interrupt) + emit_insn (gen_toggle_sz ()); + target_flags = save_flags; + + output_stack_adjust (crtl->args.pretend_args_size + + save_size + d_rounding + + crtl->args.info.stack_regs * 8, + stack_pointer_rtx, e, NULL, false); + + if (crtl->calls_eh_return) + emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx, + EH_RETURN_STACKADJ_RTX)); + + /* Switch back to the normal stack if necessary.
*/ + if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl))) + emit_insn (gen_sp_switch_2 ()); + + /* Tell flow the insn that pops PR isn't dead. */ + /* PR_REG will never be live in SHmedia mode, and we don't need to + USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG + by the return pattern. */ + if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)) + emit_use (gen_rtx_REG (SImode, PR_REG)); +} +/* Cached result of sh_need_epilogue: 0 means not yet computed, 1 means + expanding the epilogue produced insns, -1 means it produced none. */ +static int sh_need_epilogue_known = 0; +/* Return nonzero if expanding the epilogue produces any insns. The + epilogue is expanded inside a start_sequence/end_sequence pair the + first time this is called; afterwards the answer is taken from + sh_need_epilogue_known. */ +int +sh_need_epilogue (void) +{ + if (! sh_need_epilogue_known) + { + rtx epilogue; + + start_sequence (); + sh_expand_epilogue (0); + epilogue = get_insns (); + end_sequence (); + sh_need_epilogue_known = (epilogue == NULL ? -1 : 1); + } + return sh_need_epilogue_known > 0; +} + +/* Emit code to change the current function's return address to RA. + TMP is available as a scratch register, if needed. */ + +void +sh_set_return_address (rtx ra, rtx tmp) +{ + HARD_REG_SET live_regs_mask; + int d; + int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG; + int pr_offset; + + d = calc_live_regs (&live_regs_mask); + + /* If pr_reg isn't live, we can set it (or the register given in + sh_media_register_for_return) directly. */ + if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg)) + { + rtx rr; + + if (TARGET_SHMEDIA) + { + int rr_regno = sh_media_register_for_return (); + + if (rr_regno < 0) + rr_regno = pr_reg; + + rr = gen_rtx_REG (DImode, rr_regno); + } + else + rr = gen_rtx_REG (SImode, pr_reg); + + emit_insn (GEN_MOV (rr, ra)); + /* Tell flow the register for return isn't dead. */ + emit_use (rr); + return; + } + + if (TARGET_SH5) + { + int offset; + save_schedule schedule; + save_entry *entry; + + entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0); + offset = entry[1].offset; + for (; entry->mode != VOIDmode; entry--) + if (entry->reg == pr_reg) + goto found; + + /* We can't find pr register.
*/ + gcc_unreachable (); + + found: + offset = entry->offset - offset; + pr_offset = (rounded_frame_size (d) + offset + + SHMEDIA_REGS_STACK_ADJUST ()); + } + else + pr_offset = rounded_frame_size (d); + + emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset))); + + if (frame_pointer_needed) + emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx)); + else + emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx)); + + tmp = gen_frame_mem (Pmode, tmp); + emit_insn (GEN_MOV (tmp, ra)); + /* Tell this store isn't dead. */ + emit_use (tmp); +} + +/* Clear variables at function end. */ + +static void +sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, + HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + sh_need_epilogue_known = 0; +} + +static rtx +sh_builtin_saveregs (void) +{ + /* First unnamed integer register. */ + int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT]; + /* Number of integer registers we need to save. */ + int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg); + /* First unnamed SFmode float reg */ + int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT]; + /* Number of SFmode float regs to save. */ + int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg); + rtx regbuf, fpregs; + int bufsize, regno; + alias_set_type alias_set; + + if (TARGET_SH5) + { + if (n_intregs) + { + int pushregs = n_intregs; + + while (pushregs < NPARM_REGS (SImode) - 1 + && (CALL_COOKIE_INT_REG_GET + (crtl->args.info.call_cookie, + NPARM_REGS (SImode) - pushregs) + == 1)) + { + crtl->args.info.call_cookie + &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode) + - pushregs, 1); + pushregs++; + } + + if (pushregs == NPARM_REGS (SImode)) + crtl->args.info.call_cookie + |= (CALL_COOKIE_INT_REG (0, 1) + | CALL_COOKIE_STACKSEQ (pushregs - 1)); + else + crtl->args.info.call_cookie + |= CALL_COOKIE_STACKSEQ (pushregs); + + crtl->args.pretend_args_size += 8 * n_intregs; + } + if (TARGET_SHCOMPACT) + return const0_rtx; + } + + if (! TARGET_SH2E && ! TARGET_SH4 && ! 
TARGET_SH5) + { + error ("__builtin_saveregs not supported by this subtarget"); + return const0_rtx; + } + + if (TARGET_SHMEDIA) + n_floatregs = 0; + + /* Allocate block of memory for the regs. */ + /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte? + Or can assign_stack_local accept a 0 SIZE argument? */ + bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD); + + if (TARGET_SHMEDIA) + regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM)); + else if (n_floatregs & 1) + { + rtx addr; + + regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0); + addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0)); + emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD))); + regbuf = change_address (regbuf, BLKmode, addr); + } + else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs) + { + rtx addr, mask; + + regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0); + addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4)); + mask = copy_to_mode_reg (Pmode, GEN_INT (-8)); + emit_insn (gen_andsi3 (addr, addr, mask)); + regbuf = change_address (regbuf, BLKmode, addr); + } + else + regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0); + alias_set = get_varargs_alias_set (); + set_mem_alias_set (regbuf, alias_set); + + /* Save int args. + This is optimized to only save the regs that are necessary. Explicitly + named args need not be saved. */ + if (n_intregs > 0) + move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg, + adjust_address (regbuf, BLKmode, + n_floatregs * UNITS_PER_WORD), + n_intregs); + + if (TARGET_SHMEDIA) + /* Return the address of the regbuf. */ + return XEXP (regbuf, 0); + + /* Save float args. + This is optimized to only save the regs that are necessary. Explicitly + named args need not be saved. 
+ We explicitly build a pointer to the buffer because it halves the insn + count when not optimizing (otherwise the pointer is built for each reg + saved). + We emit the moves in reverse order so that we can use predecrement. */ + + fpregs = copy_to_mode_reg (Pmode, + plus_constant (XEXP (regbuf, 0), + n_floatregs * UNITS_PER_WORD)); + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + rtx mem; + for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2) + { + emit_insn (gen_addsi3 (fpregs, fpregs, + GEN_INT (-2 * UNITS_PER_WORD))); + mem = change_address (regbuf, DFmode, fpregs); + emit_move_insn (mem, + gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno)); + } + regno = first_floatreg; + if (regno & 1) + { + emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD))); + mem = change_address (regbuf, SFmode, fpregs); + emit_move_insn (mem, + gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno + - (TARGET_LITTLE_ENDIAN != 0))); + } + } + else + for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--) + { + rtx mem; + + emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD))); + mem = change_address (regbuf, SFmode, fpregs); + emit_move_insn (mem, + gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno)); + } + + /* Return the address of the regbuf. */ + return XEXP (regbuf, 0); +} + +/* Define the `__builtin_va_list' type for the ABI. */ + +static tree +sh_build_builtin_va_list (void) +{ + tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; + tree record, type_decl; + + if (TARGET_SH5 || (! TARGET_SH2E && ! 
TARGET_SH4) + || TARGET_HITACHI || sh_cfun_attr_renesas_p ()) + return ptr_type_node; + + record = (*lang_hooks.types.make_type) (RECORD_TYPE); + type_decl = build_decl (BUILTINS_LOCATION, + TYPE_DECL, get_identifier ("__va_list_tag"), record); + + f_next_o = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__va_next_o"), + ptr_type_node); + f_next_o_limit = build_decl (BUILTINS_LOCATION, + FIELD_DECL, + get_identifier ("__va_next_o_limit"), + ptr_type_node); + f_next_fp = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__va_next_fp"), + ptr_type_node); + f_next_fp_limit = build_decl (BUILTINS_LOCATION, + FIELD_DECL, + get_identifier ("__va_next_fp_limit"), + ptr_type_node); + f_next_stack = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__va_next_stack"), + ptr_type_node); + + DECL_FIELD_CONTEXT (f_next_o) = record; + DECL_FIELD_CONTEXT (f_next_o_limit) = record; + DECL_FIELD_CONTEXT (f_next_fp) = record; + DECL_FIELD_CONTEXT (f_next_fp_limit) = record; + DECL_FIELD_CONTEXT (f_next_stack) = record; + + TYPE_STUB_DECL (record) = type_decl; + TYPE_NAME (record) = type_decl; + TYPE_FIELDS (record) = f_next_o; + DECL_CHAIN (f_next_o) = f_next_o_limit; + DECL_CHAIN (f_next_o_limit) = f_next_fp; + DECL_CHAIN (f_next_fp) = f_next_fp_limit; + DECL_CHAIN (f_next_fp_limit) = f_next_stack; + + layout_type (record); + + return record; +} + +/* Implement `va_start' for varargs and stdarg. */ + +static void +sh_va_start (tree valist, rtx nextarg) +{ + tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; + tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack; + tree t, u; + int nfp, nint; + + if (TARGET_SH5) + { + expand_builtin_saveregs (); + std_expand_builtin_va_start (valist, nextarg); + return; + } + + if ((! TARGET_SH2E && ! 
TARGET_SH4) + || TARGET_HITACHI || sh_cfun_attr_renesas_p ()) + { + std_expand_builtin_va_start (valist, nextarg); + return; + } + + f_next_o = TYPE_FIELDS (va_list_type_node); + f_next_o_limit = DECL_CHAIN (f_next_o); + f_next_fp = DECL_CHAIN (f_next_o_limit); + f_next_fp_limit = DECL_CHAIN (f_next_fp); + f_next_stack = DECL_CHAIN (f_next_fp_limit); + + next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o, + NULL_TREE); + next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit), + valist, f_next_o_limit, NULL_TREE); + next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp, + NULL_TREE); + next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit), + valist, f_next_fp_limit, NULL_TREE); + next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack), + valist, f_next_stack, NULL_TREE); + + /* Call __builtin_saveregs. */ + u = make_tree (sizetype, expand_builtin_saveregs ()); + u = fold_convert (ptr_type_node, u); + t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + nfp = crtl->args.info.arg_count[SH_ARG_FLOAT]; + if (nfp < 8) + nfp = 8 - nfp; + else + nfp = 0; + u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u, + size_int (UNITS_PER_WORD * nfp)); + t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + nint = crtl->args.info.arg_count[SH_ARG_INT]; + if (nint < 4) + nint = 4 - nint; + else + nint = 0; + u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u, + size_int (UNITS_PER_WORD * nint)); + t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + u = make_tree (ptr_type_node, nextarg); + t = build2 
(MODIFY_EXPR, ptr_type_node, next_stack, u); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); +} + +/* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized + member, return it. */ +static tree +find_sole_member (tree type) +{ + tree field, member = NULL_TREE; + + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + if (!DECL_SIZE (field)) + return NULL_TREE; + if (integer_zerop (DECL_SIZE (field))) + continue; + if (member) + return NULL_TREE; + member = field; + } + return member; +} +/* Implement `va_arg'. */ + +static tree +sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT size, rsize; + tree tmp, pptr_type_node; + tree addr, lab_over = NULL, result = NULL; + int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type); + tree eff_type; + + if (pass_by_ref) + type = build_pointer_type (type); + + size = int_size_in_bytes (type); + rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; + pptr_type_node = build_pointer_type (ptr_type_node); + + if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4) + && ! 
(TARGET_HITACHI || sh_cfun_attr_renesas_p ())) + { + tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; + tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack; + int pass_as_float; + tree lab_false; + tree member; + + f_next_o = TYPE_FIELDS (va_list_type_node); + f_next_o_limit = DECL_CHAIN (f_next_o); + f_next_fp = DECL_CHAIN (f_next_o_limit); + f_next_fp_limit = DECL_CHAIN (f_next_fp); + f_next_stack = DECL_CHAIN (f_next_fp_limit); + + next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o, + NULL_TREE); + next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit), + valist, f_next_o_limit, NULL_TREE); + next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), + valist, f_next_fp, NULL_TREE); + next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit), + valist, f_next_fp_limit, NULL_TREE); + next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack), + valist, f_next_stack, NULL_TREE); + + /* Structures with a single member with a distinct mode are passed + like their member. This is relevant if the latter has a REAL_TYPE + or COMPLEX_TYPE type. 
*/ + eff_type = type; + while (TREE_CODE (eff_type) == RECORD_TYPE + && (member = find_sole_member (eff_type)) + && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE + || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE + || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE)) + { + tree field_type = TREE_TYPE (member); + + if (TYPE_MODE (eff_type) == TYPE_MODE (field_type)) + eff_type = field_type; + else + { + gcc_assert ((TYPE_ALIGN (eff_type) + < GET_MODE_ALIGNMENT (TYPE_MODE (field_type))) + || (TYPE_ALIGN (eff_type) + > GET_MODE_BITSIZE (TYPE_MODE (field_type)))); + break; + } + } + + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8) + || (TREE_CODE (eff_type) == COMPLEX_TYPE + && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE + && size <= 16)); + } + else + { + pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4); + } + + addr = create_tmp_var (pptr_type_node, NULL); + lab_false = create_artificial_label (UNKNOWN_LOCATION); + lab_over = create_artificial_label (UNKNOWN_LOCATION); + + valist = build_simple_mem_ref (addr); + + if (pass_as_float) + { + tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL); + tree cmp; + bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE; + + tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp)); + gimplify_assign (unshare_expr (addr), tmp, pre_p); + + gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p); + tmp = next_fp_limit; + if (size > 4 && !is_double) + tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp), + unshare_expr (tmp), size_int (4 - size)); + tmp = build2 (GE_EXPR, boolean_type_node, + unshare_expr (next_fp_tmp), unshare_expr (tmp)); + cmp = build3 (COND_EXPR, void_type_node, tmp, + build1 (GOTO_EXPR, void_type_node, + unshare_expr (lab_false)), NULL_TREE); + if (!is_double) + gimplify_and_add (cmp, pre_p); + + if (TYPE_ALIGN (eff_type) > BITS_PER_WORD + || (is_double || size == 16)) + { + tmp = fold_convert (sizetype, 
next_fp_tmp); + tmp = build2 (BIT_AND_EXPR, sizetype, tmp, + size_int (UNITS_PER_WORD)); + tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, + unshare_expr (next_fp_tmp), tmp); + gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p); + } + if (is_double) + gimplify_and_add (cmp, pre_p); + +#ifdef FUNCTION_ARG_SCmode_WART + if (TYPE_MODE (eff_type) == SCmode + && TARGET_SH4 && TARGET_LITTLE_ENDIAN) + { + tree subtype = TREE_TYPE (eff_type); + tree real, imag; + + imag + = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL); + imag = get_initialized_tmp_var (imag, pre_p, NULL); + + real + = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL); + real = get_initialized_tmp_var (real, pre_p, NULL); + + result = build2 (COMPLEX_EXPR, eff_type, real, imag); + if (type != eff_type) + result = build1 (VIEW_CONVERT_EXPR, type, result); + result = get_initialized_tmp_var (result, pre_p, NULL); + } +#endif /* FUNCTION_ARG_SCmode_WART */ + + tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over)); + gimplify_and_add (tmp, pre_p); + + tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false)); + gimplify_and_add (tmp, pre_p); + + tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack)); + gimplify_assign (unshare_expr (addr), tmp, pre_p); + gimplify_assign (unshare_expr (next_fp_tmp), + unshare_expr (valist), pre_p); + + gimplify_assign (unshare_expr (valist), + unshare_expr (next_fp_tmp), post_p); + valist = next_fp_tmp; + } + else + { + tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, + unshare_expr (next_o), size_int (rsize)); + tmp = build2 (GT_EXPR, boolean_type_node, tmp, + unshare_expr (next_o_limit)); + tmp = build3 (COND_EXPR, void_type_node, tmp, + build1 (GOTO_EXPR, void_type_node, + unshare_expr (lab_false)), + NULL_TREE); + gimplify_and_add (tmp, pre_p); + + tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o)); + gimplify_assign (unshare_expr (addr), tmp, pre_p); + + tmp = build1 (GOTO_EXPR, void_type_node, 
unshare_expr (lab_over)); + gimplify_and_add (tmp, pre_p); + + tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false)); + gimplify_and_add (tmp, pre_p); + + if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A)) + gimplify_assign (unshare_expr (next_o), + unshare_expr (next_o_limit), pre_p); + + tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack)); + gimplify_assign (unshare_expr (addr), tmp, pre_p); + } + + if (!result) + { + tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over)); + gimplify_and_add (tmp, pre_p); + } + } + + /* ??? In va-sh.h, there had been code to make values larger than + size 8 indirect. This does not match the FUNCTION_ARG macros. */ + + tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL); + if (result) + { + gimplify_assign (result, tmp, pre_p); + result = build1 (NOP_EXPR, TREE_TYPE (result), result); + tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over)); + gimplify_and_add (tmp, pre_p); + } + else + result = tmp; + + if (pass_by_ref) + result = build_va_arg_indirect_ref (result); + + return result; +} + +/* 64 bit floating points memory transfers are paired single precision loads + or store. So DWARF information needs fixing in little endian (unless + PR=SZ=1 in FPSCR). 
*/ +rtx +sh_dwarf_register_span (rtx reg) +{ + unsigned regno = REGNO (reg); + /* No span is needed when WORDS_BIG_ENDIAN is set or REG is not DFmode. + Otherwise describe REG as two SFmode registers, REGNO+1 first. */ + if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode) + return NULL_RTX; + + return + gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, + gen_rtx_REG (SFmode, + DBX_REGISTER_NUMBER (regno+1)), + gen_rtx_REG (SFmode, + DBX_REGISTER_NUMBER (regno)))); +} +/* Return the mode to which a value of TYPE and MODE is promoted for a + function of type FUNTYPE. If sh_promote_prototypes accepts FUNTYPE, + promote_mode decides; otherwise default_promote_function_mode does. */ +static enum machine_mode +sh_promote_function_mode (const_tree type, enum machine_mode mode, + int *punsignedp, const_tree funtype, + int for_return) +{ + if (sh_promote_prototypes (funtype)) + return promote_mode (type, mode, punsignedp); + else + return default_promote_function_mode (type, mode, punsignedp, funtype, + for_return); +} +/* Return false if TARGET_HITACHI is set or sh_attr_renesas_p holds for + TYPE; return true otherwise, including when TYPE is null. */ +static bool +sh_promote_prototypes (const_tree type) +{ + if (TARGET_HITACHI) + return 0; + if (! type) + return 1; + return ! sh_attr_renesas_p (type); +} + +/* Whether an argument must be passed by reference. On SHcompact, we + pretend arguments wider than 32 bits that would have been passed in + registers are passed by reference, so that an SHmedia trampoline + loads them into the full 64-bit registers. Return the size of the + argument in that case, and 0 otherwise. */ + +static int +shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named) +{ + unsigned HOST_WIDE_INT size; + + if (type) + size = int_size_in_bytes (type); + else + size = GET_MODE_SIZE (mode); + + if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode) + && (!named + || GET_SH_ARG_CLASS (mode) == SH_ARG_INT + || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT + && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode))) + && size > 4 + && !SHCOMPACT_FORCE_ON_STACK (mode, type) + && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named)) + return size; + else + return 0; +} + +static bool +sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named) +{ + if (targetm.calls.must_pass_in_stack (mode, type)) + return true; + + /* ??? std_gimplify_va_arg_expr passes NULL for cum.
That function + wants to know about pass-by-reference semantics for incoming + arguments. */ + if (! cum) + return false; + + if (TARGET_SHCOMPACT) + { + cum->byref = shcompact_byref (cum, mode, type, named); + return cum->byref != 0; + } + + return false; +} + +static bool +sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + /* ??? How can it possibly be correct to return true only on the + caller side of the equation? Is there someplace else in the + sh backend that's magically producing the copies? */ + return (cum->outgoing + && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) + % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0)); +} + +static int +sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode, + tree type, bool named ATTRIBUTE_UNUSED) +{ + int words = 0; + + if (!TARGET_SH5 + && PASS_IN_REG_P (*cum, mode, type) + && !(TARGET_SH4 || TARGET_SH2A_DOUBLE) + && (ROUND_REG (*cum, mode) + + (mode != BLKmode + ? ROUND_ADVANCE (GET_MODE_SIZE (mode)) + : ROUND_ADVANCE (int_size_in_bytes (type))) + > NPARM_REGS (mode))) + words = NPARM_REGS (mode) - ROUND_REG (*cum, mode); + + else if (!TARGET_SHCOMPACT + && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named)) + words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT]; + + return words * UNITS_PER_WORD; +} + + +/* Define where to put the arguments to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). 
+ + On SH the first args are normally in registers + and the rest are pushed. Any arg that starts within the first + NPARM_REGS words is at least partially passed in a register unless + its data type forbids. */ + +static rtx +sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode, + const_tree type, bool named) +{ + if (! TARGET_SH5 && mode == VOIDmode) + return GEN_INT (ca->renesas_abi ? 1 : 0); + + if (! TARGET_SH5 + && PASS_IN_REG_P (*ca, mode, type) + && (named || ! (TARGET_HITACHI || ca->renesas_abi))) + { + int regno; + /* Describe this SCmode value as a PARALLEL of two SFmode registers at byte offsets 0 and 4; the rounded register index of each half has its low bit flipped before BASE_ARG_REG is added. */ + if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN + && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1))) + { + rtx r1 = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (SFmode, + BASE_ARG_REG (mode) + + (ROUND_REG (*ca, mode) ^ 1)), + const0_rtx); + rtx r2 = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (SFmode, + BASE_ARG_REG (mode) + + ((ROUND_REG (*ca, mode) + 1) ^ 1)), + GEN_INT (4)); + return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2)); + } + + /* If the alignment of a DF value causes an SF register to be + skipped, we will use that skipped register for the next SF + value. */ + if ((TARGET_HITACHI || ca->renesas_abi) + && ca->free_single_fp_reg + && mode == SFmode) + return gen_rtx_REG (mode, ca->free_single_fp_reg); + + regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode)) + ^ (mode == SFmode && TARGET_SH4 + && TARGET_LITTLE_ENDIAN != 0 + && ! TARGET_HITACHI && ! ca->renesas_abi); + return gen_rtx_REG (mode, regno); + + } + + if (TARGET_SH5) + { + if (mode == VOIDmode && TARGET_SHCOMPACT) + return GEN_INT (ca->call_cookie); + + /* The following test assumes unnamed arguments are promoted to + DFmode. */ + if (mode == SFmode && ca->free_single_fp_reg) + return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg); + + if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT) + && (named || ! ca->prototype_p) + && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode)) + { + if (! 
ca->prototype_p && TARGET_SHMEDIA) + return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode); + + return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, + FIRST_FP_PARM_REG + + ca->arg_count[(int) SH_ARG_FLOAT]); + } + + if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) + && (! TARGET_SHCOMPACT + || (! SHCOMPACT_FORCE_ON_STACK (mode, type) + && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode, + type, named)))) + { + return gen_rtx_REG (mode, (FIRST_PARM_REG + + ca->arg_count[(int) SH_ARG_INT])); + } + + return 0; + } + + return 0; +} + +/* Update the data in CA to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be + available.) */ + +static void +sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode, + const_tree type, bool named) +{ + if (ca->force_mem) + ca->force_mem = 0; + else if (TARGET_SH5) + { + const_tree type2 = (ca->byref && type + ? TREE_TYPE (type) + : type); + enum machine_mode mode2 = (ca->byref && type + ? TYPE_MODE (type2) + : mode); + int dwords = ((ca->byref + ? ca->byref + : mode2 == BLKmode + ? int_size_in_bytes (type2) + : GET_MODE_SIZE (mode2)) + 7) / 8; + int numregs = MIN (dwords, NPARM_REGS (SImode) + - ca->arg_count[(int) SH_ARG_INT]); + + if (numregs) + { + ca->arg_count[(int) SH_ARG_INT] += numregs; + if (TARGET_SHCOMPACT + && SHCOMPACT_FORCE_ON_STACK (mode2, type2)) + { + ca->call_cookie + |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT] + - numregs, 1); + /* N.B. We want this also for outgoing. */ + ca->stack_regs += numregs; + } + else if (ca->byref) + { + if (! 
ca->outgoing) + ca->stack_regs += numregs; + ca->byref_regs += numregs; + ca->byref = 0; + do + ca->call_cookie + |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT] + - numregs, 2); + while (--numregs); + ca->call_cookie + |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT] + - 1, 1); + } + else if (dwords > numregs) + { + int pushregs = numregs; + + if (TARGET_SHCOMPACT) + ca->stack_regs += numregs; + while (pushregs < NPARM_REGS (SImode) - 1 + && (CALL_COOKIE_INT_REG_GET + (ca->call_cookie, + NPARM_REGS (SImode) - pushregs) + == 1)) + { + ca->call_cookie + &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode) + - pushregs, 1); + pushregs++; + } + if (numregs == NPARM_REGS (SImode)) + ca->call_cookie + |= CALL_COOKIE_INT_REG (0, 1) + | CALL_COOKIE_STACKSEQ (numregs - 1); + else + ca->call_cookie + |= CALL_COOKIE_STACKSEQ (numregs); + } + } + if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT + && (named || ! ca->prototype_p)) + { + if (mode2 == SFmode && ca->free_single_fp_reg) + ca->free_single_fp_reg = 0; + else if (ca->arg_count[(int) SH_ARG_FLOAT] + < NPARM_REGS (SFmode)) + { + int numfpregs + = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2, + NPARM_REGS (SFmode) + - ca->arg_count[(int) SH_ARG_FLOAT]); + + ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs; + + if (TARGET_SHCOMPACT && ! ca->prototype_p) + { + if (ca->outgoing && numregs > 0) + do + { + ca->call_cookie + |= (CALL_COOKIE_INT_REG + (ca->arg_count[(int) SH_ARG_INT] + - numregs + ((numfpregs - 2) / 2), + 4 + (ca->arg_count[(int) SH_ARG_FLOAT] + - numfpregs) / 2)); + } + while (numfpregs -= 2); + } + else if (mode2 == SFmode && (named) + && (ca->arg_count[(int) SH_ARG_FLOAT] + < NPARM_REGS (SFmode))) + ca->free_single_fp_reg + = FIRST_FP_PARM_REG - numfpregs + + ca->arg_count[(int) SH_ARG_FLOAT] + 1; + } + } + return; + } + + if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE) + { + /* Note that we've used the skipped register. 
*/ + if (mode == SFmode && ca->free_single_fp_reg) + { + ca->free_single_fp_reg = 0; + return; + } + /* When we have a DF after an SF, there's an SF register that gets + skipped in order to align the DF value. We note this skipped + register, because the next SF value will use it, and not the + SF that follows the DF. */ + if (mode == DFmode + && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode)) + { + ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode) + + BASE_ARG_REG (mode)); + } + } + + if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi) + || PASS_IN_REG_P (*ca, mode, type)) + (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)] + = (ROUND_REG (*ca, mode) + + (mode == BLKmode + ? ROUND_ADVANCE (int_size_in_bytes (type)) + : ROUND_ADVANCE (GET_MODE_SIZE (mode))))); +} + +/* The Renesas calling convention doesn't quite fit into this scheme since + the address is passed like an invisible argument, but one that is always + passed in memory. Hence 0 is returned for that convention, and hard + register 2 in Pmode otherwise. */ +static rtx +sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED) +{ + if (TARGET_HITACHI || sh_attr_renesas_p (fndecl)) + return 0; + return gen_rtx_REG (Pmode, 2); +} + +/* Worker function for TARGET_FUNCTION_VALUE. + + For the SH, this is like LIBCALL_VALUE, except that we must change the + mode like PROMOTE_MODE does. + ??? PROMOTE_MODE is ignored for non-scalar types. The set of types + tested here has to be kept in sync with the one in explow.c:promote_mode. 
+*/ + +static rtx +sh_function_value (const_tree valtype, + const_tree fn_decl_or_type, + bool outgoing ATTRIBUTE_UNUSED) +{ /* Only a decl is handed on to sh_promote_prototypes; any other non-null value is replaced by NULL first. */ + if (fn_decl_or_type + && !DECL_P (fn_decl_or_type)) + fn_decl_or_type = NULL; + + return gen_rtx_REG ( + ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT + && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4 + && (TREE_CODE (valtype) == INTEGER_TYPE + || TREE_CODE (valtype) == ENUMERAL_TYPE + || TREE_CODE (valtype) == BOOLEAN_TYPE + || TREE_CODE (valtype) == REAL_TYPE + || TREE_CODE (valtype) == OFFSET_TYPE)) + && sh_promote_prototypes (fn_decl_or_type) + ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)), + BASE_RETURN_VALUE_REG (TYPE_MODE (valtype))); +} + +/* Worker function for TARGET_LIBCALL_VALUE. Return the register in mode MODE whose number is BASE_RETURN_VALUE_REG (MODE). */ + +static rtx +sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode)); +} + +/* Return true if REGNO is a possible register number of a function value: FIRST_RET_REG always, FIRST_FP_RET_REG when TARGET_SH2E or TARGET_SHMEDIA_FPU is set. */ + +static bool +sh_function_value_regno_p (const unsigned int regno) +{ + return ((regno) == FIRST_RET_REG + || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG) + || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG)); +} + +/* Worker function for TARGET_RETURN_IN_MEMORY. On SH5 a value is returned in memory when it is larger than 8 bytes. Otherwise it is returned in memory when its mode is BLKmode, or when it is a RECORD_TYPE and the Hitachi / Renesas conventions apply to FNDECL. */ + +static bool +sh_return_in_memory (const_tree type, const_tree fndecl) +{ + if (TARGET_SH5) + { + if (TYPE_MODE (type) == BLKmode) + return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8; + else + return GET_MODE_SIZE (TYPE_MODE (type)) > 8; + } + else + { + return (TYPE_MODE (type) == BLKmode + || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl)) + && TREE_CODE (type) == RECORD_TYPE)); + } +} + +/* We actually emit the code in sh_expand_prologue. We used to use + a static variable to flag that we need to emit this code, but that + doesn't when inlining, when functions are deferred and then emitted + later. Fortunately, we already have two flags that are part of struct + function that tell if a function uses varargs or stdarg. 
*/ +static void +sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca, + enum machine_mode mode, + tree type, + int *pretend_arg_size, + int second_time ATTRIBUTE_UNUSED) +{ + gcc_assert (cfun->stdarg); + if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)) + { + int named_parm_regs, anon_parm_regs; + + named_parm_regs = (ROUND_REG (*ca, mode) + + (mode == BLKmode + ? ROUND_ADVANCE (int_size_in_bytes (type)) + : ROUND_ADVANCE (GET_MODE_SIZE (mode)))); + anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs; + if (anon_parm_regs > 0) + *pretend_arg_size = anon_parm_regs * 4; + } +} + /* Return TARGET_SH5; CA is not consulted. */ +static bool +sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED) +{ + return TARGET_SH5; +} + /* Return nonzero unless TARGET_HITACHI, the renesas_abi flag of CA, or TARGET_SH5 is set. */ +static bool +sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca) +{ + return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5; +} + + +/* Define the offset between two registers, one to be eliminated, and + the other its replacement, at the start of a routine. */ + +int +initial_elimination_offset (int from, int to) +{ + int regs_saved; + int regs_saved_rounding = 0; + int total_saved_regs_space; + int total_auto_space; + int save_flags = target_flags; + int copy_flags; + HARD_REG_SET live_regs_mask; + + shmedia_space_reserved_for_target_registers = false; + regs_saved = calc_live_regs (&live_regs_mask); + regs_saved += SHMEDIA_REGS_STACK_ADJUST (); + + if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask)) + { + shmedia_space_reserved_for_target_registers = true; + regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask); + } + + if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT)) + regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT) + - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT)); + + total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding; + copy_flags = target_flags; + target_flags = save_flags; + + total_saved_regs_space = regs_saved + regs_saved_rounding; + + if (from == ARG_POINTER_REGNUM && to 
== HARD_FRAME_POINTER_REGNUM) + return total_saved_regs_space + total_auto_space + + crtl->args.info.byref_regs * 8; + + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return total_saved_regs_space + total_auto_space + + crtl->args.info.byref_regs * 8; + + /* Initial gap between fp and sp is 0. */ + if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return 0; + + if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return rounded_frame_size (0); + + if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return rounded_frame_size (0); + + gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM + && (to == HARD_FRAME_POINTER_REGNUM + || to == STACK_POINTER_REGNUM)); + if (TARGET_SH5) + { + int n = total_saved_regs_space; + int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG; + save_schedule schedule; + save_entry *entry; + + n += total_auto_space; + + /* If it wasn't saved, there's not much we can do. */ + if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg)) + return n; + + target_flags = copy_flags; + + sh5_schedule_saves (&live_regs_mask, &schedule, n); + for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++) + if (entry->reg == pr_reg) + { + target_flags = save_flags; + return entry->offset; + } + gcc_unreachable (); + } + else + return total_auto_space; +} + +/* Parse the -mfixed-range= option string. */ +void +sh_fix_range (const char *const_str) +{ + int i, first, last; + char *str, *dash, *comma; + + /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and + REG2 are either register names or register numbers. The effect + of this option is to mark the registers in the range from REG1 to + REG2 as ``fixed'' so they won't be used by the compiler. 
*/ + + i = strlen (const_str); + str = (char *) alloca (i + 1); + memcpy (str, const_str, i + 1); + + while (1) + { + dash = strchr (str, '-'); + if (!dash) + { + warning (0, "value of -mfixed-range must have form REG1-REG2"); + return; + } + *dash = '\0'; + comma = strchr (dash + 1, ','); + if (comma) + *comma = '\0'; + + first = decode_reg_name (str); + if (first < 0) + { + warning (0, "unknown register name: %s", str); + return; + } + + last = decode_reg_name (dash + 1); + if (last < 0) + { + warning (0, "unknown register name: %s", dash + 1); + return; + } + + *dash = '-'; + + if (first > last) + { + warning (0, "%s-%s is an empty range", str, dash + 1); + return; + } + + for (i = first; i <= last; ++i) + fixed_regs[i] = call_used_regs[i] = 1; + + if (!comma) + break; + + *comma = ','; + str = comma + 1; + } +} + +/* Insert any deferred function attributes from earlier pragmas. */ +static void +sh_insert_attributes (tree node, tree *attributes) +{ + tree attrs; + + if (TREE_CODE (node) != FUNCTION_DECL) + return; + + /* NOTE(review): NODE is already known to be a FUNCTION_DECL at this point, so this check looks redundant. */ + if (!DECL_P (node)) + return; + + /* Append the attributes to the deferred attributes. */ + *sh_deferred_function_attributes_tail = *attributes; + attrs = sh_deferred_function_attributes; + if (!attrs) + return; + + /* Some attributes imply or require the interrupt attribute. */ + if (!lookup_attribute ("interrupt_handler", attrs) + && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node))) + { + /* If we have a trapa_handler, but no interrupt_handler attribute, + insert an interrupt_handler attribute. */ + if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE) + /* We can't use sh_pr_interrupt here because that's not in the + java frontend. */ + attrs + = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs); + /* However, for sp_switch, trap_exit, nosave_low_regs and resbank, + if the interrupt attribute is missing, we ignore the attribute + and warn. 
*/ + else if (lookup_attribute ("sp_switch", attrs) + || lookup_attribute ("trap_exit", attrs) + || lookup_attribute ("nosave_low_regs", attrs) + || lookup_attribute ("resbank", attrs)) + { + tree *tail; + + for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs)) + { + if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs)) + || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs)) + || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)) + || is_attribute_p ("resbank", TREE_PURPOSE (attrs))) + warning (OPT_Wattributes, + "%qE attribute only applies to interrupt functions", + TREE_PURPOSE (attrs)); + else + { + *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE, + NULL_TREE); + tail = &TREE_CHAIN (*tail); + } + } + attrs = *attributes; + } + } + + /* Install the processed list. */ + *attributes = attrs; + + /* Clear deferred attributes. */ + sh_deferred_function_attributes = NULL_TREE; + sh_deferred_function_attributes_tail = &sh_deferred_function_attributes; + + return; +} + +/* Supported attributes: + + interrupt_handler -- specifies this function is an interrupt handler. + + trapa_handler - like above, but don't save all registers. + + sp_switch -- specifies an alternate stack for an interrupt handler + to run on. + + trap_exit -- use a trapa to exit an interrupt function instead of + an rte instruction. + + nosave_low_regs - don't save r0..r7 in an interrupt handler. + This is useful on the SH3 and upwards, + which has a separate set of low regs for User and Supervisor modes. + This should only be used for the lowest level of interrupts. Higher levels + of interrupts must save the registers in case they themselves are + interrupted. + + renesas -- use Renesas calling/layout conventions (functions and + structures). + + resbank -- In case of an ISR, use a register bank to save registers + R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets. +*/ + +/* Handle a 'resbank' attribute. 
*/ +static tree +sh_handle_resbank_handler_attribute (tree * node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool * no_add_attrs) +{ + if (!TARGET_SH2A) + { + warning (OPT_Wattributes, "%qE attribute is supported only for SH2A", + name); + *no_add_attrs = true; + } + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle an "interrupt_handler" attribute; arguments as in + struct attribute_spec.handler. The attribute is rejected with a warning on anything but a FUNCTION_DECL, and with an error when TARGET_SHCOMPACT is set. */ +static tree +sh_handle_interrupt_handler_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + else if (TARGET_SHCOMPACT) + { + error ("attribute interrupt_handler is not compatible with -m5-compact"); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle a 'function_vector' attribute; arguments as in + struct attribute_spec.handler. */ +static tree +sh2a_handle_function_vector_handler_attribute (tree * node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool * no_add_attrs) +{ + if (!TARGET_SH2A) + { + warning (OPT_Wattributes, "%qE attribute only applies to SH2A", + name); + *no_add_attrs = true; + } + else if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) + { + /* The argument must be a constant integer. */ + warning (OPT_Wattributes, + "%qE attribute argument not an integer constant", + name); + *no_add_attrs = true; + } + else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255) + { + /* The argument value must be between 0 and 255. 
*/ + warning (OPT_Wattributes, + "%qE attribute argument should be between 0 to 255", + name); + *no_add_attrs = true; + } + return NULL_TREE; +} + +/* Returns 1 if X is a SYMBOL_REF flagged SYMBOL_FLAG_FUNCVEC_FUNCTION whose decl has been assigned the attribute + 'function_vector'. */ +int +sh2a_is_function_vector_call (rtx x) +{ + if (GET_CODE (x) == SYMBOL_REF + && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION)) + { + tree tr = SYMBOL_REF_DECL (x); + + if (sh2a_function_vector_p (tr)) + return 1; + } + + return 0; +} + +/* Returns the function vector number, if the attribute + 'function_vector' is assigned, otherwise returns zero. */ +int +sh2a_get_function_vector_number (rtx x) +{ + int num; + tree list, t; + + if ((GET_CODE (x) == SYMBOL_REF) + && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION)) + { + t = SYMBOL_REF_DECL (x); + + if (TREE_CODE (t) != FUNCTION_DECL) + return 0; + + list = SH_ATTRIBUTES (t); + while (list) + { + if (is_attribute_p ("function_vector", TREE_PURPOSE (list))) + { + num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list))); + return num; + } + + list = TREE_CHAIN (list); + } + + return 0; + } + else + return 0; +} + +/* Handle an "sp_switch" attribute; arguments as in + struct attribute_spec.handler. */ +static tree +sh_handle_sp_switch_attribute (tree *node, tree name, tree args, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST) + { + /* The argument must be a constant string. */ + warning (OPT_Wattributes, "%qE attribute argument not a string constant", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle an "trap_exit" attribute; arguments as in + struct attribute_spec.handler. 
*/ +static tree +sh_handle_trap_exit_attribute (tree *node, tree name, tree args, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + /* The argument specifies a trap number to be used in a trapa instruction + at function exit (instead of an rte instruction). */ + else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) + { + /* The argument must be a constant integer. */ + warning (OPT_Wattributes, "%qE attribute argument not an " + "integer constant", name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + /* Attribute handler that performs no validation: every argument is ignored and NULL_TREE is returned. */ +static tree +sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED, + tree name ATTRIBUTE_UNUSED, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs ATTRIBUTE_UNUSED) +{ + return NULL_TREE; +} + +/* True if __attribute__((renesas)) or -mrenesas. TD may be a decl (its type is examined), a type, null or error_mark_node; the last two yield 0 unless TARGET_HITACHI is set. */ +int +sh_attr_renesas_p (const_tree td) +{ + if (TARGET_HITACHI) + return 1; + if (td == 0) + return 0; + if (DECL_P (td)) + td = TREE_TYPE (td); + if (td == error_mark_node) + return 0; + return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) + != NULL_TREE); +} + +/* True if __attribute__((renesas)) or -mrenesas, for the current + function. */ +int +sh_cfun_attr_renesas_p (void) +{ + return sh_attr_renesas_p (current_function_decl); +} + /* Nonzero if current_function_decl carries the "interrupt_handler" attribute. */ +int +sh_cfun_interrupt_handler_p (void) +{ + return (lookup_attribute ("interrupt_handler", + DECL_ATTRIBUTES (current_function_decl)) + != NULL_TREE); +} + +/* Returns 1 if FUNC has been assigned the attribute + "function_vector". Anything that is not a FUNCTION_DECL yields 0. */ +int +sh2a_function_vector_p (tree func) +{ + tree list; + if (TREE_CODE (func) != FUNCTION_DECL) + return 0; + + list = SH_ATTRIBUTES (func); + while (list) + { + if (is_attribute_p ("function_vector", TREE_PURPOSE (list))) + return 1; + + list = TREE_CHAIN (list); + } + return 0; +} + +/* Returns TRUE if given tree has the "resbank" attribute. 
*/ + +int +sh_cfun_resbank_handler_p (void) +{ + return ((lookup_attribute ("resbank", + DECL_ATTRIBUTES (current_function_decl)) + != NULL_TREE) + && (lookup_attribute ("interrupt_handler", + DECL_ATTRIBUTES (current_function_decl)) + != NULL_TREE) && TARGET_SH2A); +} + +/* Implement TARGET_CHECK_PCH_TARGET_FLAGS. Return a translated message for the first kind of difference found between OLD_FLAGS and target_flags (architecture / ABI bits, then MASK_HITACHI, then MASK_LITTLE_ENDIAN), or NULL when none of those bits differ. */ + +static const char * +sh_check_pch_target_flags (int old_flags) +{ + if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3 + | MASK_SH_E | MASK_HARD_SH4 + | MASK_FPU_SINGLE | MASK_SH4)) + return _("created and used with different architectures / ABIs"); + if ((old_flags ^ target_flags) & MASK_HITACHI) + return _("created and used with different ABIs"); + if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN) + return _("created and used with different endianness"); + return NULL; +} + +/* Predicates used by the templates. */ + +/* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx. + Used only in general_movsrc_operand. */ + +int +system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + switch (REGNO (op)) + { + case PR_REG: + case MACL_REG: + case MACH_REG: + return 1; + } + return 0; +} + +/* Nonzero if OP is a floating point value with value 0.0. Only SFmode operands qualify, and minus zero is rejected. */ + +int +fp_zero_operand (rtx op) +{ + REAL_VALUE_TYPE r; + + if (GET_MODE (op) != SFmode) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r); +} + +/* Nonzero if OP is a floating point value with value 1.0. Only SFmode operands qualify. */ + +int +fp_one_operand (rtx op) +{ + REAL_VALUE_TYPE r; + + if (GET_MODE (op) != SFmode) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + return REAL_VALUES_EQUAL (r, dconst1); +} + +/* In general mode switching is used. If we are + compiling without -mfmovd, movsf_ie isn't taken into account for + mode switching. 
We could check in machine_dependent_reorg for + cases where we know we are in single precision mode, but there is + interface to find that out during reload, so we must avoid + choosing an fldi alternative during reload and thus failing to + allocate a scratch register for the constant loading. */ +int +fldi_ok (void) +{ + return 1; +} + /* Nonzero if OP is a MEM, or a CONST_DOUBLE when TARGET_SH4 is set. */ +int +tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + enum rtx_code code = GET_CODE (op); + return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE); +} + +/* Return the TLS model of OP if it is a SYMBOL_REF, TLS_MODEL_NONE otherwise. */ +enum tls_model +tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + if (GET_CODE (op) != SYMBOL_REF) + return TLS_MODEL_NONE; + return SYMBOL_REF_TLS_MODEL (op); +} + +/* Return the destination address of a branch. For an IF_THEN_ELSE source the first arm is taken as the destination. */ + +static int +branch_dest (rtx branch) +{ + rtx dest = SET_SRC (PATTERN (branch)); + int dest_uid; + + if (GET_CODE (dest) == IF_THEN_ELSE) + dest = XEXP (dest, 1); + dest = XEXP (dest, 0); + dest_uid = INSN_UID (dest); + return INSN_ADDRESSES (dest_uid); +} + +/* Return nonzero if REG is not used after INSN. + We assume REG is a reload reg, and therefore does + not live past labels. It may live past calls or jumps though. */ +int +reg_unused_after (rtx reg, rtx insn) +{ + enum rtx_code code; + rtx set; + + /* If the reg is set by this instruction, then it is safe for our + case. Disregard the case where this is a store to memory, since + we are checking a register used in the store address. */ + set = single_set (insn); + if (set && !MEM_P (SET_DEST (set)) + && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return 1; + + while ((insn = NEXT_INSN (insn))) + { + rtx set; + if (!INSN_P (insn)) + continue; + + code = GET_CODE (insn); + +#if 0 + /* If this is a label that existed before reload, then the register + is dead here. However, if this is a label added by reorg, then + the register may still be live here. 
We can't tell the difference, + so we just ignore labels completely. */ + if (code == CODE_LABEL) + return 1; + /* else */ +#endif + + if (code == JUMP_INSN) + return 0; + + /* If this is a sequence, we must handle them all at once. + We could have for instance a call that sets the target register, + and an insn in a delay slot that uses the register. In this case, + we must return 0. */ + else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) + { + int i; + int retval = 0; + + for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) + { + rtx this_insn = XVECEXP (PATTERN (insn), 0, i); + rtx set = single_set (this_insn); + + if (CALL_P (this_insn)) + code = CALL_INSN; + else if (JUMP_P (this_insn)) + { + if (INSN_ANNULLED_BRANCH_P (this_insn)) + return 0; + code = JUMP_INSN; + } + + if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) + return 0; + if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) + { + if (!MEM_P (SET_DEST (set))) + retval = 1; + else + return 0; + } + if (set == 0 + && reg_overlap_mentioned_p (reg, PATTERN (this_insn))) + return 0; + } + if (retval == 1) + return 1; + else if (code == JUMP_INSN) + return 0; + } + + set = single_set (insn); + if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) + return 0; + if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return !MEM_P (SET_DEST (set)); + if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn))) + return 0; + + if (code == CALL_INSN && call_really_used_regs[REGNO (reg)]) + return 1; + } + return 1; +} + +#include "ggc.h" + /* Return the PSImode REG rtx for FPSCR_REG, creating it on first use and caching it in fpscr_rtx. */ +static GTY(()) rtx fpscr_rtx; +rtx +get_fpscr_rtx (void) +{ + if (! fpscr_rtx) + { + fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG); + REG_USERVAR_P (fpscr_rtx) = 1; + mark_user_reg (fpscr_rtx); + } + if (! 
reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG) + mark_user_reg (fpscr_rtx); + return fpscr_rtx; +} + +static GTY(()) tree fpscr_values; + /* Emit a move into the FPSCR rtx from the external variable __fpscr_values, read in PSImode at byte offset INDEX * 4. The declaration of that variable is built on first use and cached in fpscr_values. When no new pseudo can be created, the address is first loaded into SCRATCH. */ +static void +emit_fpu_switch (rtx scratch, int index) +{ + rtx dst, src; + + if (fpscr_values == NULL) + { + tree t; + + t = build_index_type (integer_one_node); + t = build_array_type (integer_type_node, t); + t = build_decl (BUILTINS_LOCATION, + VAR_DECL, get_identifier ("__fpscr_values"), t); + DECL_ARTIFICIAL (t) = 1; + DECL_IGNORED_P (t) = 1; + DECL_EXTERNAL (t) = 1; + TREE_STATIC (t) = 1; + TREE_PUBLIC (t) = 1; + TREE_USED (t) = 1; + + fpscr_values = t; + } + + src = DECL_RTL (fpscr_values); + if (!can_create_pseudo_p ()) + { + emit_move_insn (scratch, XEXP (src, 0)); + if (index != 0) + emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4))); + src = adjust_automodify_address (src, PSImode, scratch, index * 4); + } + else + src = adjust_address (src, PSImode, index * 4); + + dst = get_fpscr_rtx (); + emit_move_insn (dst, src); +} + +void +emit_sf_insn (rtx pat) +{ + emit_insn (pat); +} + +void +emit_df_insn (rtx pat) +{ + emit_insn (pat); +} + /* The four expanders below call FUN on the entries of OPERANDS plus the FPSCR rtx and emit the resulting pattern. */ +void +expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands) +{ + emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ())); +} + +void +expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands) +{ + emit_sf_insn ((*fun) (operands[0], operands[1], operands[2], + get_fpscr_rtx ())); +} + +void +expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands) +{ + emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ())); +} + +void +expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands) +{ + emit_df_insn ((*fun) (operands[0], operands[1], operands[2], + get_fpscr_rtx ())); +} + +static rtx get_free_reg (HARD_REG_SET); + +/* This function returns a register to use to load the address to load + the fpscr from. 
Currently it always returns r1 or r7, but when we are + able to use pseudo registers after combine, or have a better mechanism + for choosing a register, it should be done here. */ +/* REGS_LIVE is the liveness information for the point for which we + need this allocation. In some bare-bones exit blocks, r1 is live at the + start. We can even have all of r0..r3 being live: +__complex__ long long f (double d) { if (d == 0) return 2; else return 3; } + INSN before which new insns are placed with will clobber the register + we return. If a basic block consists only of setting the return value + register to a pseudo and using that register, the return value is not + live before or after this block, yet we'll insert our insns right in + the middle. */ + +static rtx +get_free_reg (HARD_REG_SET regs_live) +{ + if (! TEST_HARD_REG_BIT (regs_live, 1)) + return gen_rtx_REG (Pmode, 1); + + /* Hard reg 1 is live; since this is a small register classes target, + there shouldn't be anything but a jump before the function end. */ + gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7)); + return gen_rtx_REG (Pmode, 7); +} + +/* This function will set the fpscr from memory. + MODE is the mode we are setting it to. */ +void +fpscr_set_from_mem (int mode, HARD_REG_SET regs_live) +{ + enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode; + enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE); + rtx addr_reg; + + addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX; + emit_fpu_switch (addr_reg, fp_mode == norm_mode); +} + +/* Is the given character a logical line separator for the assembler? */ +#ifndef IS_ASM_LOGICAL_LINE_SEPARATOR +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';') +#endif + /* Return the number of bytes by which the length of INSN must be increased: 2 for either unfilled-delay-slot case below, otherwise the sum of the adjustments found while scanning an asm template (0 when INSN has none). */ +int +sh_insn_length_adjustment (rtx insn) +{ + /* Instructions with unfilled delay slots take up an extra two bytes for + the nop in the delay slot. 
*/ + if (((NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) + || CALL_P (insn) + || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn))) + && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE + && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES) + return 2; + + /* SH2e has a bug that prevents the use of annulled branches, so if + the delay slot is not filled, we'll have to put a NOP in it. */ + if (sh_cpu_attr == CPU_SH2E + && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn) + && get_attr_type (insn) == TYPE_CBRANCH + && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE) + return 2; + + /* sh-dsp parallel processing insn take four bytes instead of two. */ + + if (NONJUMP_INSN_P (insn)) + { + int sum = 0; + rtx body = PATTERN (insn); + const char *templ; + char c; + int maybe_label = 1; + + if (GET_CODE (body) == ASM_INPUT) + templ = XSTR (body, 0); + else if (asm_noperands (body) >= 0) + templ + = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL); + else + return 0; + do + { + int ppi_adjust = 0; + + do + c = *templ++; + while (c == ' ' || c == '\t'); + /* all sh-dsp parallel-processing insns start with p. + The only non-ppi sh insn starting with p is pref. + The only ppi starting with pr is prnd. */ + if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2)) + ppi_adjust = 2; + /* The repeat pseudo-insn expands to three insns, a total of + six bytes in size. */ + else if ((c == 'r' || c == 'R') + && ! strncasecmp ("epeat", templ, 5)) + ppi_adjust = 4; + while (c && c != '\n' + && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ)) + { + /* If this is a label, it is obviously not a ppi insn. 
*/ + if (c == ':' && maybe_label) + { + ppi_adjust = 0; + break; + } + else if (c == '\'' || c == '"') + maybe_label = 0; + c = *templ++; + } + sum += ppi_adjust; + maybe_label = c != ':'; + } + while (c); + return sum; + } + return 0; +} + +/* Return TRUE for a valid displacement for the REG+disp addressing + with MODE. */ + +/* ??? The SH2e does not have the REG+disp addressing mode when loading values + into the FRx registers. We implement this by setting the maximum offset + to zero when the value is SFmode. This also restricts loading of SFmode + values into the integer registers, but that can't be helped. */ + +/* The SH allows a displacement in a QI or HI amode, but only when the + other operand is R0. GCC doesn't handle this very well, so we forgo + all of that. + + A legitimate index for a QI or HI is 0, SI can be any number 0..63, + DI can be any number 0..60. */ + +bool +sh_legitimate_index_p (enum machine_mode mode, rtx op) +{ + if (CONST_INT_P (op)) + { + if (TARGET_SHMEDIA) + { + int size; + + /* Check if this is the address of an unaligned load / store. */ + if (mode == VOIDmode) + return CONST_OK_FOR_I06 (INTVAL (op)); + + size = GET_MODE_SIZE (mode); + return (!(INTVAL (op) & (size - 1)) + && INTVAL (op) >= -512 * size + && INTVAL (op) < 512 * size); + } + + if (TARGET_SH2A) + { + if (GET_MODE_SIZE (mode) == 1 + && (unsigned) INTVAL (op) < 4096) + return true; + } + + if ((GET_MODE_SIZE (mode) == 4 + && (unsigned) INTVAL (op) < 64 + && !(INTVAL (op) & 3) + && !(TARGET_SH2E && mode == SFmode)) + || (GET_MODE_SIZE (mode) == 4 + && (unsigned) INTVAL (op) < 16383 + && !(INTVAL (op) & 3) && TARGET_SH2A)) + return true; + + if ((GET_MODE_SIZE (mode) == 8 + && (unsigned) INTVAL (op) < 60 + && !(INTVAL (op) & 3) + && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode)) + || ((GET_MODE_SIZE (mode)==8) + && (unsigned) INTVAL (op) < 8192 + && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 
7 : 3)) + && (TARGET_SH2A && mode == DFmode))) + return true; + } + + return false; +} + +/* Recognize an RTL expression that is a valid memory address for + an instruction. + The MODE argument is the machine mode for the MEM expression + that wants to use this address. + Allow REG + REG+disp + REG+r0 + REG++ + --REG */ + +static bool +sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + if (MAYBE_BASE_REGISTER_RTX_P (x, strict)) + return true; + else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC) + && ! TARGET_SHMEDIA + && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict)) + return true; + else if (GET_CODE (x) == PLUS + && (mode != PSImode || reload_completed)) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + if (GET_MODE_SIZE (mode) <= 8 + && MAYBE_BASE_REGISTER_RTX_P (xop0, strict) + && sh_legitimate_index_p (mode, xop1)) + return true; + + if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode + || ((xop0 == stack_pointer_rtx + || xop0 == hard_frame_pointer_rtx) + && REG_P (xop1) && REGNO (xop1) == R0_REG) + || ((xop1 == stack_pointer_rtx + || xop1 == hard_frame_pointer_rtx) + && REG_P (xop0) && REGNO (xop0) == R0_REG)) + && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4) + || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8) + || ((TARGET_SH4 || TARGET_SH2A_DOUBLE) + && TARGET_FMOVD && mode == DFmode))) + { + if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict) + && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict)) + return true; + if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict) + && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)) + return true; + } + } + + return false; +} + +/* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol + isn't protected by a PIC unspec. 
*/
+int
+nonpic_symbol_mentioned_p (rtx x)
+{
+  register const char *fmt;
+  register int i;
+
+  if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
+      || GET_CODE (x) == PC)  /* A bare PC is reported the same way.  */
+    return 1;
+
+  /* We don't want to look into the possible MEM location of a
+     CONST_DOUBLE, since we're not going to use it, in general.  */
+  if (GET_CODE (x) == CONST_DOUBLE)
+    return 0;
+
+  /* Anything wrapped in one of these unspecs is not searched further.  */
+  if (GET_CODE (x) == UNSPEC
+      && (XINT (x, 1) == UNSPEC_PIC
+          || XINT (x, 1) == UNSPEC_GOT
+          || XINT (x, 1) == UNSPEC_GOTOFF
+          || XINT (x, 1) == UNSPEC_GOTPLT
+          || XINT (x, 1) == UNSPEC_GOTTPOFF
+          || XINT (x, 1) == UNSPEC_DTPOFF
+          || XINT (x, 1) == UNSPEC_TPOFF
+          || XINT (x, 1) == UNSPEC_PLT
+          || XINT (x, 1) == UNSPEC_SYMOFF
+          || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
+    return 0;
+
+  /* Otherwise recurse into every sub-expression and vector element.  */
+  fmt = GET_RTX_FORMAT (GET_CODE (x));
+  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'E')  /* Operand I is a vector of expressions.  */
+        {
+          register int j;
+
+          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+            if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
+              return 1;
+        }
+      else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
+        return 1;
+    }
+
+  return 0;
+}
+
+/* Convert a non-PIC address in `orig' to a PIC address using @GOT or
+   @GOTOFF in `reg'.  A new pseudo is allocated when `reg' is 0.  */
+rtx
+legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
+                        rtx reg)
+{
+  if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
+    return orig;  /* TLS symbols are returned untouched.  */
+
+  if (GET_CODE (orig) == LABEL_REF
+      || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
+    {
+      if (reg == 0)
+        reg = gen_reg_rtx (Pmode);
+
+      emit_insn (gen_symGOTOFF2reg (reg, orig));  /* Labels, local symbols.  */
+      return reg;
+    }
+  else if (GET_CODE (orig) == SYMBOL_REF)
+    {
+      if (reg == 0)
+        reg = gen_reg_rtx (Pmode);
+
+      emit_insn (gen_symGOT2reg (reg, orig));  /* Any other symbol.  */
+      return reg;
+    }
+  return orig;
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address
+   to be legitimate.  If we find one, return the new, valid address.
+   Otherwise, return X.
+
+   For the SH, if X is almost suitable for indexing, but the offset is
+   out of range, convert it into a normal form so that CSE has a chance
+   of reducing the number of address registers used.  */
+
+static rtx
+sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
+{
+  if (flag_pic)
+    x = legitimize_pic_address (oldx, mode, NULL_RTX);
+
+  /* Only REG + CONST_INT for 4- and 8-byte modes is normalized here.  */
+  if (GET_CODE (x) == PLUS
+      && (GET_MODE_SIZE (mode) == 4
+          || GET_MODE_SIZE (mode) == 8)
+      && CONST_INT_P (XEXP (x, 1))
+      && BASE_REGISTER_RTX_P (XEXP (x, 0))
+      && ! TARGET_SHMEDIA
+      && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
+      && ! (TARGET_SH2E && mode == SFmode))
+    {
+      rtx index_rtx = XEXP (x, 1);
+      HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
+      rtx sum;
+
+      /* On rare occasions, we might get an unaligned pointer
+         that is indexed in a way to give an aligned address.
+         Therefore, keep the lower two bits in offset_base.  */
+      /* Instead of offset_base 128..131 use 124..127, so that
+         simple add suffices.  */
+      if (offset > 127)
+        offset_base = ((offset + 4) & ~60) - 4;
+      else
+        offset_base = offset & ~60;  /* 60 == 0x3c: clears bits 2..5.  */
+
+      /* Sometimes the normal form does not suit DImode.  We
+         could avoid that by using smaller ranges, but that
+         would give less optimized code when SImode is
+         prevalent.  */
+      if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
+        {
+          sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
+                              GEN_INT (offset_base), NULL_RTX, 0,
+                              OPTAB_LIB_WIDEN);
+
+          return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
+        }
+    }
+
+  return x;
+}
+
+/* Attempt to replace *P, which is an address that needs reloading, with
+   a valid memory address for an operand of mode MODE.
+   Like for sh_legitimize_address, for the SH we try to get a normal form
+   of the address.  That will allow inheritance of the address reloads.
*/
+
+bool
+sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
+                              int itype)
+{
+  enum reload_type type = (enum reload_type) itype;
+
+  if (GET_CODE (*p) == PLUS
+      && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+      && CONST_INT_P (XEXP (*p, 1))
+      && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
+      && ! TARGET_SHMEDIA
+      && ! (TARGET_SH4 && mode == DFmode)
+      && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
+      && (ALLOW_INDEXED_ADDRESS
+          || XEXP (*p, 0) == stack_pointer_rtx
+          || XEXP (*p, 0) == hard_frame_pointer_rtx))
+    {
+      rtx index_rtx = XEXP (*p, 1);
+      HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
+      rtx sum;
+
+      if (TARGET_SH2A && mode == DFmode && (offset & 0x7))  /* Not 8-aligned.  */
+        {
+          push_reload (*p, NULL_RTX, p, NULL,
+                       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+          goto win;
+        }
+      if (TARGET_SH2E && mode == SFmode)
+        {
+          *p = copy_rtx (*p);
+          push_reload (*p, NULL_RTX, p, NULL,
+                       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+          goto win;
+        }
+      /* Instead of offset_base 128..131 use 124..127, so that
+         simple add suffices.  */
+      if (offset > 127)
+        offset_base = ((offset + 4) & ~60) - 4;
+      else
+        offset_base = offset & ~60;
+      /* Sometimes the normal form does not suit DImode.  We could avoid
+         that by using smaller ranges, but that would give less optimized
+         code when SImode is prevalent.  */
+      if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
+        {
+          sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
+          *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
+          push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
+                       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+          goto win;
+        }
+    }
+  /* We must re-recognize what we created before.  */
+  else if (GET_CODE (*p) == PLUS
+           && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+           && GET_CODE (XEXP (*p, 0)) == PLUS
+           && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
+           && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
+           && CONST_INT_P (XEXP (*p, 1))
+           && ! TARGET_SHMEDIA
+           && ! (TARGET_SH2E && mode == SFmode))
+    {
+      /* Because this address is so complex, we know it must have
+         been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
+         it is already unshared, and needs no further unsharing.  */
+      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
+                   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+      goto win;
+    }
+
+  return false;
+
+ win:  /* Reached only after one of the push_reload calls above.  */
+  return true;
+}
+
+/* In the name of slightly smaller debug output, and to cater to
+   general assembler lossage, recognize various UNSPEC sequences
+   and turn them back into a direct symbol reference.  */
+
+static rtx
+sh_delegitimize_address (rtx orig_x)
+{
+  rtx x, y;
+
+  orig_x = delegitimize_mem_from_attrs (orig_x);
+
+  x = orig_x;
+  if (MEM_P (x))
+    x = XEXP (x, 0);
+  if (GET_CODE (x) == CONST)
+    {
+      y = XEXP (x, 0);
+      if (GET_CODE (y) == UNSPEC)
+        {
+          if (XINT (y, 1) == UNSPEC_GOT
+              || XINT (y, 1) == UNSPEC_GOTOFF)
+            return XVECEXP (y, 0, 0);  /* First operand of the unspec.  */
+          else if (TARGET_SHMEDIA
+                   && (XINT (y, 1) == UNSPEC_EXTRACT_S16
+                       || XINT (y, 1) == UNSPEC_EXTRACT_U16))
+            {
+              rtx offset = XVECEXP (y, 0, 1);
+
+              x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
+              if (MEM_P (orig_x))
+                x = replace_equiv_address_nv (orig_x, x);
+              return x;
+            }
+        }
+    }
+
+  return orig_x;
+}
+
+/* Mark the use of a constant in the literal table.  If the constant
+   has multiple labels, make it unique.  Returns the label kept.  */
+static rtx
+mark_constant_pool_use (rtx x)
+{
+  rtx insn, lab, pattern;
+
+  if (x == NULL)
+    return x;
+
+  switch (GET_CODE (x))
+    {
+    case LABEL_REF:
+      x = XEXP (x, 0);  /* Fall through with the referenced label.  */
+    case CODE_LABEL:
+      break;
+    default:
+      return x;
+    }
+
+  /* Get the first label in the list of labels for the same constant
+     and delete the other labels in the list.  */
+  lab = x;
+  for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
+    {
+      if (!LABEL_P (insn)
+          || LABEL_REFS (insn) != NEXT_INSN (insn))
+        break;
+      lab = insn;
+    }
+
+  for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
+    INSN_DELETED_P (insn) = 1;
+
+  /* Mark constants in a window.  */
+  for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
+    {
+      if (!NONJUMP_INSN_P (insn))
+        continue;
+
+      pattern = PATTERN (insn);
+      if (GET_CODE (pattern) != UNSPEC_VOLATILE)
+        continue;
+
+      switch (XINT (pattern, 1))
+        {
+        case UNSPECV_CONST2:
+        case UNSPECV_CONST4:
+        case UNSPECV_CONST8:
+          XVECEXP (pattern, 0, 1) = const1_rtx;
+          break;
+        case UNSPECV_WINDOW_END:
+          if (XVECEXP (pattern, 0, 0) == x)
+            return lab;
+          break;
+        case UNSPECV_CONST_END:
+          return lab;
+        default:
+          break;
+        }
+    }
+
+  return lab;
+}
+
+/* Return true if it's possible to redirect BRANCH1 to the destination
+   of an unconditional jump BRANCH2.  We only want to do this if the
+   resulting branch will have a short displacement.  */
+int
+sh_can_redirect_branch (rtx branch1, rtx branch2)
+{
+  if (flag_expensive_optimizations && simplejump_p (branch2))
+    {
+      rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
+      rtx insn;
+      int distance;
+
+      for (distance = 0, insn = NEXT_INSN (branch1);  /* Backward scan.  */
+           insn && distance < 256;
+           insn = PREV_INSN (insn))
+        {
+          if (insn == dest)
+            return 1;
+          else
+            distance += get_attr_length (insn);
+        }
+      for (distance = 0, insn = NEXT_INSN (branch1);  /* Forward scan.  */
+           insn && distance < 256;
+           insn = NEXT_INSN (insn))
+        {
+          if (insn == dest)
+            return 1;
+          else
+            distance += get_attr_length (insn);
+        }
+    }
+  return 0;
+}
+
+/* Return nonzero if register old_reg can be renamed to register new_reg.  */
+int
+sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
+                         unsigned int new_reg)
+{
+  /* Interrupt functions can only use registers that have already been
+     saved by the prologue, even if they would normally be
+     call-clobbered.  */
+
+  if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
+    return 0;
+
+  return 1;
+}
+
+/* Function to update the integer COST
+   based on the relationship between INSN that is dependent on
+   DEP_INSN through the dependence LINK.  The default is to make no
+   adjustment to COST.  This can be used for example to specify to
+   the scheduler that an output- or anti-dependence does not incur
+   the same cost as a data-dependence.  The return value should be
+   the new value for COST.  LINK is used despite its ATTRIBUTE_UNUSED.  */
+static int
+sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
+{
+  rtx reg, use_pat;
+
+  if (TARGET_SHMEDIA)
+    {
+      /* On SHmedia, if the dependence is an anti-dependence or
+         output-dependence, there is no cost.  */
+      if (REG_NOTE_KIND (link) != 0)
+        {
+          /* However, dependencies between target register loads and
+             uses of the register in a subsequent block that are separated
+             by a conditional branch are not modelled - we have to do with
+             the anti-dependency between the target register load and the
+             conditional branch that ends the current block.  */
+          if (REG_NOTE_KIND (link) == REG_DEP_ANTI
+              && GET_CODE (PATTERN (dep_insn)) == SET
+              && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
+                  || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
+              && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
+            {
+              int orig_cost = cost;
+              rtx note = find_reg_note (insn, REG_BR_PROB, 0);
+              rtx target = ((! note
+                             || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
+                            ? insn : JUMP_LABEL (insn));
+              /* On the likely path, the branch costs 1, on the unlikely path,
+                 it costs 3.  */
+              cost--;
+              do
+                target = next_active_insn (target);
+              while (target && ! flow_dependent_p (target, dep_insn)
+                     && --cost > 0);
+              /* If two branches are executed in immediate succession, with the
+                 first branch properly predicted, this causes a stall at the
+                 second branch, hence we won't need the target for the
+                 second branch for two cycles after the launch of the first
+                 branch.  */
+              if (cost > orig_cost - 2)
+                cost = orig_cost - 2;
+            }
+          else
+            cost = 0;
+        }
+
+      else if (get_attr_is_mac_media (insn)
+               && get_attr_is_mac_media (dep_insn))
+        cost = 1;
+
+      else if (! reload_completed
+               && GET_CODE (PATTERN (insn)) == SET
+               && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
+               && GET_CODE (PATTERN (dep_insn)) == SET
+               && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
+               && cost < 4)
+        cost = 4;
+      /* Schedule the ptabs for a casesi_jump_media in preference to stuff
+         that is needed at the target.  */
+      else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
+               && ! flow_dependent_p (insn, dep_insn))
+        cost--;
+    }
+  else if (REG_NOTE_KIND (link) == 0)
+    {
+      enum attr_type type;
+      rtx dep_set;
+
+      if (recog_memoized (insn) < 0
+          || recog_memoized (dep_insn) < 0)
+        return cost;
+
+      dep_set = single_set (dep_insn);
+
+      /* The latency that we specify in the scheduling description refers
+         to the actual output, not to an auto-increment register; for that,
+         the latency is one.  */
+      if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
+        {
+          rtx set = single_set (insn);
+
+          if (set
+              && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
+              && (!MEM_P (SET_DEST (set))
+                  || !reg_mentioned_p (SET_DEST (dep_set),
+                                       XEXP (SET_DEST (set), 0))))
+            cost = 1;
+        }
+      /* The only input for a call that is timing-critical is the
+         function's address.  */
+      if (CALL_P (insn))
+        {
+          rtx call = PATTERN (insn);
+
+          if (GET_CODE (call) == PARALLEL)
+            call = XVECEXP (call, 0 ,0);
+          if (GET_CODE (call) == SET)
+            call = SET_SRC (call);
+          if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
+                  /* sibcalli_thunk uses a symbol_ref in an unspec.  */
+              && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
+                  || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
+            cost -= TARGET_SH4_300 ? 3 : 6;
+        }
+      /* Likewise, the most timing critical input for an sfuncs call
+         is the function address.  However, sfuncs typically start
+         using their arguments pretty quickly.
+         Assume a four cycle delay for SH4 before they are needed.
+         Cached ST40-300 calls are quicker, so assume only a one
+         cycle delay there.
+         ??? Maybe we should encode the delays till input registers
+         are needed by sfuncs into the sfunc call insn.  */
+      /* All sfunc calls are parallels with at least four components.
+         Exploit this to avoid unnecessary calls to sfunc_uses_reg.  */
+      else if (GET_CODE (PATTERN (insn)) == PARALLEL
+               && XVECLEN (PATTERN (insn), 0) >= 4
+               && (reg = sfunc_uses_reg (insn)))
+        {
+          if (! reg_set_p (reg, dep_insn))
+            cost -= TARGET_SH4_300 ? 1 : 4;
+        }
+      if (TARGET_HARD_SH4 && !TARGET_SH4_300)
+        {
+          enum attr_type dep_type = get_attr_type (dep_insn);
+
+          if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
+            cost--;
+          else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
+                   && (type = get_attr_type (insn)) != TYPE_CALL
+                   && type != TYPE_SFUNC)
+            cost--;
+          /* When the preceding instruction loads the shift amount of
+             the following SHAD/SHLD, the latency of the load is increased
+             by 1 cycle.  */
+          if (get_attr_type (insn) == TYPE_DYN_SHIFT
+              && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
+              && reg_overlap_mentioned_p (SET_DEST (dep_set),
+                                          XEXP (SET_SRC (single_set (insn)),
+                                                1)))
+            cost++;
+          /* When an LS group instruction with a latency of less than
+             3 cycles is followed by a double-precision floating-point
+             instruction, FIPR, or FTRV, the latency of the first
+             instruction is increased to 3 cycles.  */
+          else if (cost < 3
+                   && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
+                   && get_attr_dfp_comp (insn) == DFP_COMP_YES)
+            cost = 3;
+          /* The lsw register of a double-precision computation is ready one
+             cycle earlier.  */
+          else if (reload_completed
+                   && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
+                   && (use_pat = single_set (insn))
+                   && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
+                                      SET_SRC (use_pat)))
+            cost -= 1;
+
+          if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
+              && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
+            cost -= 1;
+        }
+      else if (TARGET_SH4_300)
+        {
+          /* Stores need their input register two cycles later.  */
+          if (dep_set && cost >= 1
+              && ((type = get_attr_type (insn)) == TYPE_STORE
+                  || type == TYPE_PSTORE
+                  || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
+            {
+              rtx set = single_set (insn);
+
+              if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
+                  && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
+                {
+                  cost -= 2;
+                  /* But don't reduce the cost below 1 if the address depends
+                     on a side effect of dep_insn.  */
+                  if (cost < 1
+                      && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
+                    cost = 1;
+                }
+            }
+        }
+    }
+  /* An anti-dependence penalty of two applies if the first insn is a double
+     precision fadd / fsub / fmul.  */
+  else if (!TARGET_SH4_300
+           && REG_NOTE_KIND (link) == REG_DEP_ANTI
+           && recog_memoized (dep_insn) >= 0
+           && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
+               || get_attr_type (dep_insn) == TYPE_DFP_MUL)
+           /* A lot of alleged anti-flow dependences are fake,
+              so check this one is real.  */
+           && flow_dependent_p (dep_insn, insn))
+    cost = 2;
+
+  return cost;
+}
+
+/* Check if INSN is flow-dependent on DEP_INSN.  Can also be used to check
+   if DEP_INSN is anti-flow dependent on INSN.  */
+static int
+flow_dependent_p (rtx insn, rtx dep_insn)
+{
+  rtx tmp = PATTERN (insn);
+
+  note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
+  return tmp == NULL_RTX;  /* The helper clears TMP when it finds a use.  */
+}
+
+/* A helper function for flow_dependent_p called through note_stores.  */
+static void
+flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
+{
+  rtx * pinsn = (rtx *) data;
+
+  if (*pinsn && reg_referenced_p (x, *pinsn))
+    *pinsn = NULL_RTX;
+}
+
+/* For use by sh_allocate_initial_value.
Note that sh.md contains some
+   'special function' patterns (type sfunc) that clobber pr, but that
+   do not look like function calls to leaf_function_p.  Hence we must
+   do this extra check.  */
+static int
+sh_pr_n_sets (void)
+{
+  return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
+}
+
+/* Return where to allocate pseudo for a given hard register initial
+   value.  Only PR is handled; NULL_RTX is returned for other registers.  */
+static rtx
+sh_allocate_initial_value (rtx hard_reg)
+{
+  rtx x;
+
+  if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
+    {
+      if (current_function_is_leaf
+          && ! sh_pr_n_sets ()
+          && ! (TARGET_SHCOMPACT
+                && ((crtl->args.info.call_cookie
+                     & ~ CALL_COOKIE_RET_TRAMP (1))
+                    || crtl->saves_all_registers)))
+        x = hard_reg;  /* Leaf function that never sets PR.  */
+      else
+        x = gen_frame_mem (Pmode, return_address_pointer_rtx);
+    }
+  else
+    x = NULL_RTX;
+
+  return x;
+}
+
+/* This function returns "2" to indicate dual issue for the SH4
+   processor.  To be used by the DFA pipeline description.  */
+static int
+sh_issue_rate (void)
+{
+  if (TARGET_SUPERSCALAR)
+    return 2;
+  else
+    return 1;
+}
+
+/* Functions for ready queue reordering for sched1.  */
+
+/* Get weight for MODE for a SET or CLOBBER X; the result is 0 or 1.  */
+static short
+find_set_regmode_weight (rtx x, enum machine_mode mode)
+{
+  if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
+    return 1;
+  if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
+    {
+      if (REG_P (SET_DEST (x)))
+        {
+          if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
+            return 1;
+          else
+            return 0;  /* Destination is also mentioned in the source.  */
+        }
+      return 1;
+    }
+  return 0;
+}
+
+/* Get regmode weight for insn.  */
+static short
+find_insn_regmode_weight (rtx insn, enum machine_mode mode)
+{
+  short reg_weight = 0;
+  rtx x;
+
+  /* Increment weight for each register born here.  */
+  x = PATTERN (insn);
+  reg_weight += find_set_regmode_weight (x, mode);
+  if (GET_CODE (x) == PARALLEL)
+    {
+      int j;
+      for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
+        {
+          x = XVECEXP (PATTERN (insn), 0, j);
+          reg_weight += find_set_regmode_weight (x, mode);
+        }
+    }
+  /* Decrement weight for each register that dies here.  */
+  for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
+    {
+      if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
+        {
+          rtx note = XEXP (x, 0);
+          if (REG_P (note) && GET_MODE (note) == mode)
+            reg_weight--;
+        }
+    }
+  return reg_weight;
+}
+
+/* Calculate regmode weights for all insns of basic block B (SI/SF only).  */
+static void
+find_regmode_weight (basic_block b, enum machine_mode mode)
+{
+  rtx insn, next_tail, head, tail;
+
+  get_ebb_head_tail (b, b, &head, &tail);
+  next_tail = NEXT_INSN (tail);
+
+  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+    {
+      /* Handle register life information.  */
+      if (!INSN_P (insn))
+        continue;
+
+      if (mode == SFmode)  /* DFmode births/deaths count twice.  */
+        INSN_REGMODE_WEIGHT (insn, mode) =
+          find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
+      else if (mode == SImode)  /* DImode births/deaths count twice.  */
+        INSN_REGMODE_WEIGHT (insn, mode) =
+          find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
+    }
+}
+
+/* Comparison function for ready queue sorting.  */
+static int
+rank_for_reorder (const void *x, const void *y)
+{
+  rtx tmp = *(const rtx *) y;  /* Note: TMP is Y and TMP2 is X.  */
+  rtx tmp2 = *(const rtx *) x;
+
+  /* The insn in a schedule group should be issued the first.  */
+  if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
+    return SCHED_GROUP_P (tmp2) ? 1 : -1;
+
+  /* If insns are equally good, sort by INSN_LUID (original insn order).  This
+     minimizes instruction movement, thus minimizing sched's effect on
+     register pressure.  */
+  return INSN_LUID (tmp) - INSN_LUID (tmp2);
+}
+
+/* Resort the array A in which only element at index N may be out of order.
*/
+static void
+swap_reorder (rtx *a, int n)
+{
+  rtx insn = a[n - 1];  /* The element being placed is the last one.  */
+  int i = n - 2;
+
+  while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
+    {
+      a[i + 1] = a[i];  /* Shift up to make room.  */
+      i -= 1;
+    }
+  a[i + 1] = insn;
+}
+
+#define SCHED_REORDER(READY, N_READY)                                   \
+  do                                                                    \
+    {                                                                   \
+      if ((N_READY) == 2)                                               \
+        swap_reorder (READY, N_READY);                                  \
+      else if ((N_READY) > 2)                                           \
+        qsort (READY, N_READY, sizeof (rtx), rank_for_reorder);         \
+    }                                                                   \
+  while (0)
+
+/* Sort the ready list READY by ascending priority, using the SCHED_REORDER
+   macro.  Lists of fewer than two entries are left untouched.  */
+static void
+ready_reorder (rtx *ready, int nready)
+{
+  SCHED_REORDER (ready, nready);
+}
+
+/* Count life regions of r0 for a block: r0 births minus r0 deaths.  */
+static int
+find_r0_life_regions (basic_block b)
+{
+  rtx end, insn;
+  rtx pset;
+  rtx r0_reg;
+  int live;
+  int set;
+  int death = 0;
+
+  if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))  /* r0 live on entry.  */
+    {
+      set = 1;
+      live = 1;
+    }
+  else
+    {
+      set = 0;
+      live = 0;
+    }
+
+  insn = BB_HEAD (b);
+  end = BB_END (b);
+  r0_reg = gen_rtx_REG (SImode, R0_REG);
+  while (1)
+    {
+      if (INSN_P (insn))
+        {
+          if (find_regno_note (insn, REG_DEAD, R0_REG))
+            {
+              death++;
+              live = 0;
+            }
+          if (!live  /* A set of r0 that is not REG_UNUSED starts a region.  */
+              && (pset = single_set (insn))
+              && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
+              && !find_regno_note (insn, REG_UNUSED, R0_REG))
+            {
+              set++;
+              live = 1;
+            }
+        }
+      if (insn == end)
+        break;
+      insn = NEXT_INSN (insn);
+    }
+  return set - death;
+}
+
+/* Calculate regmode weights for all insns of all basic block.
*/
+static void
+sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
+                   int verbose ATTRIBUTE_UNUSED,
+                   int old_max_uid)
+{
+  basic_block b;
+
+  /* Two zero-filled arrays of OLD_MAX_UID shorts each.  */
+  regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
+  regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
+  r0_life_regions = 0;
+
+  FOR_EACH_BB_REVERSE (b)
+  {
+    find_regmode_weight (b, SImode);
+    find_regmode_weight (b, SFmode);
+    if (!reload_completed)
+      r0_life_regions += find_r0_life_regions (b);
+  }
+
+  CURR_REGMODE_PRESSURE (SImode) = 0;
+  CURR_REGMODE_PRESSURE (SFmode) = 0;
+
+}
+
+/* Cleanup: free and clear the arrays allocated by sh_md_init_global.  */
+static void
+sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
+                     int verbose ATTRIBUTE_UNUSED)
+{
+  if (regmode_weight[0])
+    {
+      free (regmode_weight[0]);
+      regmode_weight[0] = NULL;
+    }
+  if (regmode_weight[1])
+    {
+      free (regmode_weight[1]);
+      regmode_weight[1] = NULL;
+    }
+}
+
+/* The set of supported scalar modes differs from the default only in
+   that TImode is rejected for 32-bit SHMEDIA.  */
+static bool
+sh_scalar_mode_supported_p (enum machine_mode mode)
+{
+  if (TARGET_SHMEDIA32 && mode == TImode)
+    return false;
+
+  return default_scalar_mode_supported_p (mode);
+}
+
+/* Cache the can_issue_more so that we can return it from reorder2.  Also,
+   keep count of register pressures on SImode and SFmode.
*/
+static int
+sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
+                   int sched_verbose ATTRIBUTE_UNUSED,
+                   rtx insn,
+                   int can_issue_more)
+{
+  if (GET_CODE (PATTERN (insn)) != USE
+      && GET_CODE (PATTERN (insn)) != CLOBBER)
+    cached_can_issue_more = can_issue_more - 1;
+  else
+    cached_can_issue_more = can_issue_more;  /* USE/CLOBBER take no slot.  */
+
+  if (reload_completed)
+    return cached_can_issue_more;  /* Pressure is tracked before reload only.  */
+
+  CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
+  CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
+
+  return cached_can_issue_more;
+}
+
+static void
+sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
+            int verbose ATTRIBUTE_UNUSED,
+            int veclen ATTRIBUTE_UNUSED)
+{
+  CURR_REGMODE_PRESSURE (SImode) = 0;  /* Reset both pressure counters.  */
+  CURR_REGMODE_PRESSURE (SFmode) = 0;
+}
+
+/* Some magic numbers.  */
+/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
+   functions that already have high pressure on r0.  */
+#define R0_MAX_LIFE_REGIONS 2
+/* Register Pressure thresholds for SImode and SFmode registers.  */
+#define SIMODE_MAX_WEIGHT 5
+#define SFMODE_MAX_WEIGHT 10
+
+/* Return true if the pressure is high for MODE (or on r0, for any MODE).  */
+static short
+high_pressure (enum machine_mode mode)
+{
+  /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
+     functions that already have high pressure on r0.  */
+  if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
+    return 1;
+
+  if (mode == SFmode)
+    return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
+  else  /* Every mode other than SFmode is judged by the SImode counter.  */
+    return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
+}
+
+/* Reorder ready queue if register pressure is high.
*/
+static int
+sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
+            int sched_verbose ATTRIBUTE_UNUSED,
+            rtx *ready,
+            int *n_readyp,
+            int clock_var ATTRIBUTE_UNUSED)
+{
+  if (reload_completed)
+    return sh_issue_rate ();
+
+  if (high_pressure (SFmode) || high_pressure (SImode))
+    {
+      ready_reorder (ready, *n_readyp);
+    }
+
+  return sh_issue_rate ();
+}
+
+/* Skip cycles if the current register pressure is high.  */
+static int
+sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
+             int sched_verbose ATTRIBUTE_UNUSED,
+             rtx *ready ATTRIBUTE_UNUSED,
+             int *n_readyp ATTRIBUTE_UNUSED,
+             int clock_var ATTRIBUTE_UNUSED)
+{
+  if (reload_completed)
+    return cached_can_issue_more;
+
+  if (high_pressure(SFmode) || high_pressure (SImode))
+    skip_cycles = 1;  /* Acted upon by sh_dfa_new_cycle.  */
+
+  return cached_can_issue_more;
+}
+
+/* Skip cycles without sorting the ready queue.  This will move insn from
+   Q->R.  If this is the last cycle we are skipping, allow sorting of ready
+   queue by sh_reorder.  */
+
+/* Generally, skipping this many cycles is sufficient for all insns to move
+   from Q -> R.  */
+#define MAX_SKIPS 8
+
+static int
+sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
+                  int sched_verbose ATTRIBUTE_UNUSED,
+                  rtx insn ATTRIBUTE_UNUSED,
+                  int last_clock_var,
+                  int clock_var,
+                  int *sort_p)
+{
+  if (reload_completed)
+    return 0;
+
+  if (skip_cycles)
+    {
+      if ((clock_var - last_clock_var) < MAX_SKIPS)
+        {
+          *sort_p = 0;
+          return 1;
+        }
+      /* If this is the last cycle we are skipping, allow reordering of R.  */
+      if ((clock_var - last_clock_var) == MAX_SKIPS)
+        {
+          *sort_p = 1;
+          return 1;
+        }
+    }
+
+  skip_cycles = 0;
+
+  return 0;
+}
+
+/* SHmedia requires registers for branches, so we can't generate new
+   branches past reload.  */
+static bool
+sh_cannot_modify_jumps_p (void)
+{
+  return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
+}
+
+static reg_class_t
+sh_target_reg_class (void)
+{
+  return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
+}
+
+static bool
+sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
+{
+  HARD_REG_SET dummy;
+#if 0
+  rtx insn;
+#endif
+
+  if (! shmedia_space_reserved_for_target_registers)
+    return 0;
+  if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
+    return 0;
+  if (calc_live_regs (&dummy) >= 6 * 8)  /* NOTE(review): unit of 6 * 8?  */
+    return 1;
+  return 0;
+}
+
+static bool
+sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
+{
+  return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
+}
+
+/*
+   On the SH1..SH4, the trampoline looks like
+   2 0002 D202                  mov.l   l2,r2
+   1 0000 D301                  mov.l   l1,r3
+   3 0004 422B                  jmp     @r2
+   4 0006 0009                  nop
+   5 0008 00000000      l1:     .long   area
+   6 000c 00000000      l2:     .long   function
+
+   SH5 (compact) uses r1 instead of r3 for the static chain.  */
+
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+   TRAMP_MEM is the trampoline MEM; the function address FNADDR is taken
+   from FNDECL's DECL_RTL.  CXT is an RTX for the static chain value.  */
+
+static void
+sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
+{
+  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+  rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
+
+  if (TARGET_SHMEDIA64)
+    {
+      rtx tramp_templ;
+      int fixed_len;
+
+      rtx movi1 = GEN_INT (0xcc000010);
+      rtx shori1 = GEN_INT (0xc8000010);
+      rtx src, dst;
+
+      /* The following trampoline works within a +- 128 KB range for cxt:
+         ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
+         shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
+         gettr tr1,r1; blink tr0,r63  */
+      /* Address rounding makes it hard to compute the exact bounds of the
+         offset for this trampoline, but we have a rather generous offset
+         range, so frame_offset should do fine as an upper bound.  */
+      if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
+        {
+          /* ??? could optimize this trampoline initialization
+             by writing DImode words with two insns each.  */
+          rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
+          rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
+          insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
+          insn = gen_rtx_AND (DImode, insn, mask);
+          /* Or in ptb/u .,tr1 pattern.  */
+          insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
+          insn = force_operand (insn, NULL_RTX);
+          insn = gen_lowpart (SImode, insn);
+          emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
+          insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
+          insn = gen_rtx_AND (DImode, insn, mask);
+          insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
+          insn = gen_lowpart (SImode, insn);
+          emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
+          insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
+          insn = gen_rtx_AND (DImode, insn, mask);
+          insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
+          insn = gen_lowpart (SImode, insn);
+          emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
+          insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
+          insn = gen_rtx_AND (DImode, insn, mask);
+          insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
+          insn = gen_lowpart (SImode, insn);
+          emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
+          insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
+          insn = gen_rtx_AND (DImode, insn, mask);
+          insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
+          insn = gen_lowpart (SImode, insn);
+          emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
+          emit_move_insn (adjust_address (tramp_mem, SImode, 20),
+                          GEN_INT (0x6bf10600));
+          emit_move_insn (adjust_address (tramp_mem, SImode, 24),
+                          GEN_INT (0x4415fc10));
+          emit_move_insn (adjust_address (tramp_mem, SImode, 28),
+                          GEN_INT (0x4401fff0));
+          emit_insn (gen_ic_invalidate_line (tramp));
+          return;
+        }
+      tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
+      fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
+
+      tramp_templ = gen_datalabel_ref (tramp_templ);
+      dst = tramp_mem;
+      src = gen_const_mem (BLKmode, tramp_templ);
+      set_mem_align (dst, 256);
+      set_mem_align (src, 64);
+      emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
+
+      emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
+      emit_move_insn (adjust_address (tramp_mem, Pmode,
+                                      fixed_len + GET_MODE_SIZE (Pmode)),
+                      cxt);
+      emit_insn (gen_ic_invalidate_line (tramp));
+      return;
+    }
+  else if (TARGET_SHMEDIA)
+    {
+      /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
+         movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63  */
+      rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
+      rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
+      /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010  concatenated,
+         rotated 10 right, and higher 16 bit of every 32 selected.  */
+      rtx movishori
+        = force_reg (V2HImode, (simplify_gen_subreg
+                                (V2HImode, GEN_INT (0x4330432), SImode, 0)));
+      rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
+      rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
+
+      fnaddr = force_reg (SImode, fnaddr);
+      cxt = force_reg (SImode, cxt);
+      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
+                                 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
+                                 movishori));
+      emit_insn (gen_rotrdi3_mextr (quad0, quad0,
+                                    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
+      emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
+      emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
+      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
+                                 gen_rtx_SUBREG (V2HImode, cxt, 0),
+                                 movishori));
+      emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
+                                    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
+      emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
+      if (TARGET_LITTLE_ENDIAN)
+        {
+          emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
+          emit_insn (gen_mextr4 (quad2, cxtload, blink));
+        }
+      else
+        {
+          emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
+          emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
+        }
+      emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
+      emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
+      emit_insn (gen_ic_invalidate_line (tramp));
+      return;
+    }
+  else if (TARGET_SHCOMPACT)
+    {
+      emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
+      return;
+    }
+  /* Remaining targets: two fixed code words, then CXT at 8, FNADDR at 12.  */
+  emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
+                  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
+                                SImode));
+  emit_move_insn (adjust_address (tramp_mem, SImode, 4),
+                  gen_int_mode (TARGET_LITTLE_ENDIAN ?  0x0009422b : 0x422b0009,
+                                SImode));
+  emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
+  emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
+  if (TARGET_HARVARD)
+    {
+      if (!TARGET_INLINE_IC_INVALIDATE
+          || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
+        emit_library_call (function_symbol (NULL, "__ic_invalidate",
+                                            FUNCTION_ORDINARY),
+                           LCT_NORMAL, VOIDmode, 1, tramp, SImode);
+      else
+        emit_insn (gen_ic_invalidate_line (tramp));
+    }
+}
+
+/* On SH5, trampolines are SHmedia code, so add 1 to the address.  */
+
+static rtx
+sh_trampoline_adjust_address (rtx tramp)
+{
+  if (TARGET_SHMEDIA)
+    tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
+                                 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
+  return tramp;
+}
+
+/* FIXME: This is overly conservative.  A SHcompact function that
+   receives arguments ``by reference'' will have them stored in its
+   own stack frame, so it must not pass pointers or references to
+   these arguments to other functions by means of sibling calls.
*/ +/* If PIC, we cannot make sibling calls to global functions + because the PLT requires r12 to be live. */ +static bool +sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +{ + return (1 + && (! TARGET_SHCOMPACT + || crtl->args.info.stack_regs == 0) + && ! sh_cfun_interrupt_handler_p () + && (! flag_pic + || (decl && ! TREE_PUBLIC (decl)) + || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT))); +} + +/* Machine specific built-in functions. */ + +struct builtin_description +{ + const enum insn_code icode; + const char *const name; + int signature; + tree fndecl; +}; + +/* Describe number and signedness of arguments; arg[0] == result + (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */ +/* 9 (8|1): 64-bit pointer, 10 (8|2): 32-bit pointer. */ +static const char signature_args[][4] = +{ +#define SH_BLTIN_V2SI2 0 + { 4, 4 }, +#define SH_BLTIN_V4HI2 1 + { 4, 4 }, +#define SH_BLTIN_V2SI3 2 + { 4, 4, 4 }, +#define SH_BLTIN_V4HI3 3 + { 4, 4, 4 }, +#define SH_BLTIN_V8QI3 4 + { 4, 4, 4 }, +#define SH_BLTIN_MAC_HISI 5 + { 1, 4, 4, 1 }, +#define SH_BLTIN_SH_HI 6 + { 4, 4, 1 }, +#define SH_BLTIN_SH_SI 7 + { 4, 4, 1 }, +#define SH_BLTIN_V4HI2V2SI 8 + { 4, 4, 4 }, +#define SH_BLTIN_V4HI2V8QI 9 + { 4, 4, 4 }, +#define SH_BLTIN_SISF 10 + { 4, 2 }, +#define SH_BLTIN_LDUA_L 11 + { 2, 10 }, +#define SH_BLTIN_LDUA_Q 12 + { 1, 10 }, +#define SH_BLTIN_STUA_L 13 + { 0, 10, 2 }, +#define SH_BLTIN_STUA_Q 14 + { 0, 10, 1 }, +#define SH_BLTIN_LDUA_L64 15 + { 2, 9 }, +#define SH_BLTIN_LDUA_Q64 16 + { 1, 9 }, +#define SH_BLTIN_STUA_L64 17 + { 0, 9, 2 }, +#define SH_BLTIN_STUA_Q64 18 + { 0, 9, 1 }, +#define SH_BLTIN_NUM_SHARED_SIGNATURES 19 +#define SH_BLTIN_2 19 +#define SH_BLTIN_SU 19 + { 1, 2 }, +#define SH_BLTIN_3 20 +#define SH_BLTIN_SUS 20 + { 2, 2, 1 }, +#define SH_BLTIN_PSSV 21 + { 0, 8, 2, 2 }, +#define SH_BLTIN_XXUU 22 +#define SH_BLTIN_UUUU 22 + { 1, 1, 1, 1 }, +#define SH_BLTIN_PV 23 + { 0, 8 }, +}; +/* mcmv: operands considered unsigned. 
*/ +/* mmulsum_wq, msad_ubq: result considered unsigned long long. */ +/* mperm: control value considered unsigned int. */ +/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */ +/* mshards_q: returns signed short. */ +/* nsb: takes long long arg, returns unsigned char. */ +static struct builtin_description bdesc[] = +{ + { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 }, + { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 }, + { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 }, + { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 }, + { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 }, + { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 }, + { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 }, + { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 }, + { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 }, + { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 }, + { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 }, + { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 }, + { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 }, + { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 }, + { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 }, + { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", 
SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 }, + { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 }, + { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 }, + { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 }, + { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 }, + { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 }, + { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 }, + { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 }, + { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 }, + { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 }, + { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 }, + { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 }, + { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 }, + { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 }, + { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 }, + { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 }, + { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 }, + { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 }, + { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 }, + { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 }, + { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 }, + { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 }, + { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", 
SH_BLTIN_SH_HI, 0 }, + { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 }, + { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 }, + { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 }, + { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 }, + { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 }, + { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 }, + { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 }, + { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 }, + { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 }, + { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 }, + { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 }, + { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 }, + { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 }, + { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 }, + { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 }, + { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 }, + { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 }, + { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 }, + { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 }, + { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 }, + { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 }, + { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 }, + { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 }, + { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 }, + { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 }, + { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 }, + { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 }, + { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 
}, + { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 }, + { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 }, + { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 }, + { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 }, + { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 }, + { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 }, +}; + +static void +sh_media_init_builtins (void) +{ + tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES]; + struct builtin_description *d; + + memset (shared, 0, sizeof shared); + for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++) + { + tree type, arg_type = 0; + int signature = d->signature; + int i; + + if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature]) + type = shared[signature]; + else + { + int has_result = signature_args[signature][0] != 0; + + if ((signature_args[signature][1] & 8) + && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32) + || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64))) + continue; + if (! TARGET_FPU_ANY + && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode)) + continue; + type = void_list_node; + for (i = 3; ; i--) + { + int arg = signature_args[signature][i]; + int opno = i - 1 + has_result; + + if (arg & 8) + arg_type = ptr_type_node; + else if (arg) + arg_type = (*lang_hooks.types.type_for_mode) + (insn_data[d->icode].operand[opno].mode, + (arg & 1)); + else if (i) + continue; + else + arg_type = void_type_node; + if (i == 0) + break; + type = tree_cons (NULL_TREE, arg_type, type); + } + type = build_function_type (arg_type, type); + if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES) + shared[signature] = type; + } + d->fndecl = + add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD, + NULL, NULL_TREE); + } +} + +/* Returns the shmedia builtin decl for CODE. 
*/ + +static tree +sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= ARRAY_SIZE (bdesc)) + return error_mark_node; + + return bdesc[code].fndecl; +} + +/* Implements target hook vector_mode_supported_p. */ +bool +sh_vector_mode_supported_p (enum machine_mode mode) +{ + if (TARGET_FPU_ANY + && ((mode == V2SFmode) + || (mode == V4SFmode) + || (mode == V16SFmode))) + return true; + + else if (TARGET_SHMEDIA + && ((mode == V8QImode) + || (mode == V2HImode) + || (mode == V4HImode) + || (mode == V2SImode))) + return true; + + return false; +} + +bool +sh_frame_pointer_required (void) +{ +/* If needed override this in other tm.h files to cope with various OS + lossage requiring a frame pointer. */ + if (SUBTARGET_FRAME_POINTER_REQUIRED) + return true; + + if (crtl->profile) + return true; + + return false; +} + +/* Implements target hook dwarf_calling_convention. Return an enum + of dwarf_calling_convention. */ +int +sh_dwarf_calling_convention (const_tree func) +{ + if (sh_attr_renesas_p (func)) + return DW_CC_GNU_renesas_sh; + + return DW_CC_normal; +} + +static void +sh_init_builtins (void) +{ + if (TARGET_SHMEDIA) + sh_media_init_builtins (); +} + +/* Returns the sh builtin decl for CODE. */ + +static tree +sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (TARGET_SHMEDIA) + return sh_media_builtin_decl (code, initialize_p); + + return error_mark_node; +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
*/ + +static rtx +sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, int ignore) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + const struct builtin_description *d = &bdesc[fcode]; + enum insn_code icode = d->icode; + int signature = d->signature; + enum machine_mode tmode = VOIDmode; + int nop = 0, i; + rtx op[4]; + rtx pat = 0; + + if (signature_args[signature][0]) + { + if (ignore) + return 0; + + tmode = insn_data[icode].operand[0].mode; + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + op[nop++] = target; + } + else + target = 0; + + for (i = 1; i <= 3; i++, nop++) + { + tree arg; + enum machine_mode opmode, argmode; + tree optype; + + if (! signature_args[signature][i]) + break; + arg = CALL_EXPR_ARG (exp, i - 1); + if (arg == error_mark_node) + return const0_rtx; + if (signature_args[signature][i] & 8) + { + opmode = ptr_mode; + optype = ptr_type_node; + } + else + { + opmode = insn_data[icode].operand[nop].mode; + optype = (*lang_hooks.types.type_for_mode) (opmode, 0); + } + argmode = TYPE_MODE (TREE_TYPE (arg)); + if (argmode != opmode) + arg = build1 (NOP_EXPR, optype, arg); + op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL); + if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode)) + op[nop] = copy_to_mode_reg (opmode, op[nop]); + } + + switch (nop) + { + case 1: + pat = (*insn_data[d->icode].genfun) (op[0]); + break; + case 2: + pat = (*insn_data[d->icode].genfun) (op[0], op[1]); + break; + case 3: + pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]); + break; + case 4: + pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]); + break; + default: + gcc_unreachable (); + } + if (! 
pat) + return 0; + emit_insn (pat); + return target; +} + +void +sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1) +{ + rtx sel0 = const0_rtx; + rtx sel1 = const1_rtx; + rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op; + rtx op = gen_rtx_fmt_e (code, SFmode, op1); + + emit_insn ((*fn) (op0, op1, op, sel0, sel0)); + emit_insn ((*fn) (op0, op1, op, sel1, sel1)); +} + +void +sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2) +{ + rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2); + + emit_insn (gen_binary_sf_op0 (op0, op1, op2, op)); + emit_insn (gen_binary_sf_op1 (op0, op1, op2, op)); +} + +/* Return true if hard register REGNO can hold a value of machine-mode MODE. + We can allow any mode in any general register. The special registers + and the PR only allow SImode. + + We cannot hold DCmode values in the XD registers because alter_reg + handles subregs of them incorrectly. We could work around this by + spacing the XD registers like the DR registers, but this would require + additional memory in every compilation to hold larger register vectors. + We could hold SFmode / SCmode values in XD registers, but that + would require a tertiary reload when reloading from / to memory, + and a secondary reload to reload from / to general regs; that + seems to be a losing proposition. + + We want to allow TImode FP regs so that when V4SFmode is loaded as TImode, + it won't be ferried through GP registers first. 
*/ + +bool +sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) +{ + if (SPECIAL_REGISTER_P (regno)) + return mode == SImode; + + if (regno == FPUL_REG) + return (mode == SImode || mode == SFmode); + + if (FP_REGISTER_P (regno) && mode == SFmode) + return true; + + if (mode == V2SFmode) + { + if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0) + || GENERAL_REGISTER_P (regno))) + return true; + else + return false; + } + + if (mode == V4SFmode) + { + if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0) + || GENERAL_REGISTER_P (regno)) + return true; + else + return false; + } + + if (mode == V16SFmode) + { + if (TARGET_SHMEDIA) + { + if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0) + return true; + else + return false; + } + else + return regno == FIRST_XD_REG; + } + + if (FP_REGISTER_P (regno)) + { + if (mode == SFmode + || mode == SImode + || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode) + || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode) + || mode == DCmode + || (TARGET_SHMEDIA + && (mode == DFmode || mode == DImode + || mode == V2SFmode || mode == TImode))) + && ((regno - FIRST_FP_REG) & 1) == 0) + || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode + && ((regno - FIRST_FP_REG) & 3) == 0)) + return true; + else + return false; + } + + if (XD_REGISTER_P (regno)) + return mode == DFmode; + + if (TARGET_REGISTER_P (regno)) + return (mode == DImode || mode == SImode || mode == PDImode); + + if (regno == PR_REG) + return mode == SImode; + + if (regno == FPSCR_REG) + return mode == PSImode; + + /* FIXME. This works around PR target/37633 for -O0. */ + if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4) + { + unsigned int n = GET_MODE_SIZE (mode) / 8; + + if (regno >= FIRST_GENERAL_REG + 10 - n + 1 + && regno <= FIRST_GENERAL_REG + 14) + return false; + } + + return true; +} + +/* Return true if a mode change from FROM to TO is invalid for registers + in class RCLASS. 
*/ +bool +sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, + enum reg_class rclass) +{ + /* We want to enable the use of SUBREGs as a means to + VEC_SELECT a single element of a vector. */ + if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode) + return (reg_classes_intersect_p (GENERAL_REGS, rclass)); + + if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to)) + { + if (TARGET_LITTLE_ENDIAN) + { + if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8) + return reg_classes_intersect_p (DF_REGS, rclass); + } + else + { + if (GET_MODE_SIZE (from) < 8) + return reg_classes_intersect_p (DF_HI_REGS, rclass); + } + } + return 0; +} + +/* Return true if registers in machine mode MODE will likely be + allocated to registers in small register classes. */ + +bool +sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return (! TARGET_SHMEDIA); +} + +/* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times + that label is used. */ + +void +sh_mark_label (rtx address, int nuses) +{ + if (GOTOFF_P (address)) + { + /* Extract the label or symbol. */ + address = XEXP (address, 0); + if (GET_CODE (address) == PLUS) + address = XEXP (address, 0); + address = XVECEXP (address, 0, 0); + } + if (GET_CODE (address) == LABEL_REF + && LABEL_P (XEXP (address, 0))) + LABEL_NUSES (XEXP (address, 0)) += nuses; +} + +/* Compute extra cost of moving data between one register class + and another. */ + +/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass + uses this information. Hence, the general register <-> floating point + register information here is not used for SFmode. */ + +static int +sh_register_move_cost (enum machine_mode mode, + reg_class_t srcclass, reg_class_t dstclass) +{ + if (dstclass == T_REGS || dstclass == PR_REGS) + return 10; + + if (dstclass == MAC_REGS && srcclass == MAC_REGS) + return 4; + + if (mode == SImode && ! 
TARGET_SHMEDIA && TARGET_FMOVD + && REGCLASS_HAS_FP_REG (srcclass) + && REGCLASS_HAS_FP_REG (dstclass)) + return 4; + + if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS) + return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7); + + if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS) + || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass))) + return 9; + + if ((REGCLASS_HAS_FP_REG (dstclass) + && REGCLASS_HAS_GENERAL_REG (srcclass)) + || (REGCLASS_HAS_GENERAL_REG (dstclass) + && REGCLASS_HAS_FP_REG (srcclass))) + return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + * ((GET_MODE_SIZE (mode) + 7) / 8U)); + + if ((dstclass == FPUL_REGS + && REGCLASS_HAS_GENERAL_REG (srcclass)) + || (srcclass == FPUL_REGS + && REGCLASS_HAS_GENERAL_REG (dstclass))) + return 5; + + if ((dstclass == FPUL_REGS + && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS)) + || (srcclass == FPUL_REGS + && (dstclass == PR_REGS || dstclass == MAC_REGS))) + return 7; + + if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass)) + || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass))) + return 20; + + /* ??? ptabs faults on (value & 0x3) == 0x3 */ + if (TARGET_SHMEDIA + && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS)) + { + if (sh_gettrcost >= 0) + return sh_gettrcost; + else if (!TARGET_PT_FIXED) + return 100; + } + + if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass)) + || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass))) + return 4; + + if (TARGET_SHMEDIA + || (TARGET_FMOVD + && ! REGCLASS_HAS_GENERAL_REG (srcclass) + && ! 
REGCLASS_HAS_GENERAL_REG (dstclass))) + return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U); + + return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U); +} + +static rtx emit_load_ptr (rtx, rtx); + +static rtx +emit_load_ptr (rtx reg, rtx addr) +{ + rtx mem = gen_const_mem (ptr_mode, addr); + + if (Pmode != ptr_mode) + mem = gen_rtx_SIGN_EXTEND (Pmode, mem); + return emit_move_insn (reg, mem); +} + +static void +sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, + tree function) +{ + CUMULATIVE_ARGS cum; + int structure_value_byref = 0; + rtx this_rtx, this_value, sibcall, insns, funexp; + tree funtype = TREE_TYPE (function); + int simple_add = CONST_OK_FOR_ADD (delta); + int did_load = 0; + rtx scratch0, scratch1, scratch2; + unsigned i; + + reload_completed = 1; + epilogue_completed = 1; + current_function_uses_only_leaf_regs = 1; + + emit_note (NOTE_INSN_PROLOGUE_END); + + /* Find the "this" pointer. We have such a wide range of ABIs for the + SH that it's best to do this completely machine independently. + "this" is passed as first argument, unless a structure return pointer + comes first, in which case "this" comes second. */ + INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1); +#ifndef PCC_STATIC_STRUCT_RETURN + if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) + structure_value_byref = 1; +#endif /* not PCC_STATIC_STRUCT_RETURN */ + if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0) + { + tree ptype = build_pointer_type (TREE_TYPE (funtype)); + + sh_function_arg_advance (&cum, Pmode, ptype, true); + } + this_rtx = sh_function_arg (&cum, Pmode, ptr_type_node, true); + + /* For SHcompact, we only have r0 for a scratch register: r1 is the + static chain pointer (even if you can't have nested virtual functions + right now, someone might implement them sometime), and the rest of the + registers are used for argument passing, are callee-saved, or reserved. 
*/ + /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg / + -ffixed-reg has been used. */ + if (! call_used_regs[0] || fixed_regs[0]) + error ("r0 needs to be available as a call-clobbered register"); + scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0); + if (! TARGET_SH5) + { + if (call_used_regs[1] && ! fixed_regs[1]) + scratch1 = gen_rtx_REG (ptr_mode, 1); + /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer + pointing where to return struct values. */ + if (call_used_regs[3] && ! fixed_regs[3]) + scratch2 = gen_rtx_REG (Pmode, 3); + } + else if (TARGET_SHMEDIA) + { + for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++) + if (i != REGNO (scratch0) && + call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i)) + { + scratch1 = gen_rtx_REG (ptr_mode, i); + break; + } + if (scratch1 == scratch0) + error ("need a second call-clobbered general purpose register"); + for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++) + if (call_used_regs[i] && ! fixed_regs[i]) + { + scratch2 = gen_rtx_REG (Pmode, i); + break; + } + if (scratch2 == scratch0) + error ("need a call-clobbered target register"); + } + + this_value = plus_constant (this_rtx, delta); + if (vcall_offset + && (simple_add || scratch0 != scratch1) + && strict_memory_address_p (ptr_mode, this_value)) + { + emit_load_ptr (scratch0, this_value); + did_load = 1; + } + + if (!delta) + ; /* Do nothing. */ + else if (simple_add) + emit_move_insn (this_rtx, this_value); + else + { + emit_move_insn (scratch1, GEN_INT (delta)); + emit_insn (gen_add2_insn (this_rtx, scratch1)); + } + + if (vcall_offset) + { + rtx offset_addr; + + if (!did_load) + emit_load_ptr (scratch0, this_rtx); + + offset_addr = plus_constant (scratch0, vcall_offset); + if (strict_memory_address_p (ptr_mode, offset_addr)) + ; /* Do nothing. */ + else if (! TARGET_SH5 && scratch0 != scratch1) + { + /* scratch0 != scratch1, and we have indexed loads. 
Get better + schedule by loading the offset into r1 and using an indexed + load - then the load of r1 can issue before the load from + (this_rtx + delta) finishes. */ + emit_move_insn (scratch1, GEN_INT (vcall_offset)); + offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1); + } + else if (CONST_OK_FOR_ADD (vcall_offset)) + { + emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset))); + offset_addr = scratch0; + } + else if (scratch0 != scratch1) + { + emit_move_insn (scratch1, GEN_INT (vcall_offset)); + emit_insn (gen_add2_insn (scratch0, scratch1)); + offset_addr = scratch0; + } + else + gcc_unreachable (); /* FIXME */ + emit_load_ptr (scratch0, offset_addr); + + if (Pmode != ptr_mode) + scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0); + emit_insn (gen_add2_insn (this_rtx, scratch0)); + } + + /* Generate a tail call to the target function. */ + if (! TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + /* If the function is overridden, so is the thunk, hence we don't + need GOT addressing even if this is a public symbol. */ +#if 0 + if (TARGET_SH1 && ! flag_weak) + sibcall = gen_sibcalli_thunk (funexp, const0_rtx); + else +#endif + if (TARGET_SH2 && flag_pic) + { + sibcall = gen_sibcall_pcrel (funexp, const0_rtx); + XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2; + } + else + { + if (TARGET_SHMEDIA && flag_pic) + { + funexp = gen_sym2PIC (funexp); + PUT_MODE (funexp, Pmode); + } + emit_move_insn (scratch2, funexp); + funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2); + sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX); + } + sibcall = emit_call_insn (sibcall); + SIBLING_CALL_P (sibcall) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx); + emit_barrier (); + + /* Run just enough of rest_of_compilation to do scheduling and get + the insns emitted. Note that use_thunk calls + assemble_start_function and assemble_end_function. 
*/ + + insn_locators_alloc (); + insns = get_insns (); + + if (optimize > 0) + { + if (! cfun->cfg) + init_flow (cfun); + split_all_insns_noflow (); + } + + sh_reorg (); + + if (optimize > 0 && flag_delayed_branch) + dbr_schedule (insns); + + shorten_branches (insns); + final_start_function (insns, file, 1); + final (insns, file, 1); + final_end_function (); + + reload_completed = 0; + epilogue_completed = 0; +} + +rtx +function_symbol (rtx target, const char *name, enum sh_function_kind kind) +{ + rtx sym; + + /* If this is not an ordinary function, the name usually comes from a + string literal or an sprintf buffer. Make sure we use the same + string consistently, so that cse will be able to unify address loads. */ + if (kind != FUNCTION_ORDINARY) + name = IDENTIFIER_POINTER (get_identifier (name)); + sym = gen_rtx_SYMBOL_REF (Pmode, name); + SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION; + if (flag_pic) + switch (kind) + { + case FUNCTION_ORDINARY: + break; + case SFUNC_GOT: + { + rtx reg = target ? target : gen_reg_rtx (Pmode); + + emit_insn (gen_symGOT2reg (reg, sym)); + sym = reg; + break; + } + case SFUNC_STATIC: + { + /* ??? To allow cse to work, we use GOTOFF relocations. + we could add combiner patterns to transform this into + straight pc-relative calls with sym2PIC / bsrf when + label load and function call are still 1:1 and in the + same basic block during combine. */ + rtx reg = target ? target : gen_reg_rtx (Pmode); + + emit_insn (gen_symGOTOFF2reg (reg, sym)); + sym = reg; + break; + } + } + if (target && sym != target) + { + emit_move_insn (target, sym); + return target; + } + return sym; +} + +/* Find the number of a general purpose register in S. */ +static int +scavenge_reg (HARD_REG_SET *s) +{ + int r; + for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++) + if (TEST_HARD_REG_BIT (*s, r)) + return r; + return -1; +} + +rtx +sh_get_pr_initial_val (void) +{ + rtx val; + + /* ??? 
Unfortunately, get_hard_reg_initial_val doesn't always work for the + PR register on SHcompact, because it might be clobbered by the prologue. + We check first if that is known to be the case. */ + if (TARGET_SHCOMPACT + && ((crtl->args.info.call_cookie + & ~ CALL_COOKIE_RET_TRAMP (1)) + || crtl->saves_all_registers)) + return gen_frame_mem (SImode, return_address_pointer_rtx); + + /* If we haven't finished rtl generation, there might be a nonlocal label + that we haven't seen yet. + ??? get_hard_reg_initial_val fails if it is called after register + allocation has started, unless it has been called before for the + same register. And even then, we end in trouble if we didn't use + the register in the same basic block before. So call + get_hard_reg_initial_val now and wrap it in an unspec if we might + need to replace it. */ + /* ??? We also must do this for TARGET_SH1 in general, because otherwise + combine can put the pseudo returned by get_hard_reg_initial_val into + instructions that need a general purpose register, which will fail to + be recognized when the pseudo becomes allocated to PR. */ + val + = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? 
PR_MEDIA_REG : PR_REG); + if (TARGET_SH1) + return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA); + return val; +} + +int +sh_expand_t_scc (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx target = operands[0]; + rtx op0 = operands[2]; + rtx op1 = operands[3]; + rtx result = target; + HOST_WIDE_INT val; + + if (!REG_P (op0) || REGNO (op0) != T_REG + || !CONST_INT_P (op1)) + return 0; + if (!REG_P (result)) + result = gen_reg_rtx (SImode); + val = INTVAL (op1); + if ((code == EQ && val == 1) || (code == NE && val == 0)) + emit_insn (gen_movt (result)); + else if (TARGET_SH2A && ((code == EQ && val == 0) + || (code == NE && val == 1))) + emit_insn (gen_xorsi3_movrt (result)); + else if ((code == EQ && val == 0) || (code == NE && val == 1)) + { + emit_clobber (result); + emit_insn (gen_subc (result, result, result)); + emit_insn (gen_addsi3 (result, result, const1_rtx)); + } + else if (code == EQ || code == NE) + emit_insn (gen_move_insn (result, GEN_INT (code == NE))); + else + return 0; + if (result != target) + emit_move_insn (target, result); + return 1; +} + +/* INSN is an sfunc; return the rtx that describes the address used. */ +static rtx +extract_sfunc_addr (rtx insn) +{ + rtx pattern, part = NULL_RTX; + int len, i; + + pattern = PATTERN (insn); + len = XVECLEN (pattern, 0); + for (i = 0; i < len; i++) + { + part = XVECEXP (pattern, 0, i); + if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode + && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0)))) + return XEXP (part, 0); + } + gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE); + return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1); +} + +/* Verify that the register in use_sfunc_addr still agrees with the address + used in the sfunc. This prevents fill_slots_from_thread from changing + use_sfunc_addr. + INSN is the use_sfunc_addr instruction, and REG is the register it + guards. 
*/ +int +check_use_sfunc_addr (rtx insn, rtx reg) +{ + /* Search for the sfunc. It should really come right after INSN. */ + while ((insn = NEXT_INSN (insn))) + { + if (LABEL_P (insn) || JUMP_P (insn)) + break; + if (! INSN_P (insn)) + continue; + + if (GET_CODE (PATTERN (insn)) == SEQUENCE) + insn = XVECEXP (PATTERN (insn), 0, 0); + if (GET_CODE (PATTERN (insn)) != PARALLEL + || get_attr_type (insn) != TYPE_SFUNC) + continue; + return rtx_equal_p (extract_sfunc_addr (insn), reg); + } + gcc_unreachable (); +} + +/* This function returns a constant rtx that represents 2**15 / pi in + SFmode. It's used to scale SFmode angles, in radians, to a + fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi + maps to 0x10000). */ + +static GTY(()) rtx sh_fsca_sf2int_rtx; + +rtx +sh_fsca_sf2int (void) +{ + if (! sh_fsca_sf2int_rtx) + { + REAL_VALUE_TYPE rv; + + real_from_string (&rv, "10430.378350470453"); + sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode); + } + + return sh_fsca_sf2int_rtx; +} + +/* This function returns a constant rtx that represents 2**15 / pi in + DFmode. It's used to scale DFmode angles, in radians, to a + fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi + maps to 0x10000). */ + +static GTY(()) rtx sh_fsca_df2int_rtx; + +rtx +sh_fsca_df2int (void) +{ + if (! sh_fsca_df2int_rtx) + { + REAL_VALUE_TYPE rv; + + real_from_string (&rv, "10430.378350470453"); + sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode); + } + + return sh_fsca_df2int_rtx; +} + +/* This function returns a constant rtx that represents pi / 2**15 in + SFmode. It's used to scale a fixed-point signed 16.16-bit fraction + of a full circle back to an SFmode value (i.e., 0x10000 maps to + 2*pi). */ + +static GTY(()) rtx sh_fsca_int2sf_rtx; + +rtx +sh_fsca_int2sf (void) +{ + if (! 
sh_fsca_int2sf_rtx) + { + REAL_VALUE_TYPE rv; + + real_from_string (&rv, "9.587379924285257e-5"); + sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode); + } + + return sh_fsca_int2sf_rtx; +} + +/* Initialize the CUMULATIVE_ARGS structure. */ + +void +sh_init_cumulative_args (CUMULATIVE_ARGS * pcum, + tree fntype, + rtx libname ATTRIBUTE_UNUSED, + tree fndecl, + signed int n_named_args, + enum machine_mode mode) +{ + pcum->arg_count [(int) SH_ARG_FLOAT] = 0; + pcum->free_single_fp_reg = 0; + pcum->stack_regs = 0; + pcum->byref_regs = 0; + pcum->byref = 0; + pcum->outgoing = (n_named_args == -1) ? 0 : 1; + + /* XXX - Should we check TARGET_HITACHI here ??? */ + pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0; + + if (fntype) + { + pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi) + && aggregate_value_p (TREE_TYPE (fntype), fndecl)); + pcum->prototype_p = prototype_p (fntype); + pcum->arg_count [(int) SH_ARG_INT] + = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl); + + pcum->call_cookie + = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT + && pcum->arg_count [(int) SH_ARG_INT] == 0 + && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode + ? int_size_in_bytes (TREE_TYPE (fntype)) + : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4 + && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype))) + == FIRST_RET_REG)); + } + else + { + pcum->arg_count [(int) SH_ARG_INT] = 0; + pcum->prototype_p = FALSE; + if (mode != VOIDmode) + { + pcum->call_cookie = + CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT + && GET_MODE_SIZE (mode) > 4 + && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG); + + /* If the default ABI is the Renesas ABI then all library + calls must assume that the library will be using the + Renesas ABI. So if the function would return its result + in memory then we must force the address of this memory + block onto the stack. 
Ideally we would like to call + targetm.calls.return_in_memory() here but we do not have + the TYPE or the FNDECL available so we synthesize the + contents of that function as best we can. */ + pcum->force_mem = + (TARGET_DEFAULT & MASK_HITACHI) + && (mode == BLKmode + || (GET_MODE_SIZE (mode) > 4 + && !(mode == DFmode + && TARGET_FPU_DOUBLE))); + } + else + { + pcum->call_cookie = 0; + pcum->force_mem = FALSE; + } + } +} + +/* Replace any occurrence of FROM(n) in X with TO(n). The function does + not enter into CONST_DOUBLE for the replace. + + Note that copying is not done so X must not be shared unless all copies + are to be modified. + + This is like replace_rtx, except that we operate on N_REPLACEMENTS + replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is + replacements[n*2+1] - and that we take mode changes into account. + + If a replacement is ambiguous, return NULL_RTX. + + If MODIFY is zero, don't modify any rtl in place, + just return zero or nonzero for failure / success. */ + +rtx +replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify) +{ + int i, j; + const char *fmt; + + /* The following prevents loops occurrence when we change MEM in + CONST_DOUBLE onto the same CONST_DOUBLE. */ + if (x != 0 && GET_CODE (x) == CONST_DOUBLE) + return x; + + for (i = n_replacements - 1; i >= 0 ; i--) + if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1])) + return replacements[i*2+1]; + + /* Allow this function to make replacements in EXPR_LISTs. */ + if (x == 0) + return 0; + + if (GET_CODE (x) == SUBREG) + { + rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements, + n_replacements, modify); + + if (CONST_INT_P (new_rtx)) + { + x = simplify_subreg (GET_MODE (x), new_rtx, + GET_MODE (SUBREG_REG (x)), + SUBREG_BYTE (x)); + if (! 
x) + abort (); + } + else if (modify) + SUBREG_REG (x) = new_rtx; + + return x; + } + else if (REG_P (x)) + { + unsigned regno = REGNO (x); + unsigned nregs = (regno < FIRST_PSEUDO_REGISTER + ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1); + rtx result = NULL_RTX; + + for (i = n_replacements - 1; i >= 0; i--) + { + rtx from = replacements[i*2]; + rtx to = replacements[i*2+1]; + unsigned from_regno, from_nregs, to_regno, new_regno; + + if (!REG_P (from)) + continue; + from_regno = REGNO (from); + from_nregs = (from_regno < FIRST_PSEUDO_REGISTER + ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1); + if (regno < from_regno + from_nregs && regno + nregs > from_regno) + { + if (regno < from_regno + || regno + nregs > from_regno + nregs + || !REG_P (to) + || result) + return NULL_RTX; + to_regno = REGNO (to); + if (to_regno < FIRST_PSEUDO_REGISTER) + { + new_regno = regno + to_regno - from_regno; + if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x)) + != nregs) + return NULL_RTX; + result = gen_rtx_REG (GET_MODE (x), new_regno); + } + else if (GET_MODE (x) <= GET_MODE (to)) + result = gen_lowpart_common (GET_MODE (x), to); + else + result = gen_lowpart_SUBREG (GET_MODE (x), to); + } + } + return result ? result : x; + } + else if (GET_CODE (x) == ZERO_EXTEND) + { + rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements, + n_replacements, modify); + + if (CONST_INT_P (new_rtx)) + { + x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x), + new_rtx, GET_MODE (XEXP (x, 0))); + if (! 
x) + abort (); + } + else if (modify) + XEXP (x, 0) = new_rtx; + + return x; + } + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + rtx new_rtx; + + if (fmt[i] == 'e') + { + new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements, + n_replacements, modify); + if (!new_rtx) + return NULL_RTX; + if (modify) + XEXP (x, i) = new_rtx; + } + else if (fmt[i] == 'E') + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + { + new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements, + n_replacements, modify); + if (!new_rtx) + return NULL_RTX; + if (modify) + XVECEXP (x, i, j) = new_rtx; + } + } + + return x; +} + +rtx +sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext) +{ + enum rtx_code code = TRUNCATE; + + if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND) + { + rtx inner = XEXP (x, 0); + enum machine_mode inner_mode = GET_MODE (inner); + + if (inner_mode == mode) + return inner; + else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode)) + x = inner; + else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode) + && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND)) + { + code = GET_CODE (x); + x = inner; + } + } + return gen_rtx_fmt_e (code, mode, x); +} + +/* called via for_each_rtx after reload, to clean up truncates of + registers that span multiple actual hard registers. */ +int +shmedia_cleanup_truncate (rtx *p, void *n_changes) +{ + rtx x = *p, reg; + + if (GET_CODE (x) != TRUNCATE) + return 0; + reg = XEXP (x, 0); + if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg)) + { + enum machine_mode reg_mode = GET_MODE (reg); + XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, + subreg_lowpart_offset (DImode, reg_mode)); + *(int*) n_changes += 1; + return -1; + } + return 0; +} + +/* Load and store depend on the highpart of the address. 
However, + set_attr_alternative does not give well-defined results before reload, + so we must look at the rtl ourselves to see if any of the feeding + registers is used in a memref. */ + +/* Called by sh_contains_memref_p via for_each_rtx. */ +static int +sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED) +{ + return (MEM_P (*loc)); +} + +/* Return nonzero iff INSN contains a MEM. */ +int +sh_contains_memref_p (rtx insn) +{ + return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL); +} + +/* Return nonzero iff INSN loads a banked register. */ +int +sh_loads_bankedreg_p (rtx insn) +{ + if (GET_CODE (PATTERN (insn)) == SET) + { + rtx op = SET_DEST (PATTERN(insn)); + if (REG_P (op) && BANKED_REGISTER_P (REGNO (op))) + return 1; + } + + return 0; +} + +/* FNADDR is the MEM expression from a call expander. Return an address + to use in an SHmedia insn pattern. */ +rtx +shmedia_prepare_call_address (rtx fnaddr, int is_sibcall) +{ + int is_sym; + + fnaddr = XEXP (fnaddr, 0); + is_sym = GET_CODE (fnaddr) == SYMBOL_REF; + if (flag_pic && is_sym) + { + if (! SYMBOL_REF_LOCAL_P (fnaddr)) + { + rtx reg = gen_reg_rtx (Pmode); + + /* We must not use GOTPLT for sibcalls, because PIC_REG + must be restored before the PLT code gets to run. */ + if (is_sibcall) + emit_insn (gen_symGOT2reg (reg, fnaddr)); + else + emit_insn (gen_symGOTPLT2reg (reg, fnaddr)); + fnaddr = reg; + } + else + { + fnaddr = gen_sym2PIC (fnaddr); + PUT_MODE (fnaddr, Pmode); + } + } + /* If ptabs might trap, make this visible to the rest of the compiler. + We generally assume that symbols pertain to valid locations, but + it is possible to generate invalid symbols with asm or linker tricks. + In a list of functions where each returns its successor, an invalid + symbol might denote an empty list. */ + if (!TARGET_PT_FIXED + && (!is_sym || TARGET_INVALID_SYMBOLS) + && (!REG_P (fnaddr) || ! 
TARGET_REGISTER_P (REGNO (fnaddr)))) + { + rtx tr = gen_reg_rtx (PDImode); + + emit_insn (gen_ptabs (tr, fnaddr)); + fnaddr = tr; + } + else if (! target_reg_operand (fnaddr, Pmode)) + fnaddr = copy_to_mode_reg (Pmode, fnaddr); + return fnaddr; +} + +/* Implement TARGET_PREFERRED_RELOAD_CLASS. */ + +static reg_class_t +sh_preferred_reload_class (rtx x, reg_class_t rclass) +{ + if (rclass == NO_REGS + && TARGET_SHMEDIA + && (CONST_DOUBLE_P (x) + || GET_CODE (x) == SYMBOL_REF + || PIC_ADDR_P (x))) + return GENERAL_REGS; + + return rclass; +} + +/* Implement TARGET_SECONDARY_RELOAD. */ + +static reg_class_t +sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, + enum machine_mode mode, secondary_reload_info *sri) +{ + enum reg_class rclass = (enum reg_class) rclass_i; + + if (in_p) + { + if (REGCLASS_HAS_FP_REG (rclass) + && ! TARGET_SHMEDIA + && immediate_operand ((x), mode) + && ! ((fp_zero_operand (x) || fp_one_operand (x)) + && mode == SFmode && fldi_ok ())) + switch (mode) + { + case SFmode: + sri->icode = CODE_FOR_reload_insf__frn; + return NO_REGS; + case DFmode: + sri->icode = CODE_FOR_reload_indf__frn; + return NO_REGS; + case SImode: + /* ??? If we knew that we are in the appropriate mode - + single precision - we could use a reload pattern directly. 
*/ + return FPUL_REGS; + default: + abort (); + } + if (rclass == FPUL_REGS + && ((REG_P (x) + && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG + || REGNO (x) == T_REG)) + || GET_CODE (x) == PLUS)) + return GENERAL_REGS; + if (rclass == FPUL_REGS && immediate_operand (x, mode)) + { + if (satisfies_constraint_I08 (x) || fp_zero_operand (x)) + return GENERAL_REGS; + else if (mode == SFmode) + return FP_REGS; + sri->icode = CODE_FOR_reload_insi__i_fpul; + return NO_REGS; + } + if (rclass == FPSCR_REGS + && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER) + || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS))) + return GENERAL_REGS; + if (REGCLASS_HAS_FP_REG (rclass) + && TARGET_SHMEDIA + && immediate_operand (x, mode) + && x != CONST0_RTX (GET_MODE (x)) + && GET_MODE (x) != V4SFmode) + return GENERAL_REGS; + if ((mode == QImode || mode == HImode) + && TARGET_SHMEDIA && inqhi_operand (x, mode)) + { + sri->icode = ((mode == QImode) + ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi); + return NO_REGS; + } + if (TARGET_SHMEDIA && rclass == GENERAL_REGS + && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x))) + return TARGET_REGS; + } /* end of input-only processing. */ + + if (((REGCLASS_HAS_FP_REG (rclass) + && (REG_P (x) + && (GENERAL_OR_AP_REGISTER_P (REGNO (x)) + || (FP_REGISTER_P (REGNO (x)) && mode == SImode + && TARGET_FMOVD)))) + || (REGCLASS_HAS_GENERAL_REG (rclass) + && REG_P (x) + && FP_REGISTER_P (REGNO (x)))) + && ! TARGET_SHMEDIA + && (mode == SFmode || mode == SImode)) + return FPUL_REGS; + if ((rclass == FPUL_REGS + || (REGCLASS_HAS_FP_REG (rclass) + && ! TARGET_SHMEDIA && mode == SImode)) + && (MEM_P (x) + || (REG_P (x) + && (REGNO (x) >= FIRST_PSEUDO_REGISTER + || REGNO (x) == T_REG + || system_reg_operand (x, VOIDmode))))) + { + if (rclass == FPUL_REGS) + return GENERAL_REGS; + return FPUL_REGS; + } + if ((rclass == TARGET_REGS + || (TARGET_SHMEDIA && rclass == SIBCALL_REGS)) + && !satisfies_constraint_Csy (x) + && (!REG_P (x) || ! 
GENERAL_REGISTER_P (REGNO (x)))) + return GENERAL_REGS; + if ((rclass == MAC_REGS || rclass == PR_REGS) + && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x)) + && rclass != REGNO_REG_CLASS (REGNO (x))) + return GENERAL_REGS; + if (rclass != GENERAL_REGS && REG_P (x) + && TARGET_REGISTER_P (REGNO (x))) + return GENERAL_REGS; + return NO_REGS; +} + +static void +sh_conditional_register_usage (void) +{ + int regno; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++) + if (! VALID_REGISTER_P (regno)) + fixed_regs[regno] = call_used_regs[regno] = 1; + /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */ + if (TARGET_SH5) + { + call_used_regs[FIRST_GENERAL_REG + 8] + = call_used_regs[FIRST_GENERAL_REG + 9] = 1; + call_really_used_regs[FIRST_GENERAL_REG + 8] + = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1; + } + if (TARGET_SHMEDIA) + { + regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS; + CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]); + regno_reg_class[FIRST_FP_REG] = FP_REGS; + } + if (flag_pic) + { + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + } + /* Renesas saves and restores mac registers on call. */ + if (TARGET_HITACHI && ! TARGET_NOMACSAVE) + { + call_really_used_regs[MACH_REG] = 0; + call_really_used_regs[MACL_REG] = 0; + } + for (regno = FIRST_FP_REG + (TARGET_LITTLE_ENDIAN != 0); + regno <= LAST_FP_REG; regno += 2) + SET_HARD_REG_BIT (reg_class_contents[DF_HI_REGS], regno); + if (TARGET_SHMEDIA) + { + for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++) + if (! fixed_regs[regno] && call_really_used_regs[regno]) + SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno); + } + else + for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++) + if (! 
fixed_regs[regno] && call_really_used_regs[regno]) + SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno); +} + + +enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT; + +#include "gt-sh.h" diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h new file mode 100644 index 000000000..4579af327 --- /dev/null +++ b/gcc/config/sh/sh.h @@ -0,0 +1,2511 @@ +/* Definitions of target machine for GNU compiler for Renesas / SuperH SH. + Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, + 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Steve Chamberlain (sac@cygnus.com). + Improved by Jim Wilson (wilson@cygnus.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_SH_H +#define GCC_SH_H + +#include "config/vxworks-dummy.h" + +#define TARGET_VERSION \ + fputs (" (Hitachi SH)", stderr); + +/* Unfortunately, insn-attrtab.c doesn't include insn-codes.h. We can't + include it here, because bconfig.h is also included by gencodes.c . */ +/* ??? No longer true. 
*/ +extern int code_for_indirect_jump_scratch; + +#define TARGET_CPU_CPP_BUILTINS() \ +do { \ + builtin_define ("__sh__"); \ + builtin_assert ("cpu=sh"); \ + builtin_assert ("machine=sh"); \ + switch ((int) sh_cpu) \ + { \ + case PROCESSOR_SH1: \ + builtin_define ("__sh1__"); \ + break; \ + case PROCESSOR_SH2: \ + builtin_define ("__sh2__"); \ + break; \ + case PROCESSOR_SH2E: \ + builtin_define ("__SH2E__"); \ + break; \ + case PROCESSOR_SH2A: \ + builtin_define ("__SH2A__"); \ + builtin_define (TARGET_SH2A_DOUBLE \ + ? (TARGET_FPU_SINGLE ? "__SH2A_SINGLE__" : "__SH2A_DOUBLE__") \ + : TARGET_FPU_ANY ? "__SH2A_SINGLE_ONLY__" \ + : "__SH2A_NOFPU__"); \ + break; \ + case PROCESSOR_SH3: \ + builtin_define ("__sh3__"); \ + builtin_define ("__SH3__"); \ + if (TARGET_HARD_SH4) \ + builtin_define ("__SH4_NOFPU__"); \ + break; \ + case PROCESSOR_SH3E: \ + builtin_define (TARGET_HARD_SH4 ? "__SH4_SINGLE_ONLY__" : "__SH3E__"); \ + break; \ + case PROCESSOR_SH4: \ + builtin_define (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__"); \ + break; \ + case PROCESSOR_SH4A: \ + builtin_define ("__SH4A__"); \ + builtin_define (TARGET_SH4 \ + ? (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__") \ + : TARGET_FPU_ANY ? "__SH4_SINGLE_ONLY__" \ + : "__SH4_NOFPU__"); \ + break; \ + case PROCESSOR_SH5: \ + { \ + builtin_define_with_value ("__SH5__", \ + TARGET_SHMEDIA64 ? "64" : "32", 0); \ + builtin_define_with_value ("__SHMEDIA__", \ + TARGET_SHMEDIA ? "1" : "0", 0); \ + if (! TARGET_FPU_DOUBLE) \ + builtin_define ("__SH4_NOFPU__"); \ + } \ + } \ + if (TARGET_FPU_ANY) \ + builtin_define ("__SH_FPU_ANY__"); \ + if (TARGET_FPU_DOUBLE) \ + builtin_define ("__SH_FPU_DOUBLE__"); \ + if (TARGET_HITACHI) \ + builtin_define ("__HITACHI__"); \ + if (TARGET_FMOVD) \ + builtin_define ("__FMOVD_ENABLED__"); \ + builtin_define (TARGET_LITTLE_ENDIAN \ + ? "__LITTLE_ENDIAN__" : "__BIG_ENDIAN__"); \ +} while (0) + +/* Value should be nonzero if functions must have frame pointers. 
+ Zero means the frame pointer need not be set up (and parms may be accessed + via the stack pointer) in functions that seem suitable. */ + +#ifndef SUBTARGET_FRAME_POINTER_REQUIRED +#define SUBTARGET_FRAME_POINTER_REQUIRED 0 +#endif + + +/* Nonzero if this is an ELF target - compile time only */ +#define TARGET_ELF 0 + +/* Nonzero if we should generate code using type 2E insns. */ +#define TARGET_SH2E (TARGET_SH2 && TARGET_SH_E) + +/* Nonzero if we should generate code using type 2A insns. */ +#define TARGET_SH2A TARGET_HARD_SH2A +/* Nonzero if we should generate code using type 2A SF insns. */ +#define TARGET_SH2A_SINGLE (TARGET_SH2A && TARGET_SH2E) +/* Nonzero if we should generate code using type 2A DF insns. */ +#define TARGET_SH2A_DOUBLE (TARGET_HARD_SH2A_DOUBLE && TARGET_SH2A) + +/* Nonzero if we should generate code using type 3E insns. */ +#define TARGET_SH3E (TARGET_SH3 && TARGET_SH_E) + +/* Nonzero if the cache line size is 32. */ +#define TARGET_CACHE32 (TARGET_HARD_SH4 || TARGET_SH5) + +/* Nonzero if we schedule for a superscalar implementation. */ +#define TARGET_SUPERSCALAR TARGET_HARD_SH4 + +/* Nonzero if the target has separate instruction and data caches. */ +#define TARGET_HARVARD (TARGET_HARD_SH4 || TARGET_SH5) + +/* Nonzero if a double-precision FPU is available. */ +#define TARGET_FPU_DOUBLE \ + ((target_flags & MASK_SH4) != 0 || TARGET_SH2A_DOUBLE) + +/* Nonzero if an FPU is available. */ +#define TARGET_FPU_ANY (TARGET_SH2E || TARGET_FPU_DOUBLE) + +/* Nonzero if we should generate code using type 4 insns. */ +#undef TARGET_SH4 +#define TARGET_SH4 ((target_flags & MASK_SH4) != 0 && TARGET_SH1) + +/* Nonzero if we're generating code for the common subset of + instructions present on both SH4a and SH4al-dsp. */ +#define TARGET_SH4A_ARCH TARGET_SH4A + +/* Nonzero if we're generating code for SH4a, unless the use of the + FPU is disabled (which makes it compatible with SH4al-dsp). 
*/ +#define TARGET_SH4A_FP (TARGET_SH4A_ARCH && TARGET_FPU_ANY) + +/* Nonzero if we should generate code using the SHcompact instruction + set and 32-bit ABI. */ +#define TARGET_SHCOMPACT (TARGET_SH5 && TARGET_SH1) + +/* Nonzero if we should generate code using the SHmedia instruction + set and ABI. */ +#define TARGET_SHMEDIA (TARGET_SH5 && ! TARGET_SH1) + +/* Nonzero if we should generate code using the SHmedia ISA and 32-bit + ABI. */ +#define TARGET_SHMEDIA32 (TARGET_SH5 && ! TARGET_SH1 && TARGET_SH_E) + +/* Nonzero if we should generate code using the SHmedia ISA and 64-bit + ABI. */ +#define TARGET_SHMEDIA64 (TARGET_SH5 && ! TARGET_SH1 && ! TARGET_SH_E) + +/* Nonzero if we should generate code using SHmedia FPU instructions. */ +#define TARGET_SHMEDIA_FPU (TARGET_SHMEDIA && TARGET_FPU_DOUBLE) + +/* This is not used by the SH2E calling convention */ +#define TARGET_VARARGS_PRETEND_ARGS(FUN_DECL) \ + (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5 \ + && ! (TARGET_HITACHI || sh_attr_renesas_p (FUN_DECL))) + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT SELECT_SH1 +#define SUPPORT_SH1 1 +#define SUPPORT_SH2E 1 +#define SUPPORT_SH4 1 +#define SUPPORT_SH4_SINGLE 1 +#define SUPPORT_SH2A 1 +#define SUPPORT_SH2A_SINGLE 1 +#endif + +#define TARGET_DIVIDE_INV \ + (sh_div_strategy == SH_DIV_INV || sh_div_strategy == SH_DIV_INV_MINLAT \ + || sh_div_strategy == SH_DIV_INV20U || sh_div_strategy == SH_DIV_INV20L \ + || sh_div_strategy == SH_DIV_INV_CALL \ + || sh_div_strategy == SH_DIV_INV_CALL2 || sh_div_strategy == SH_DIV_INV_FP) +#define TARGET_DIVIDE_FP (sh_div_strategy == SH_DIV_FP) +#define TARGET_DIVIDE_INV_FP (sh_div_strategy == SH_DIV_INV_FP) +#define TARGET_DIVIDE_CALL2 (sh_div_strategy == SH_DIV_CALL2) +#define TARGET_DIVIDE_INV_MINLAT (sh_div_strategy == SH_DIV_INV_MINLAT) +#define TARGET_DIVIDE_INV20U (sh_div_strategy == SH_DIV_INV20U) +#define TARGET_DIVIDE_INV20L (sh_div_strategy == SH_DIV_INV20L) +#define TARGET_DIVIDE_INV_CALL (sh_div_strategy == 
SH_DIV_INV_CALL) +#define TARGET_DIVIDE_INV_CALL2 (sh_div_strategy == SH_DIV_INV_CALL2) +#define TARGET_DIVIDE_CALL_DIV1 (sh_div_strategy == SH_DIV_CALL_DIV1) +#define TARGET_DIVIDE_CALL_FP (sh_div_strategy == SH_DIV_CALL_FP) +#define TARGET_DIVIDE_CALL_TABLE (sh_div_strategy == SH_DIV_CALL_TABLE) + +#define SELECT_SH1 (MASK_SH1) +#define SELECT_SH2 (MASK_SH2 | SELECT_SH1) +#define SELECT_SH2E (MASK_SH_E | MASK_SH2 | MASK_SH1 \ + | MASK_FPU_SINGLE) +#define SELECT_SH2A (MASK_SH_E | MASK_HARD_SH2A \ + | MASK_HARD_SH2A_DOUBLE \ + | MASK_SH2 | MASK_SH1) +#define SELECT_SH2A_NOFPU (MASK_HARD_SH2A | MASK_SH2 | MASK_SH1) +#define SELECT_SH2A_SINGLE_ONLY (MASK_SH_E | MASK_HARD_SH2A | MASK_SH2 \ + | MASK_SH1 | MASK_FPU_SINGLE) +#define SELECT_SH2A_SINGLE (MASK_SH_E | MASK_HARD_SH2A \ + | MASK_FPU_SINGLE | MASK_HARD_SH2A_DOUBLE \ + | MASK_SH2 | MASK_SH1) +#define SELECT_SH3 (MASK_SH3 | SELECT_SH2) +#define SELECT_SH3E (MASK_SH_E | MASK_FPU_SINGLE | SELECT_SH3) +#define SELECT_SH4_NOFPU (MASK_HARD_SH4 | SELECT_SH3) +#define SELECT_SH4_SINGLE_ONLY (MASK_HARD_SH4 | SELECT_SH3E) +#define SELECT_SH4 (MASK_SH4 | MASK_SH_E | MASK_HARD_SH4 \ + | SELECT_SH3) +#define SELECT_SH4_SINGLE (MASK_FPU_SINGLE | SELECT_SH4) +#define SELECT_SH4A_NOFPU (MASK_SH4A | SELECT_SH4_NOFPU) +#define SELECT_SH4A_SINGLE_ONLY (MASK_SH4A | SELECT_SH4_SINGLE_ONLY) +#define SELECT_SH4A (MASK_SH4A | SELECT_SH4) +#define SELECT_SH4A_SINGLE (MASK_SH4A | SELECT_SH4_SINGLE) +#define SELECT_SH5_64MEDIA (MASK_SH5 | MASK_SH4) +#define SELECT_SH5_64MEDIA_NOFPU (MASK_SH5) +#define SELECT_SH5_32MEDIA (MASK_SH5 | MASK_SH4 | MASK_SH_E) +#define SELECT_SH5_32MEDIA_NOFPU (MASK_SH5 | MASK_SH_E) +#define SELECT_SH5_COMPACT (MASK_SH5 | MASK_SH4 | SELECT_SH3E) +#define SELECT_SH5_COMPACT_NOFPU (MASK_SH5 | SELECT_SH3) + +#if SUPPORT_SH1 +#define SUPPORT_SH2 1 +#endif +#if SUPPORT_SH2 +#define SUPPORT_SH3 1 +#define SUPPORT_SH2A_NOFPU 1 +#endif +#if SUPPORT_SH3 +#define SUPPORT_SH4_NOFPU 1 +#endif +#if SUPPORT_SH4_NOFPU 
+#define SUPPORT_SH4A_NOFPU 1 +#define SUPPORT_SH4AL 1 +#endif + +#if SUPPORT_SH2E +#define SUPPORT_SH3E 1 +#define SUPPORT_SH2A_SINGLE_ONLY 1 +#endif +#if SUPPORT_SH3E +#define SUPPORT_SH4_SINGLE_ONLY 1 +#endif +#if SUPPORT_SH4_SINGLE_ONLY +#define SUPPORT_SH4A_SINGLE_ONLY 1 +#endif + +#if SUPPORT_SH4 +#define SUPPORT_SH4A 1 +#endif + +#if SUPPORT_SH4_SINGLE +#define SUPPORT_SH4A_SINGLE 1 +#endif + +#if SUPPORT_SH5_COMPACT /* Compact support implies 32-bit SHmedia. */ +#define SUPPORT_SH5_32MEDIA 1 +#endif + +#if SUPPORT_SH5_COMPACT_NOFPU +#define SUPPORT_SH5_32MEDIA_NOFPU 1 +#endif + +#define SUPPORT_ANY_SH5_32MEDIA \ + (SUPPORT_SH5_32MEDIA || SUPPORT_SH5_32MEDIA_NOFPU) +#define SUPPORT_ANY_SH5_64MEDIA \ + (SUPPORT_SH5_64MEDIA || SUPPORT_SH5_64MEDIA_NOFPU) +#define SUPPORT_ANY_SH5 \ + (SUPPORT_ANY_SH5_32MEDIA || SUPPORT_ANY_SH5_64MEDIA) + +/* Reset all target-selection flags. */ +#define MASK_ARCH (MASK_SH1 | MASK_SH2 | MASK_SH3 | MASK_SH_E | MASK_SH4 \ + | MASK_HARD_SH2A | MASK_HARD_SH2A_DOUBLE | MASK_SH4A \ + | MASK_HARD_SH4 | MASK_FPU_SINGLE | MASK_SH5) + +/* This defaults us to big-endian. 
*/ +#ifndef TARGET_ENDIAN_DEFAULT +#define TARGET_ENDIAN_DEFAULT 0 +#endif + +#ifndef TARGET_OPT_DEFAULT +#define TARGET_OPT_DEFAULT MASK_ADJUST_UNROLL +#endif + +#define TARGET_DEFAULT \ + (TARGET_CPU_DEFAULT | TARGET_ENDIAN_DEFAULT | TARGET_OPT_DEFAULT) + +#ifndef SH_MULTILIB_CPU_DEFAULT +#define SH_MULTILIB_CPU_DEFAULT "m1" +#endif + +#if TARGET_ENDIAN_DEFAULT +#define MULTILIB_DEFAULTS { "ml", SH_MULTILIB_CPU_DEFAULT } +#else +#define MULTILIB_DEFAULTS { "mb", SH_MULTILIB_CPU_DEFAULT } +#endif + +#define CPP_SPEC " %(subtarget_cpp_spec) " + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "" +#endif + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS +#endif + +#define EXTRA_SPECS \ + { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ + { "link_emul_prefix", LINK_EMUL_PREFIX }, \ + { "link_default_cpu_emul", LINK_DEFAULT_CPU_EMUL }, \ + { "subtarget_link_emul_suffix", SUBTARGET_LINK_EMUL_SUFFIX }, \ + { "subtarget_link_spec", SUBTARGET_LINK_SPEC }, \ + { "subtarget_asm_endian_spec", SUBTARGET_ASM_ENDIAN_SPEC }, \ + { "subtarget_asm_relax_spec", SUBTARGET_ASM_RELAX_SPEC }, \ + { "subtarget_asm_isa_spec", SUBTARGET_ASM_ISA_SPEC }, \ + { "subtarget_asm_spec", SUBTARGET_ASM_SPEC }, \ + SUBTARGET_EXTRA_SPECS + +#if TARGET_CPU_DEFAULT & MASK_HARD_SH4 +#define SUBTARGET_ASM_RELAX_SPEC "%{!m1:%{!m2:%{!m3*:%{!m5*:-isa=sh4-up}}}}" +#else +#define SUBTARGET_ASM_RELAX_SPEC "%{m4*:-isa=sh4-up}" +#endif + +#define SH_ASM_SPEC \ + "%(subtarget_asm_endian_spec) %{mrelax:-relax %(subtarget_asm_relax_spec)}\ +%(subtarget_asm_isa_spec) %(subtarget_asm_spec)\ +%{m2a:--isa=sh2a} \ +%{m2a-single:--isa=sh2a} \ +%{m2a-single-only:--isa=sh2a} \ +%{m2a-nofpu:--isa=sh2a-nofpu} \ +%{m5-compact*:--isa=SHcompact} \ +%{m5-32media*:--isa=SHmedia --abi=32} \ +%{m5-64media*:--isa=SHmedia --abi=64} \ +%{m4al:-dsp} %{mcut2-workaround:-cut2-workaround}" + +#define ASM_SPEC SH_ASM_SPEC + +#ifndef SUBTARGET_ASM_ENDIAN_SPEC +#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN +#define 
SUBTARGET_ASM_ENDIAN_SPEC "%{mb:-big} %{!mb:-little}" +#else +#define SUBTARGET_ASM_ENDIAN_SPEC "%{ml:-little} %{!ml:-big}" +#endif +#endif + +#if STRICT_NOFPU == 1 +/* Strict nofpu means that the compiler should tell the assembler + to reject FPU instructions. E.g. from ASM inserts. */ +#if TARGET_CPU_DEFAULT & MASK_HARD_SH4 && !(TARGET_CPU_DEFAULT & MASK_SH_E) +#define SUBTARGET_ASM_ISA_SPEC "%{!m1:%{!m2:%{!m3*:%{m4-nofpu|!m4*:%{!m5:-isa=sh4-nofpu}}}}}" +#else +/* If there were an -isa option for sh5-nofpu then it would also go here. */ +#define SUBTARGET_ASM_ISA_SPEC \ + "%{m4-nofpu:-isa=sh4-nofpu} " ASM_ISA_DEFAULT_SPEC +#endif +#else /* ! STRICT_NOFPU */ +#define SUBTARGET_ASM_ISA_SPEC ASM_ISA_DEFAULT_SPEC +#endif + +#ifndef SUBTARGET_ASM_SPEC +#define SUBTARGET_ASM_SPEC "" +#endif + +#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN +#define LINK_EMUL_PREFIX "sh%{!mb:l}" +#else +#define LINK_EMUL_PREFIX "sh%{ml:l}" +#endif + +#if TARGET_CPU_DEFAULT & MASK_SH5 +#if TARGET_CPU_DEFAULT & MASK_SH_E +#define LINK_DEFAULT_CPU_EMUL "32" +#if TARGET_CPU_DEFAULT & MASK_SH1 +#define ASM_ISA_SPEC_DEFAULT "--isa=SHcompact" +#else +#define ASM_ISA_SPEC_DEFAULT "--isa=SHmedia --abi=32" +#endif /* MASK_SH1 */ +#else /* !MASK_SH_E */ +#define LINK_DEFAULT_CPU_EMUL "64" +#define ASM_ISA_SPEC_DEFAULT "--isa=SHmedia --abi=64" +#endif /* MASK_SH_E */ +#define ASM_ISA_DEFAULT_SPEC \ +" %{!m1:%{!m2*:%{!m3*:%{!m4*:%{!m5*:" ASM_ISA_SPEC_DEFAULT "}}}}}" +#else /* !MASK_SH5 */ +#define LINK_DEFAULT_CPU_EMUL "" +#define ASM_ISA_DEFAULT_SPEC "" +#endif /* MASK_SH5 */ + +#define SUBTARGET_LINK_EMUL_SUFFIX "" +#define SUBTARGET_LINK_SPEC "" + +/* Go via SH_LINK_SPEC to avoid code replication. 
*/ +#define LINK_SPEC SH_LINK_SPEC + +#define SH_LINK_SPEC "\ +-m %(link_emul_prefix)\ +%{m5-compact*|m5-32media*:32}\ +%{m5-64media*:64}\ +%{!m1:%{!m2:%{!m3*:%{!m4*:%{!m5*:%(link_default_cpu_emul)}}}}}\ +%(subtarget_link_emul_suffix) \ +%{mrelax:-relax} %(subtarget_link_spec)" + +#ifndef SH_DIV_STR_FOR_SIZE +#define SH_DIV_STR_FOR_SIZE "call" +#endif + +#define DRIVER_SELF_SPECS "%{m2a:%{ml:%eSH2a does not support little-endian}}" + +#define ASSEMBLER_DIALECT assembler_dialect + +extern int assembler_dialect; + +enum sh_divide_strategy_e { + /* SH5 strategies. */ + SH_DIV_CALL, + SH_DIV_CALL2, + SH_DIV_FP, /* We could do this also for SH4. */ + SH_DIV_INV, + SH_DIV_INV_MINLAT, + SH_DIV_INV20U, + SH_DIV_INV20L, + SH_DIV_INV_CALL, + SH_DIV_INV_CALL2, + SH_DIV_INV_FP, + /* SH1 .. SH4 strategies. Because of the small number of registers + available, the compiler uses knowledge of the actual set of registers + being clobbered by the different functions called. */ + SH_DIV_CALL_DIV1, /* No FPU, medium size, highest latency. */ + SH_DIV_CALL_FP, /* FPU needed, small size, high latency. */ + SH_DIV_CALL_TABLE, /* No FPU, large size, medium latency. */ + SH_DIV_INTRINSIC +}; + +extern enum sh_divide_strategy_e sh_div_strategy; + +#ifndef SH_DIV_STRATEGY_DEFAULT +#define SH_DIV_STRATEGY_DEFAULT SH_DIV_CALL +#endif + +#define SUBTARGET_OVERRIDE_OPTIONS (void) 0 + + +/* Target machine storage layout. */ + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ + +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. */ +#define BYTES_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0) + +/* Define this if most significant word of a multiword number is the lowest + numbered. */ +#define WORDS_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0) + +#define MAX_BITS_PER_WORD 64 + +/* Width in bits of an `int'. We want just 32-bits, even if words are + longer. 
*/ +#define INT_TYPE_SIZE 32 + +/* Width in bits of a `long'. */ +#define LONG_TYPE_SIZE (TARGET_SHMEDIA64 ? 64 : 32) + +/* Width in bits of a `long long'. */ +#define LONG_LONG_TYPE_SIZE 64 + +/* Width in bits of a `long double'. */ +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD (TARGET_SHMEDIA ? 8 : 4) +#define MIN_UNITS_PER_WORD 4 + +/* Scaling factor for Dwarf data offsets for CFI information. + The dwarf2out.c default would use -UNITS_PER_WORD, which is -8 for + SHmedia; however, since we do partial register saves for the registers + visible to SHcompact, and for target registers for SHMEDIA32, we have + to allow saves that are only 4-byte aligned. */ +#define DWARF_CIE_DATA_ALIGNMENT -4 + +/* Width in bits of a pointer. + See also the macro `Pmode' defined below. */ +#define POINTER_SIZE (TARGET_SHMEDIA64 ? 64 : 32) + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY (TARGET_SH5 ? 64 : 32) + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY BIGGEST_ALIGNMENT + +/* The log (base 2) of the cache line size, in bytes. Processors prior to + SH2 have no actual cache, but they fetch code in chunks of 4 bytes. + The SH2/3 have 16 byte cache lines, and the SH4 has a 32 byte cache line */ +#define CACHE_LOG (TARGET_CACHE32 ? 5 : TARGET_SH2 ? 4 : 2) + +/* ABI given & required minimum allocation boundary (in *bits*) for the + code of a function. */ +#define FUNCTION_BOUNDARY (16 << TARGET_SHMEDIA) + +/* On SH5, the lowest bit is used to indicate SHmedia functions, so + the vbit must go into the delta field of + pointers-to-member-functions. */ +#define TARGET_PTRMEMFUNC_VBIT_LOCATION \ + (TARGET_SH5 ? ptrmemfunc_vbit_in_delta : ptrmemfunc_vbit_in_pfn) + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 32 + +/* No data type wants to be aligned rounder than this. 
*/ +#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32) + +/* The best alignment to use in cases where we have a choice. */ +#define FASTEST_ALIGNMENT (TARGET_SH5 ? 64 : 32) + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < FASTEST_ALIGNMENT) \ + ? FASTEST_ALIGNMENT : (ALIGN)) + +/* get_mode_alignment assumes complex values are always held in multiple + registers, but that is not the case on the SH; CQImode and CHImode are + held in a single integer register. SH5 also holds CSImode and SCmode + values in integer registers. This is relevant for argument passing on + SHcompact as we use a stack temp in order to pass CSImode by reference. */ +#define LOCAL_ALIGNMENT(TYPE, ALIGN) \ + ((GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_INT \ + || GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_FLOAT) \ + ? (unsigned) MIN (BIGGEST_ALIGNMENT, GET_MODE_BITSIZE (TYPE_MODE (TYPE))) \ + : (unsigned) DATA_ALIGNMENT(TYPE, ALIGN)) + +/* Make arrays of chars word-aligned for the same reasons. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN)) + +/* Number of bits which any structure or union's size must be a + multiple of. Each structure or union's size is rounded up to a + multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY (TARGET_PADSTRUCT ? 32 : 8) + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* If LABEL_AFTER_BARRIER demands an alignment, return its base 2 logarithm. */ +#define LABEL_ALIGN_AFTER_BARRIER(LABEL_AFTER_BARRIER) \ + barrier_align (LABEL_AFTER_BARRIER) + +#define LOOP_ALIGN(A_LABEL) \ + ((! optimize || TARGET_HARD_SH4 || optimize_size) \ + ? 
0 : sh_loop_align (A_LABEL)) + +#define LABEL_ALIGN(A_LABEL) \ +( \ + (PREV_INSN (A_LABEL) \ + && NONJUMP_INSN_P (PREV_INSN (A_LABEL)) \ + && GET_CODE (PATTERN (PREV_INSN (A_LABEL))) == UNSPEC_VOLATILE \ + && XINT (PATTERN (PREV_INSN (A_LABEL)), 1) == UNSPECV_ALIGN) \ + /* explicit alignment insn in constant tables. */ \ + ? INTVAL (XVECEXP (PATTERN (PREV_INSN (A_LABEL)), 0, 0)) \ + : 0) + +/* Jump tables must be 32 bit aligned, no matter the size of the element. */ +#define ADDR_VEC_ALIGN(ADDR_VEC) 2 + +/* The base two logarithm of the known minimum alignment of an insn length. */ +#define INSN_LENGTH_ALIGNMENT(A_INSN) \ + (NONJUMP_INSN_P (A_INSN) \ + ? 1 << TARGET_SHMEDIA \ + : JUMP_P (A_INSN) || CALL_P (A_INSN) \ + ? 1 << TARGET_SHMEDIA \ + : CACHE_LOG) + +/* Standard register usage. */ + +/* Register allocation for the Renesas calling convention: + + r0 arg return + r1..r3 scratch + r4..r7 args in + r8..r13 call saved + r14 frame pointer/call saved + r15 stack pointer + ap arg pointer (doesn't really exist, always eliminated) + pr subroutine return address + t t bit + mach multiply/accumulate result, high part + macl multiply/accumulate result, low part. 
+ fpul fp/int communication register + rap return address pointer register + fr0 fp arg return + fr1..fr3 scratch floating point registers + fr4..fr11 fp args in + fr12..fr15 call saved floating point registers */ + +#define MAX_REGISTER_NAME_LENGTH 5 +extern char sh_register_names[][MAX_REGISTER_NAME_LENGTH + 1]; + +#define SH_REGISTER_NAMES_INITIALIZER \ +{ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \ + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", \ + "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \ + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", \ + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \ + "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", \ + "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", \ + "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15", \ + "fr16", "fr17", "fr18", "fr19", "fr20", "fr21", "fr22", "fr23", \ + "fr24", "fr25", "fr26", "fr27", "fr28", "fr29", "fr30", "fr31", \ + "fr32", "fr33", "fr34", "fr35", "fr36", "fr37", "fr38", "fr39", \ + "fr40", "fr41", "fr42", "fr43", "fr44", "fr45", "fr46", "fr47", \ + "fr48", "fr49", "fr50", "fr51", "fr52", "fr53", "fr54", "fr55", \ + "fr56", "fr57", "fr58", "fr59", "fr60", "fr61", "fr62", "fr63", \ + "tr0", "tr1", "tr2", "tr3", "tr4", "tr5", "tr6", "tr7", \ + "xd0", "xd2", "xd4", "xd6", "xd8", "xd10", "xd12", "xd14", \ + "gbr", "ap", "pr", "t", "mach", "macl", "fpul", "fpscr", \ + "rap", "sfp" \ +} + +#define REGNAMES_ARR_INDEX_1(index) \ + (sh_register_names[index]) +#define REGNAMES_ARR_INDEX_2(index) \ + REGNAMES_ARR_INDEX_1 ((index)), REGNAMES_ARR_INDEX_1 ((index)+1) +#define REGNAMES_ARR_INDEX_4(index) \ + REGNAMES_ARR_INDEX_2 ((index)), REGNAMES_ARR_INDEX_2 ((index)+2) +#define REGNAMES_ARR_INDEX_8(index) \ + REGNAMES_ARR_INDEX_4 ((index)), REGNAMES_ARR_INDEX_4 ((index)+4) +#define REGNAMES_ARR_INDEX_16(index) \ 
+ REGNAMES_ARR_INDEX_8 ((index)), REGNAMES_ARR_INDEX_8 ((index)+8) +#define REGNAMES_ARR_INDEX_32(index) \ + REGNAMES_ARR_INDEX_16 ((index)), REGNAMES_ARR_INDEX_16 ((index)+16) +#define REGNAMES_ARR_INDEX_64(index) \ + REGNAMES_ARR_INDEX_32 ((index)), REGNAMES_ARR_INDEX_32 ((index)+32) + +#define REGISTER_NAMES \ +{ \ + REGNAMES_ARR_INDEX_64 (0), \ + REGNAMES_ARR_INDEX_64 (64), \ + REGNAMES_ARR_INDEX_8 (128), \ + REGNAMES_ARR_INDEX_8 (136), \ + REGNAMES_ARR_INDEX_8 (144), \ + REGNAMES_ARR_INDEX_2 (152) \ +} + +#define ADDREGNAMES_SIZE 32 +#define MAX_ADDITIONAL_REGISTER_NAME_LENGTH 4 +extern char sh_additional_register_names[ADDREGNAMES_SIZE] \ + [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]; + +#define SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER \ +{ \ + "dr0", "dr2", "dr4", "dr6", "dr8", "dr10", "dr12", "dr14", \ + "dr16", "dr18", "dr20", "dr22", "dr24", "dr26", "dr28", "dr30", \ + "dr32", "dr34", "dr36", "dr38", "dr40", "dr42", "dr44", "dr46", \ + "dr48", "dr50", "dr52", "dr54", "dr56", "dr58", "dr60", "dr62" \ +} +/* Hard register number for entry INDEX of sh_additional_register_names, or -1 if INDEX is out of range. */ +#define ADDREGNAMES_REGNO(index) \ + (((index) < ADDREGNAMES_SIZE) ? (FIRST_FP_REG + (index) * 2) \ + : (-1)) + +#define ADDREGNAMES_ARR_INDEX_1(index) \ + { (sh_additional_register_names[index]), ADDREGNAMES_REGNO (index) } +#define ADDREGNAMES_ARR_INDEX_2(index) \ + ADDREGNAMES_ARR_INDEX_1 ((index)), ADDREGNAMES_ARR_INDEX_1 ((index)+1) +#define ADDREGNAMES_ARR_INDEX_4(index) \ + ADDREGNAMES_ARR_INDEX_2 ((index)), ADDREGNAMES_ARR_INDEX_2 ((index)+2) +#define ADDREGNAMES_ARR_INDEX_8(index) \ + ADDREGNAMES_ARR_INDEX_4 ((index)), ADDREGNAMES_ARR_INDEX_4 ((index)+4) +#define ADDREGNAMES_ARR_INDEX_16(index) \ + ADDREGNAMES_ARR_INDEX_8 ((index)), ADDREGNAMES_ARR_INDEX_8 ((index)+8) +#define ADDREGNAMES_ARR_INDEX_32(index) \ + ADDREGNAMES_ARR_INDEX_16 ((index)), ADDREGNAMES_ARR_INDEX_16 ((index)+16) + +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + ADDREGNAMES_ARR_INDEX_32 (0) \ +} + +/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. */ + +/* There are many other relevant definitions in sh.md's md_constants. */ + +#define FIRST_GENERAL_REG R0_REG +#define LAST_GENERAL_REG (FIRST_GENERAL_REG + (TARGET_SHMEDIA ? 63 : 15)) +#define FIRST_FP_REG DR0_REG +#define LAST_FP_REG (FIRST_FP_REG + \ + (TARGET_SHMEDIA_FPU ? 63 : TARGET_SH2E ? 15 : -1)) +#define FIRST_XD_REG XD0_REG +#define LAST_XD_REG (FIRST_XD_REG + ((TARGET_SH4 && TARGET_FMOVD) ? 7 : -1)) +#define FIRST_TARGET_REG TR0_REG +#define LAST_TARGET_REG (FIRST_TARGET_REG + (TARGET_SHMEDIA ? 7 : -1)) + +/* Registers that can be accessed through bank0 or bank1 depending on sr.md. */ + +#define FIRST_BANKED_REG R0_REG +#define LAST_BANKED_REG R7_REG + +#define BANKED_REGISTER_P(REGNO) \ + IN_RANGE ((REGNO), \ + (unsigned HOST_WIDE_INT) FIRST_BANKED_REG, \ + (unsigned HOST_WIDE_INT) LAST_BANKED_REG) + +#define GENERAL_REGISTER_P(REGNO) \ + IN_RANGE ((REGNO), \ + (unsigned HOST_WIDE_INT) FIRST_GENERAL_REG, \ + (unsigned HOST_WIDE_INT) LAST_GENERAL_REG) + +#define GENERAL_OR_AP_REGISTER_P(REGNO) \ + (GENERAL_REGISTER_P (REGNO) || ((REGNO) == AP_REG) \ + || ((REGNO) == FRAME_POINTER_REGNUM)) + +#define FP_REGISTER_P(REGNO) \ + ((int) (REGNO) >= FIRST_FP_REG && (int) (REGNO) <= LAST_FP_REG) + +#define XD_REGISTER_P(REGNO) \ + ((int) (REGNO) >= FIRST_XD_REG && (int) (REGNO) <= LAST_XD_REG) + +#define FP_OR_XD_REGISTER_P(REGNO) \ + (FP_REGISTER_P (REGNO) || XD_REGISTER_P (REGNO)) + +#define FP_ANY_REGISTER_P(REGNO) \ + (FP_REGISTER_P (REGNO) || XD_REGISTER_P (REGNO) || (REGNO) == FPUL_REG) + +#define SPECIAL_REGISTER_P(REGNO) \ + ((REGNO) == GBR_REG || (REGNO) == T_REG \ + || (REGNO) == MACH_REG || (REGNO) == MACL_REG) + +#define TARGET_REGISTER_P(REGNO) \ + ((int) (REGNO) >= FIRST_TARGET_REG && (int) (REGNO) <= 
LAST_TARGET_REG) + +#define SHMEDIA_REGISTER_P(REGNO) \ + (GENERAL_REGISTER_P (REGNO) || FP_REGISTER_P (REGNO) \ + || TARGET_REGISTER_P (REGNO)) + +/* This is to be used in TARGET_CONDITIONAL_REGISTER_USAGE, to mark + registers that should be fixed. */ +#define VALID_REGISTER_P(REGNO) \ + (SHMEDIA_REGISTER_P (REGNO) || XD_REGISTER_P (REGNO) \ + || (REGNO) == AP_REG || (REGNO) == RAP_REG \ + || (REGNO) == FRAME_POINTER_REGNUM \ + || (TARGET_SH1 && (SPECIAL_REGISTER_P (REGNO) || (REGNO) == PR_REG)) \ + || (TARGET_SH2E && (REGNO) == FPUL_REG)) + +/* The mode that should be generally used to store a register by + itself in the stack, or to load it back. */ +#define REGISTER_NATURAL_MODE(REGNO) \ + (FP_REGISTER_P (REGNO) ? SFmode \ + : XD_REGISTER_P (REGNO) ? DFmode \ + : TARGET_SHMEDIA && ! HARD_REGNO_CALL_PART_CLOBBERED ((REGNO), DImode) \ + ? DImode \ + : SImode) + +#define FIRST_PSEUDO_REGISTER 154 + +/* Don't count soft frame pointer. */ +#define DWARF_FRAME_REGISTERS (FIRST_PSEUDO_REGISTER - 1) + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. + + Mach register is fixed 'cause it's only 10 bits wide for SH1. + It is 32 bits wide for SH2. */ + +#define FIXED_REGISTERS \ +{ \ +/* Regular registers. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 1, \ + /* r16 is reserved, r18 is the former pr. */ \ + 1, 0, 0, 0, 0, 0, 0, 0, \ + /* r24 is reserved for the OS; r25, for the assembler or linker. */ \ + /* r26 is a global variable data pointer; r27 is for constants. */ \ + 1, 1, 1, 1, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 1, \ +/* FP registers. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ +/* Branch target registers. 
*/ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ +/* XD registers. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ +/*"gbr", "ap", "pr", "t", "mach", "macl", "fpul", "fpscr", */ \ + 1, 1, 1, 1, 1, 1, 0, 1, \ +/*"rap", "sfp" */ \ + 1, 1, \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ + +#define CALL_USED_REGISTERS \ +{ \ +/* Regular registers. */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. \ + Only the lower 32bits of R10-R14 are guaranteed to be preserved \ + across SH5 function calls. */ \ + 0, 0, 0, 0, 0, 0, 0, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 0, 0, 0, 0, \ + 0, 0, 0, 0, 1, 1, 1, 1, \ + 1, 1, 1, 1, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 1, 1, 1, 1, \ +/* FP registers. */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ +/* Branch target registers. */ \ + 1, 1, 1, 1, 1, 0, 0, 0, \ +/* XD registers. */ \ + 1, 1, 1, 1, 1, 1, 0, 0, \ +/*"gbr", "ap", "pr", "t", "mach", "macl", "fpul", "fpscr", */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ +/*"rap", "sfp" */ \ + 1, 1, \ +} + +/* TARGET_CONDITIONAL_REGISTER_USAGE might want to make a register + call-used, yet fixed, like PIC_OFFSET_TABLE_REGNUM. */ +#define CALL_REALLY_USED_REGISTERS CALL_USED_REGISTERS + +/* Only the lower 32-bits of R10-R14 are guaranteed to be preserved + across SHcompact function calls. 
We can't tell whether a called + function is SHmedia or SHcompact, so we assume it may be when + compiling SHmedia code with the 32-bit ABI, since that's the only + ABI that can be linked with SHcompact code. */ +#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO,MODE) \ + (TARGET_SHMEDIA32 \ + && GET_MODE_SIZE (MODE) > 4 \ + && (((REGNO) >= FIRST_GENERAL_REG + 10 \ + && (REGNO) <= FIRST_GENERAL_REG + 15) \ + || TARGET_REGISTER_P (REGNO) \ + || (REGNO) == PR_MEDIA_REG)) + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. + + On the SH all but the XD regs are UNITS_PER_WORD bits wide. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + (XD_REGISTER_P (REGNO) \ + ? ((GET_MODE_SIZE (MODE) + (2*UNITS_PER_WORD - 1)) / (2*UNITS_PER_WORD)) \ + : (TARGET_SHMEDIA && FP_REGISTER_P (REGNO)) \ + ? ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD/2 - 1) / (UNITS_PER_WORD/2)) \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ + +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + sh_hard_regno_mode_ok ((REGNO), (MODE)) + +/* Value is 1 if it is a good idea to tie two pseudo registers + when one has mode MODE1 and one has mode MODE2. + If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, + for any hard reg, then this must be 0 for correct output. + That's the case for xd registers: we don't hold SFmode values in + them, so we can't tie an SFmode pseudos with one in another + floating-point mode. */ + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + ((MODE1) == (MODE2) \ + || (TARGET_SHMEDIA \ + && GET_MODE_SIZE (MODE1) == GET_MODE_SIZE (MODE2) \ + && INTEGRAL_MODE_P (MODE1) && INTEGRAL_MODE_P (MODE2)) \ + || (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2) \ + && (TARGET_SHMEDIA ? 
((GET_MODE_SIZE (MODE1) <= 4) \ + && (GET_MODE_SIZE (MODE2) <= 4)) \ + : ((MODE1) != SFmode && (MODE2) != SFmode)))) + +/* A C expression that is nonzero if hard register NEW_REG can be + considered for use as a rename register for OLD_REG register */ + +#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \ + sh_hard_regno_rename_ok (OLD_REG, NEW_REG) + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* Define this if the program counter is overloaded on a register. */ +/* #define PC_REGNUM 15*/ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM SP_REG + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM FP_REG + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM 153 + +/* Fake register that holds the address on the stack of the + current function's return address. */ +#define RETURN_ADDRESS_POINTER_REGNUM RAP_REG + +/* Register to hold the addressing base for position independent + code access to data items. */ +#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? PIC_REG : INVALID_REGNUM) + +#define GOT_SYMBOL_NAME "*_GLOBAL_OFFSET_TABLE_" + +/* Definitions for register eliminations. + + We have three registers that can be eliminated on the SH. First, the + frame pointer register can often be eliminated in favor of the stack + pointer register. Secondly, the argument pointer register can always be + eliminated; it is replaced with either the stack or frame pointer. + Third, there is the return address pointer, which can also be replaced + with either the stack or the frame pointer. */ + +/* This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. 
*/ + +/* If you add any registers here that are not actually hard registers, + and that have any alternative of elimination that doesn't always + apply, you need to amend calc_live_regs to exclude it, because + reload spills all eliminable registers where it sees an + can_eliminate == 0 entry, thus making them 'live' . + If you add any hard registers that can be eliminated in different + ways, you have to patch reload to spill them only when all alternatives + of elimination fail. */ + +#define ELIMINABLE_REGS \ +{{ HARD_FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { RETURN_ADDRESS_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { RETURN_ADDRESS_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM},} + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + OFFSET = initial_elimination_offset ((FROM), (TO)) + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM AP_REG + +/* Register in which the static-chain is passed to a function. */ +#define STATIC_CHAIN_REGNUM (TARGET_SH5 ? 1 : 3) + +/* Don't default to pcc-struct-return, because we have already specified + exactly how to return structures in the TARGET_RETURN_IN_MEMORY + target hook. */ + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +#define SHMEDIA_REGS_STACK_ADJUST() \ + (TARGET_SHCOMPACT && crtl->saves_all_registers \ + ? (8 * (/* r28-r35 */ 8 + /* r44-r59 */ 16 + /* tr5-tr7 */ 3) \ + + (TARGET_FPU_ANY ? 4 * (/* fr36 - fr63 */ 28) : 0)) \ + : 0) + + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. 
+ If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + +/* The SH has two sorts of general registers, R0 and the rest. R0 can + be used as the destination of some of the arithmetic ops. There are + also some special purpose registers; the T bit register, the + Procedure Return Register and the Multiply Accumulate Registers. */ +/* Place GENERAL_REGS after FPUL_REGS so that it will be preferred by + reg_class_subunion. We don't want to have an actual union class + of these, because it would only be used when both classes are calculated + to give the same cost, but there is only one FPUL register. + Besides, regclass fails to notice the different REGISTER_MOVE_COSTS + applying to the actual instruction alternative considered. E.g., the + y/r alternative of movsi_ie is considered to have no more cost than + the r/r alternative, which is patently untrue. */ +/* Register classes, listed in the same order as REG_CLASS_NAMES and REG_CLASS_CONTENTS. */ +enum reg_class +{ + NO_REGS, + R0_REGS, + PR_REGS, + T_REGS, + MAC_REGS, + FPUL_REGS, + SIBCALL_REGS, + GENERAL_REGS, + FP0_REGS, + FP_REGS, + DF_HI_REGS, + DF_REGS, + FPSCR_REGS, + GENERAL_FP_REGS, + GENERAL_DF_REGS, + TARGET_REGS, + ALL_REGS, + LIM_REG_CLASSES /* Not a class: one past the last class, see N_REG_CLASSES. */ +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file.
*/ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "R0_REGS", \ + "PR_REGS", \ + "T_REGS", \ + "MAC_REGS", \ + "FPUL_REGS", \ + "SIBCALL_REGS", \ + "GENERAL_REGS", \ + "FP0_REGS", \ + "FP_REGS", \ + "DF_HI_REGS", \ + "DF_REGS", \ + "FPSCR_REGS", \ + "GENERAL_FP_REGS", \ + "GENERAL_DF_REGS", \ + "TARGET_REGS", \ + "ALL_REGS", \ +} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ +{ \ +/* NO_REGS: */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \ +/* R0_REGS: */ \ + { 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \ +/* PR_REGS: */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00040000 }, \ +/* T_REGS: */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00080000 }, \ +/* MAC_REGS: */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00300000 }, \ +/* FPUL_REGS: */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00400000 }, \ +/* SIBCALL_REGS: Initialized in TARGET_CONDITIONAL_REGISTER_USAGE. */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \ +/* GENERAL_REGS: */ \ + { 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x03020000 }, \ +/* FP0_REGS: */ \ + { 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000 }, \ +/* FP_REGS: */ \ + { 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000 }, \ +/* DF_HI_REGS: Initialized in TARGET_CONDITIONAL_REGISTER_USAGE. 
*/ \ + { 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x0000ff00 }, \ +/* DF_REGS: */ \ + { 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x0000ff00 }, \ +/* FPSCR_REGS: */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00800000 }, \ +/* GENERAL_FP_REGS: */ \ + { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x03020000 }, \ +/* GENERAL_DF_REGS: */ \ + { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0302ff00 }, \ +/* TARGET_REGS: */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000000ff }, \ +/* ALL_REGS: */ \ + { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x03ffffff }, \ +} + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +extern enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER]; +#define REGNO_REG_CLASS(REGNO) regno_reg_class[(REGNO)] + +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, FP_REGS, PR_REGS, T_REGS, MAC_REGS, TARGET_REGS, \ + FPUL_REGS, LIM_REG_CLASSES \ +} + +/* When this hook returns true for MODE, the compiler allows + registers explicitly used in the rtl to be used as spill registers + but prevents the compiler from extending the lifetime of these + registers. */ +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \ + sh_small_register_classes_for_mode_p + +/* The order in which register should be allocated. */ +/* Sometimes FP0_REGS becomes the preferred class of a floating point pseudo, + and GENERAL_FP_REGS the alternate class. 
Since FP0 is likely to be + spilled or used otherwise, we better have the FP_REGS allocated first. */ +#define REG_ALLOC_ORDER \ + {/* Caller-saved FPRs */ \ + 65, 66, 67, 68, 69, 70, 71, 64, \ + 72, 73, 74, 75, 80, 81, 82, 83, \ + 84, 85, 86, 87, 88, 89, 90, 91, \ + 92, 93, 94, 95, 96, 97, 98, 99, \ + /* Callee-saved FPRs */ \ + 76, 77, 78, 79,100,101,102,103, \ + 104,105,106,107,108,109,110,111, \ + 112,113,114,115,116,117,118,119, \ + 120,121,122,123,124,125,126,127, \ + 136,137,138,139,140,141,142,143, /* XD registers */ \ + /* FPSCR */ 151, \ + /* Caller-saved GPRs (except 8/9 on SH1-4) */ \ + 1, 2, 3, 7, 6, 5, 4, 0, \ + 8, 9, 17, 19, 20, 21, 22, 23, \ + 36, 37, 38, 39, 40, 41, 42, 43, \ + 60, 61, 62, \ + /* SH1-4 callee-saved GPRs / SH5 partially-saved GPRs */ \ + 10, 11, 12, 13, 14, 18, \ + /* SH5 callee-saved GPRs */ \ + 28, 29, 30, 31, 32, 33, 34, 35, \ + 44, 45, 46, 47, 48, 49, 50, 51, \ + 52, 53, 54, 55, 56, 57, 58, 59, \ + /* FPUL */ 150, \ + /* SH5 branch target registers */ \ + 128,129,130,131,132,133,134,135, \ + /* Fixed registers */ \ + 15, 16, 24, 25, 26, 27, 63,144, \ + 145,146,147,148,149,152,153 } + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS \ + (!ALLOW_INDEXED_ADDRESS ? NO_REGS : TARGET_SHMEDIA ? GENERAL_REGS : R0_REGS) +#define BASE_REG_CLASS GENERAL_REGS + +/* Defines for sh.md and constraints.md.
*/ + +#define CONST_OK_FOR_I06(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -32 \ + && ((HOST_WIDE_INT)(VALUE)) <= 31) +#define CONST_OK_FOR_I08(VALUE) (((HOST_WIDE_INT)(VALUE))>= -128 \ + && ((HOST_WIDE_INT)(VALUE)) <= 127) +#define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \ + && ((HOST_WIDE_INT)(VALUE)) <= 511) +#define CONST_OK_FOR_I16(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -32768 \ + && ((HOST_WIDE_INT)(VALUE)) <= 32767) + +#define CONST_OK_FOR_J16(VALUE) \ + ((HOST_BITS_PER_WIDE_INT >= 64 && (VALUE) == (HOST_WIDE_INT) 0xffffffff) \ + || (HOST_BITS_PER_WIDE_INT >= 64 && (VALUE) == (HOST_WIDE_INT) -1 << 32)) + +#define CONST_OK_FOR_K08(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \ + && ((HOST_WIDE_INT)(VALUE)) <= 255) + +#if 0 +#define SECONDARY_INOUT_RELOAD_CLASS(CLASS,MODE,X,ELSE) \ + ((((REGCLASS_HAS_FP_REG (CLASS) \ + && (REG_P (X) \ + && (GENERAL_OR_AP_REGISTER_P (REGNO (X)) \ + || (FP_REGISTER_P (REGNO (X)) && (MODE) == SImode \ + && TARGET_FMOVD)))) \ + || (REGCLASS_HAS_GENERAL_REG (CLASS) \ + && REG_P (X) \ + && FP_REGISTER_P (REGNO (X)))) \ + && ! TARGET_SHMEDIA \ + && ((MODE) == SFmode || (MODE) == SImode)) \ + ? FPUL_REGS \ + : (((CLASS) == FPUL_REGS \ + || (REGCLASS_HAS_FP_REG (CLASS) \ + && ! TARGET_SHMEDIA && MODE == SImode)) \ + && (MEM_P (X) \ + || (REG_P (X) \ + && (REGNO (X) >= FIRST_PSEUDO_REGISTER \ + || REGNO (X) == T_REG \ + || system_reg_operand (X, VOIDmode))))) \ + ? GENERAL_REGS \ + : (((CLASS) == TARGET_REGS \ + || (TARGET_SHMEDIA && (CLASS) == SIBCALL_REGS)) \ + && !satisfies_constraint_Csy (X) \ + && (!REG_P (X) || ! GENERAL_REGISTER_P (REGNO (X)))) \ + ? GENERAL_REGS \ + : (((CLASS) == MAC_REGS || (CLASS) == PR_REGS) \ + && REG_P (X) && ! GENERAL_REGISTER_P (REGNO (X)) \ + && (CLASS) != REGNO_REG_CLASS (REGNO (X))) \ + ? GENERAL_REGS \ + : ((CLASS) != GENERAL_REGS && REG_P (X) \ + && TARGET_REGISTER_P (REGNO (X))) \ + ? 
GENERAL_REGS : (ELSE)) + +#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS,MODE,X) \ + SECONDARY_INOUT_RELOAD_CLASS(CLASS,MODE,X,NO_REGS) + +#define SECONDARY_INPUT_RELOAD_CLASS(CLASS,MODE,X) \ + ((REGCLASS_HAS_FP_REG (CLASS) \ + && ! TARGET_SHMEDIA \ + && immediate_operand ((X), (MODE)) \ + && ! ((fp_zero_operand (X) || fp_one_operand (X)) \ + && (MODE) == SFmode && fldi_ok ())) \ + ? R0_REGS \ + : ((CLASS) == FPUL_REGS \ + && ((REG_P (X) \ + && (REGNO (X) == MACL_REG || REGNO (X) == MACH_REG \ + || REGNO (X) == T_REG)) \ + || GET_CODE (X) == PLUS)) \ + ? GENERAL_REGS \ + : (CLASS) == FPUL_REGS && immediate_operand ((X), (MODE)) \ + ? (satisfies_constraint_I08 (X) \ + ? GENERAL_REGS \ + : R0_REGS) \ + : ((CLASS) == FPSCR_REGS \ + && ((REG_P (X) && REGNO (X) >= FIRST_PSEUDO_REGISTER) \ + || (MEM_P (X) && GET_CODE (XEXP ((X), 0)) == PLUS))) \ + ? GENERAL_REGS \ + : (REGCLASS_HAS_FP_REG (CLASS) \ + && TARGET_SHMEDIA \ + && immediate_operand ((X), (MODE)) \ + && (X) != CONST0_RTX (GET_MODE (X)) \ + && GET_MODE (X) != V4SFmode) \ + ? GENERAL_REGS \ + : (((MODE) == QImode || (MODE) == HImode) \ + && TARGET_SHMEDIA && inqhi_operand ((X), (MODE))) \ + ? GENERAL_REGS \ + : (TARGET_SHMEDIA && (CLASS) == GENERAL_REGS \ + && (GET_CODE (X) == LABEL_REF || PIC_ADDR_P (X))) \ + ? TARGET_REGS \ + : SECONDARY_INOUT_RELOAD_CLASS((CLASS),(MODE),(X), NO_REGS)) +#endif + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. + + If TARGET_SHMEDIA, we need two FP registers per word. + Otherwise we will need at most one register per word. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + (TARGET_SHMEDIA \ + && TEST_HARD_REG_BIT (reg_class_contents[CLASS], FIRST_FP_REG) \ + ? 
(GET_MODE_SIZE (MODE) + UNITS_PER_WORD/2 - 1) / (UNITS_PER_WORD/2) \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* If defined, gives a class of registers that cannot be used as the + operand of a SUBREG that changes the mode of the object illegally. */ +/* ??? We need to renumber the internal numbers for the frnn registers + when in little endian in order to allow mode size changes. */ + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + sh_cannot_change_mode_class (FROM, TO, CLASS) + +/* Stack layout; function entry, exit and calling. */ + +/* Define the number of registers that can hold parameters. + These macros are used only in other macro definitions below. */ + +#define NPARM_REGS(MODE) \ + (TARGET_FPU_ANY && (MODE) == SFmode \ + ? (TARGET_SH5 ? 12 : 8) \ + : (TARGET_SH4 || TARGET_SH2A_DOUBLE) && (GET_MODE_CLASS (MODE) == MODE_FLOAT \ + || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \ + ? (TARGET_SH5 ? 12 : 8) \ + : (TARGET_SH5 ? 8 : 4)) + +#define FIRST_PARM_REG (FIRST_GENERAL_REG + (TARGET_SH5 ? 2 : 4)) +#define FIRST_RET_REG (FIRST_GENERAL_REG + (TARGET_SH5 ? 2 : 0)) + +#define FIRST_FP_PARM_REG (FIRST_FP_REG + (TARGET_SH5 ? 0 : 4)) +#define FIRST_FP_RET_REG FIRST_FP_REG + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD + +/* Define this macro to nonzero if the addresses of local variable slots + are at negative offsets from the frame pointer. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset from the frame pointer to the first local variable slot to + be allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. */ +/* Don't define PUSH_ROUNDING, since the hardware doesn't do this. + When PUSH_ROUNDING is not defined, PARM_BOUNDARY will cause gcc to + do correct alignment. 
*/
+#if 0
+#define PUSH_ROUNDING(NPUSHED)  (((NPUSHED) + 3) & ~3)
+#endif
+
+/* Offset of first parameter from the argument pointer register value.  */
+#define FIRST_PARM_OFFSET(FNDECL)  0
+
+/* Value is the number of bytes of arguments automatically popped when
+   calling a subroutine.
+   CUM is the accumulated argument list.
+
+   On SHcompact, the call trampoline pops arguments off the stack:
+   8 bytes for each register counted in CUM.stack_regs.  For every other
+   target the value is 0.  */
+#define CALL_POPS_ARGS(CUM) (TARGET_SHCOMPACT ? (CUM).stack_regs * 8 : 0)
+
+/* Some subroutine macros specific to this machine.  */
+
+/* First register used to return a value of mode MODE.  This is
+   FIRST_FP_RET_REG for SFmode and SCmode when any FPU is present, and
+   for DFmode, SFmode, DCmode and SCmode when a double-precision FPU is
+   present; otherwise it is FIRST_RET_REG.  */
+#define BASE_RETURN_VALUE_REG(MODE) \
+  ((TARGET_FPU_ANY && ((MODE) == SFmode)) \
+   ? FIRST_FP_RET_REG \
+   : TARGET_FPU_ANY && (MODE) == SCmode \
+   ? FIRST_FP_RET_REG \
+   : (TARGET_FPU_DOUBLE \
+      && ((MODE) == DFmode || (MODE) == SFmode \
+          || (MODE) == DCmode || (MODE) == SCmode )) \
+   ? FIRST_FP_RET_REG \
+   : FIRST_RET_REG)
+
+/* First register used to pass an argument of mode MODE.  This is
+   FIRST_FP_PARM_REG for SFmode on SH2E, and for float / complex-float
+   mode classes on SH4 or SH2A-double; otherwise it is FIRST_PARM_REG.  */
+#define BASE_ARG_REG(MODE) \
+  ((TARGET_SH2E && ((MODE) == SFmode)) \
+   ? FIRST_FP_PARM_REG \
+   : (TARGET_SH4 || TARGET_SH2A_DOUBLE) && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+                    || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)\
+   ? FIRST_FP_PARM_REG \
+   : FIRST_PARM_REG)
+
+/* 1 if REGNO is a possible register number for function argument
+   passing: either one of the NPARM_REGS (SImode) registers starting at
+   FIRST_PARM_REG or, when any FPU is present, one of the
+   NPARM_REGS (SFmode) registers starting at FIRST_FP_PARM_REG.  */
+/* ??? There are some callers that pass REGNO as int, and others that pass
+   it as unsigned.  We get warnings unless we do casts everywhere.  */
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+  (((unsigned) (REGNO) >= (unsigned) FIRST_PARM_REG \
+    && (unsigned) (REGNO) < (unsigned) (FIRST_PARM_REG + NPARM_REGS (SImode)))\
+   || (TARGET_FPU_ANY \
+       && (unsigned) (REGNO) >= (unsigned) FIRST_FP_PARM_REG \
+       && (unsigned) (REGNO) < (unsigned) (FIRST_FP_PARM_REG \
+                                           + NPARM_REGS (SFmode))))
+
+/* Define a data type for recording info about an argument list
+   during the scan of that argument list.
This data type should
+   hold all necessary information about the function itself
+   and about the args processed so far, enough to enable macros
+   such as FUNCTION_ARG to determine where the next arg should go.
+
+   On SH, this is a single integer, which is a number of words
+   of arguments scanned so far (including the invisible argument,
+   if any, which holds the structure-value-address).
+   Thus NARGREGS or more means all following args should go on the stack.
+
+   NOTE(review): CUMULATIVE_ARGS is defined below as struct sh_args, not
+   a single integer, so the previous paragraph looks stale -- confirm.  */
+
+/* Index into sh_args.arg_count; GET_SH_ARG_CLASS maps a mode to one of
+   these.  */
+enum sh_arg_class { SH_ARG_INT = 0, SH_ARG_FLOAT = 1 };
+struct sh_args {
+  /* Per-class argument register counters, indexed by enum sh_arg_class.  */
+    int arg_count[2];
+  /* When nonzero, PASS_IN_REG_P is false for every argument.  */
+    int force_mem;
+  /* Nonzero if a prototype is available for the function.  */
+    int prototype_p;
+  /* The number of an odd floating-point register, that should be used
+     for the next argument of type float.  */
+    int free_single_fp_reg;
+  /* Whether we're processing an outgoing function call.  */
+    int outgoing;
+  /* The number of general-purpose registers that should have been
+     used to pass partial arguments, that are passed totally on the
+     stack.  On SHcompact, a call trampoline will pop them off the
+     stack before calling the actual function, and, if the called
+     function is implemented in SHcompact mode, the incoming arguments
+     decoder will push such arguments back onto the stack.  For
+     incoming arguments, STACK_REGS also takes into account other
+     arguments passed by reference, that the decoder will also push
+     onto the stack.  */
+    int stack_regs;
+  /* The number of general-purpose registers that should have been
+     used to pass arguments, if the arguments didn't have to be passed
+     by reference.  */
+    int byref_regs;
+  /* Set as by shcompact_byref if the current argument is to be passed
+     by reference.  */
+    int byref;
+
+  /* call_cookie is a bitmask used by call expanders, as well as
+     function prologue and epilogues, to allow SHcompact to comply
+     with the SH5 32-bit ABI, that requires 64-bit registers to be
+     used even though only the lower 32-bit half is visible in
+     SHcompact mode.  The strategy is to call SHmedia trampolines.
+
+     The alternatives for each of the argument-passing registers are
+     (a) leave it unchanged; (b) pop it off the stack; (c) load its
+     contents from the address in it; (d) add 8 to it, storing the
+     result in the next register, then (c); (e) copy it from some
+     floating-point register.
+
+     Regarding copies from floating-point registers, r2 may only be
+     copied from dr0.  r3 may be copied from dr0 or dr2.  r4 may be
+     copied from dr0, dr2 or dr4.  r5 may be copied from dr0, dr2,
+     dr4 or dr6.  r6 may be copied from dr0, dr2, dr4, dr6 or dr8.
+     r7 through to r9 may be copied from dr0, dr2, dr4, dr6, dr8 or
+     dr10.
+
+     The bit mask is structured as follows:
+
+     - 1 bit to tell whether to set up a return trampoline.
+
+     - 3 bits to count the number of consecutive registers to pop off
+       the stack.
+
+     - 4 bits for each of r9, r8, r7 and r6.
+
+     - 3 bits for each of r5, r4, r3 and r2.
+
+     - 3 bits set to 0 (the most significant ones)
+       NOTE(review): the layout drawn below already accounts for all 32
+       bits (1 + 3 + 4*4 + 4*3), with r2 in bits 31..29, so this item
+       does not match the diagram -- confirm.
+
+     3 2            1           0
+     1098 7654 3210 9876 5432 1098 7654 3210
+     FLPF LPFL PFLP FFLP FFLP FFLP FFLP SSST
+     2223 3344 4555 6666 7777 8888 9999 SSS-
+
+     - If F is set, the register must be copied from an FP register,
+       whose number is encoded in the remaining bits.
+
+     - Else, if L is set, the register must be loaded from the address
+       contained in it.  If the P bit is *not* set, the address of the
+       following dword should be computed first, and stored in the
+       following register.
+
+     - Else, if P is set, the register alone should be popped off the
+       stack.
+
+     - After all this processing, the number of registers represented
+       in SSS will be popped off the stack.  This is an optimization
+       for pushing/popping consecutive registers, typically used for
+       varargs and large arguments partially passed in registers.
+
+     - If T is set, a return trampoline will be set up for 64-bit
+       return values to be split into 2 32-bit registers.  */
+  long call_cookie;
+
+  /* This is set to nonzero when the call in question must use the Renesas ABI,
+     even without the -mrenesas option.  */
+    int renesas_abi;
+};
+
+/* Builders and accessors for the call_cookie fields described above.
+   The *_SHIFT macros give the bit position of a field: the return
+   trampoline flag (T) is at bit 0 and the stack sequence count (SSS) at
+   bits 1..3.  */
+#define CALL_COOKIE_RET_TRAMP_SHIFT 0
+#define CALL_COOKIE_RET_TRAMP(VAL) ((VAL) << CALL_COOKIE_RET_TRAMP_SHIFT)
+#define CALL_COOKIE_STACKSEQ_SHIFT 1
+#define CALL_COOKIE_STACKSEQ(VAL) ((VAL) << CALL_COOKIE_STACKSEQ_SHIFT)
+#define CALL_COOKIE_STACKSEQ_GET(COOKIE) \
+  (((COOKIE) >> CALL_COOKIE_STACKSEQ_SHIFT) & 7)
+/* REG is a zero-based index, not a hard register number: the formula
+   yields 29, 26, 23, 20 for REG 0..3 (the 3-bit fields drawn for r2..r5)
+   and 16, 12, 8, 4 for REG 4..7 (the 4-bit fields drawn for r6..r9).  */
+#define CALL_COOKIE_INT_REG_SHIFT(REG) \
+  (4 * (7 - (REG)) + (((REG) <= 2) ? ((REG) - 2) : 1) + 3)
+#define CALL_COOKIE_INT_REG(REG, VAL) \
+  ((VAL) << CALL_COOKIE_INT_REG_SHIFT (REG))
+/* Extract the field for REG: 3 bits wide for REG < 4, 4 bits otherwise.  */
+#define CALL_COOKIE_INT_REG_GET(COOKIE, REG) \
+  (((COOKIE) >> CALL_COOKIE_INT_REG_SHIFT (REG)) & ((REG) < 4 ? 7 : 15))
+
+#define CUMULATIVE_ARGS  struct sh_args
+
+/* Classify MODE as SH_ARG_FLOAT or SH_ARG_INT for indexing arg_count.  */
+#define GET_SH_ARG_CLASS(MODE) \
+  ((TARGET_FPU_ANY && (MODE) == SFmode) \
+   ? SH_ARG_FLOAT \
+   /* There's no mention of complex float types in the SH5 ABI, so we
+      should presumably handle them as aggregate types.  */ \
+   : TARGET_SH5 && GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT \
+   ? SH_ARG_INT \
+   : TARGET_FPU_DOUBLE && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+                           || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
+   ? SH_ARG_FLOAT : SH_ARG_INT)
+
+/* Number of words needed to hold SIZE bytes, rounding up.  */
+#define ROUND_ADVANCE(SIZE) \
+  (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Round a register number up to a proper boundary for an arg of mode
+   MODE.
+
+   The SH doesn't care about double alignment, so we only
+   round doubles to even regs when asked to explicitly.
+
+   The value is the arg_count entry for MODE's argument class, bumped to
+   the next even number when rounding applies.  */
+
+#define ROUND_REG(CUM, MODE) \
+   (((TARGET_ALIGN_DOUBLE \
+      || ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && ((MODE) == DFmode || (MODE) == DCmode) \
+          && (CUM).arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (MODE)))\
+     && GET_MODE_UNIT_SIZE ((MODE)) > UNITS_PER_WORD) \
+    ? ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] \
+       + ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] & 1)) \
+    : (CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)])
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+   for a call to a function whose data type is FNTYPE.
+   For a library call, FNTYPE is 0.
+
+   On SH, the offset always starts at 0: the first parm reg is always
+   the same reg for a given argument class.
+
+   For TARGET_HITACHI, the structure value pointer is passed in memory.
+
+   Both macros forward to sh_init_cumulative_args; the ordinary form
+   passes VOIDmode as the last argument, the libcall form passes MODE.  */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+    sh_init_cumulative_args (& (CUM), (FNTYPE), (LIBNAME), (FNDECL), (N_NAMED_ARGS), VOIDmode)
+
+#define INIT_CUMULATIVE_LIBCALL_ARGS(CUM, MODE, LIBNAME) \
+    sh_init_cumulative_args (& (CUM), NULL_TREE, (LIBNAME), NULL_TREE, 0, (MODE))
+
+/* Return boolean indicating arg of mode MODE will be passed in a reg.
+   This macro is only used in this file.
+
+   TYPE may be 0.  The result is always false when CUM.force_mem is
+   set.  */
+
+#define PASS_IN_REG_P(CUM, MODE, TYPE) \
+  (((TYPE) == 0 \
+    || (! TREE_ADDRESSABLE ((TYPE)) \
+        && (! (TARGET_HITACHI || (CUM).renesas_abi) \
+            || ! (AGGREGATE_TYPE_P (TYPE) \
+                  || (!TARGET_FPU_ANY \
+                      && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+                          && GET_MODE_SIZE (MODE) > GET_MODE_SIZE (SFmode))))))) \
+   && ! (CUM).force_mem \
+   && (TARGET_SH2E \
+       ? ((MODE) == BLKmode \
+          ? (((CUM).arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD \
+              + int_size_in_bytes (TYPE)) \
+             <= NPARM_REGS (SImode) * UNITS_PER_WORD) \
+          : ((ROUND_REG((CUM), (MODE)) \
+              + HARD_REGNO_NREGS (BASE_ARG_REG (MODE), (MODE))) \
+             <= NPARM_REGS (MODE))) \
+       : ROUND_REG ((CUM), (MODE)) < NPARM_REGS (MODE)))
+
+/* By accident we got stuck with passing SCmode on SH4 little endian
+   in two registers that are nominally successive - which is different from
+   two single SFmode values, where we take endianness translation into
+   account.
That does not work at all if an odd number of registers is
+   already in use, so that got fixed, but library functions are still more
+   likely to use complex numbers without mixing them with SFmode arguments
+   (which in C would have to be structures), so for the sake of ABI
+   compatibility the way SCmode values are passed when an even number of
+   FP registers is in use remains different from a pair of SFmode values for
+   now.
+   I.e.:
+   foo (double a); a: fr5,fr4
+   foo (float a, float b); a: fr5 b: fr4
+   foo (__complex float a); a.real: fr4 a.imag: fr5 - for consistency,
+                            this should be the other way round...
+   foo (float a, __complex float b); a: fr5 b.real: fr4 b.imag: fr7  */
+#define FUNCTION_ARG_SCmode_WART 1
+
+/* If an argument of size 5, 6 or 7 bytes is to be passed in a 64-bit
+   register in SHcompact mode, it must be padded in the most
+   significant end.  This means that passing it by reference wouldn't
+   pad properly on a big-endian machine.  In this particular case, we
+   pass this argument on the stack, in a way that the call trampoline
+   will load its value into the appropriate register.
+
+   Nonzero only for BLKmode arguments on big-endian SHcompact whose
+   int_size_in_bytes (TYPE) is strictly between 4 and 8.  */
+#define SHCOMPACT_FORCE_ON_STACK(MODE,TYPE) \
+  ((MODE) == BLKmode \
+   && TARGET_SHCOMPACT \
+   && ! TARGET_LITTLE_ENDIAN \
+   && int_size_in_bytes (TYPE) > 4 \
+   && int_size_in_bytes (TYPE) < 8)
+
+/* Minimum alignment, in bits, for an argument to be passed by
+   callee-copy reference.  We need such arguments to be aligned to 8 byte
+   boundaries, because they'll be loaded using quad loads.  */
+#define SH_MIN_ALIGN_FOR_CALLEE_COPY (8 * BITS_PER_UNIT)
+
+/* The SH5 ABI requires floating-point arguments to be passed to
+   functions without a prototype in both an FP register and a regular
+   register or the stack.  When passing the argument in both FP and
+   general-purpose registers, list the FP register first.
*/
+/* Builds a two-element PARALLEL of mode MODE.  While general-purpose
+   argument registers remain (arg_count[SH_ARG_INT] < NPARM_REGS (SImode))
+   the elements are the next FP argument register followed by the next
+   general-purpose one; otherwise they are NULL_RTX followed by the next
+   FP argument register.  */
+#define SH5_PROTOTYPELESS_FLOAT_ARG(CUM,MODE) \
+  (gen_rtx_PARALLEL \
+   ((MODE), \
+    gen_rtvec (2, \
+               gen_rtx_EXPR_LIST \
+               (VOIDmode, \
+                ((CUM).arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) \
+                 ? gen_rtx_REG ((MODE), FIRST_FP_PARM_REG \
+                                + (CUM).arg_count[(int) SH_ARG_FLOAT]) \
+                 : NULL_RTX), \
+                const0_rtx), \
+               gen_rtx_EXPR_LIST \
+               (VOIDmode, \
+                ((CUM).arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) \
+                 ? gen_rtx_REG ((MODE), FIRST_PARM_REG \
+                                + (CUM).arg_count[(int) SH_ARG_INT]) \
+                 : gen_rtx_REG ((MODE), FIRST_FP_PARM_REG \
+                                + (CUM).arg_count[(int) SH_ARG_FLOAT])), \
+                const0_rtx))))
+
+/* The SH5 ABI requires regular registers or stack slots to be
+   reserved for floating-point arguments.  Registers are taken care of
+   in FUNCTION_ARG_ADVANCE, but stack slots must be reserved here.
+   Unfortunately, there's no way to just reserve a stack slot, so
+   we'll end up needlessly storing a copy of the argument in the
+   stack.  For incoming arguments, however, the PARALLEL will be
+   optimized to the register-only form, and the value in the stack
+   slot won't be used at all.
+
+   The result is a plain REG while general-purpose argument registers
+   remain, and otherwise a PARALLEL of NULL_RTX followed by REG.  */
+#define SH5_PROTOTYPED_FLOAT_ARG(CUM,MODE,REG) \
+  ((CUM).arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) \
+   ? gen_rtx_REG ((MODE), (REG)) \
+   : gen_rtx_PARALLEL ((MODE), \
+                       gen_rtvec (2, \
+                                  gen_rtx_EXPR_LIST \
+                                  (VOIDmode, NULL_RTX, \
+                                   const0_rtx), \
+                                  gen_rtx_EXPR_LIST \
+                                  (VOIDmode, gen_rtx_REG ((MODE), \
+                                                          (REG)), \
+                                   const0_rtx))))
+
+/* Nonzero if, on SH5, an argument of mode BLKmode, TImode, CDImode or
+   DCmode would need more general-purpose argument registers than are
+   left, counting one register per 8 bytes (rounded up).  NAMED is not
+   used by the expansion.  */
+#define SH5_WOULD_BE_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) \
+  (TARGET_SH5 \
+   && ((MODE) == BLKmode || (MODE) == TImode || (MODE) == CDImode \
+       || (MODE) == DCmode) \
+   && ((CUM).arg_count[(int) SH_ARG_INT] \
+       + (((MODE) == BLKmode ? int_size_in_bytes (TYPE) \
+                             : GET_MODE_SIZE (MODE)) \
+          + 7) / 8) > NPARM_REGS (SImode))
+
+/* Perform any actions needed for a function that is receiving a
+   variable number of arguments.  */
+
+/* Call the function profiler with a given profile label.
+ We use two .aligns, so as to make sure that both the .long is aligned + on a 4 byte boundary, and that the .long is a fixed distance (2 bytes) + from the trapa instruction. */ + +#define FUNCTION_PROFILER(STREAM,LABELNO) \ +{ \ + if (TARGET_SHMEDIA) \ + { \ + fprintf((STREAM), "\tmovi\t33,r0\n"); \ + fprintf((STREAM), "\ttrapa\tr0\n"); \ + asm_fprintf((STREAM), "\t.long\t%LLP%d\n", (LABELNO)); \ + } \ + else \ + { \ + fprintf((STREAM), "\t.align\t2\n"); \ + fprintf((STREAM), "\ttrapa\t#33\n"); \ + fprintf((STREAM), "\t.align\t2\n"); \ + asm_fprintf((STREAM), "\t.long\t%LLP%d\n", (LABELNO)); \ + } \ +} + +/* Define this macro if the code for function profiling should come + before the function prologue. Normally, the profiling code comes + after. */ + +#define PROFILE_BEFORE_PROLOGUE + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ + +#define EXIT_IGNORE_STACK 1 + +/* + On the SH, the trampoline looks like + 2 0002 D202 mov.l l2,r2 + 1 0000 D301 mov.l l1,r3 + 3 0004 422B jmp @r2 + 4 0006 0009 nop + 5 0008 00000000 l1: .long area + 6 000c 00000000 l2: .long function */ + +/* Length in units of the trampoline for entering a nested function. */ +#define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : 16) + +/* Alignment required for a trampoline in bits . */ +#define TRAMPOLINE_ALIGNMENT \ + ((CACHE_LOG < 3 || (optimize_size && ! TARGET_HARVARD)) ? 32 \ + : TARGET_SHMEDIA ? 256 : 64) + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. + FRAMEADDR is already the frame pointer of the COUNT frame, so we + can ignore COUNT. */ + +#define RETURN_ADDR_RTX(COUNT, FRAME) \ + (((COUNT) == 0) ? 
sh_get_pr_initial_val () : (rtx) 0) + +/* A C expression whose value is RTL representing the location of the + incoming return address at the beginning of any function, before the + prologue. This RTL is either a REG, indicating that the return + value is saved in REG, or a MEM representing a location in + the stack. */ +#define INCOMING_RETURN_ADDR_RTX \ + gen_rtx_REG (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG) + +/* Addressing modes, and classification of registers for them. */ +#define HAVE_POST_INCREMENT TARGET_SH1 +#define HAVE_PRE_DECREMENT TARGET_SH1 + +#define USE_LOAD_POST_INCREMENT(mode) ((mode == SImode || mode == DImode) \ + ? 0 : TARGET_SH1) +#define USE_LOAD_PRE_DECREMENT(mode) 0 +#define USE_STORE_POST_INCREMENT(mode) 0 +#define USE_STORE_PRE_DECREMENT(mode) ((mode == SImode || mode == DImode) \ + ? 0 : TARGET_SH1) + +#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ + (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ + < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2))) + +#define STORE_BY_PIECES_P(SIZE, ALIGN) \ + (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ + < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2))) + +#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P(SIZE, ALIGN) + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ + +#define REGNO_OK_FOR_BASE_P(REGNO) \ + (GENERAL_OR_AP_REGISTER_P (REGNO) \ + || GENERAL_OR_AP_REGISTER_P (reg_renumber[(REGNO)])) +#define REGNO_OK_FOR_INDEX_P(REGNO) \ + (TARGET_SHMEDIA \ + ? 
(GENERAL_REGISTER_P (REGNO) \ + || GENERAL_REGISTER_P ((unsigned) reg_renumber[(REGNO)])) \ + : (REGNO) == R0_REG || (unsigned) reg_renumber[(REGNO)] == R0_REG) + +/* Maximum number of registers that can appear in a valid memory + address. */ + +#define MAX_REGS_PER_ADDRESS 2 + +/* Recognize any constant value that is a valid address. */ + +#define CONSTANT_ADDRESS_P(X) (GET_CODE (X) == LABEL_REF) + +/* Nonzero if the constant value X is a legitimate general operand. */ +/* can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */ + +#define LEGITIMATE_CONSTANT_P(X) \ + (TARGET_SHMEDIA \ + ? ((GET_MODE (X) != DFmode \ + && GET_MODE_CLASS (GET_MODE (X)) != MODE_VECTOR_FLOAT) \ + || (X) == CONST0_RTX (GET_MODE (X)) \ + || ! TARGET_SHMEDIA_FPU \ + || TARGET_SHMEDIA64) \ + : (GET_CODE (X) != CONST_DOUBLE \ + || GET_MODE (X) == DFmode || GET_MODE (X) == SFmode \ + || GET_MODE (X) == DImode || GET_MODE (X) == VOIDmode)) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + The suitable hard regs are always accepted and all pseudo regs + are also accepted if STRICT is not set. */ + +/* Nonzero if X is a reg that can be used as a base reg. */ +#define REG_OK_FOR_BASE_P(X, STRICT) \ + (GENERAL_OR_AP_REGISTER_P (REGNO (X)) \ + || (!STRICT && REGNO (X) >= FIRST_PSEUDO_REGISTER)) + +/* Nonzero if X is a reg that can be used as an index. */ +#define REG_OK_FOR_INDEX_P(X, STRICT) \ + ((TARGET_SHMEDIA ? GENERAL_REGISTER_P (REGNO (X)) \ + : REGNO (X) == R0_REG) \ + || (!STRICT && REGNO (X) >= FIRST_PSEUDO_REGISTER)) + +/* Nonzero if X/OFFSET is a reg that can be used as an index. */ +#define SUBREG_OK_FOR_INDEX_P(X, OFFSET, STRICT) \ + ((TARGET_SHMEDIA ? GENERAL_REGISTER_P (REGNO (X)) \ + : REGNO (X) == R0_REG && OFFSET == 0) \ + || (!STRICT && REGNO (X) >= FIRST_PSEUDO_REGISTER)) + +/* Macros for extra constraints. 
*/
+
+/* Nonzero if OP is a LABEL_REF, or a CONST wrapping
+   (PLUS LABEL_REF CONST_INT).  */
+#define IS_PC_RELATIVE_LOAD_ADDR_P(OP)					\
+  ((GET_CODE ((OP)) == LABEL_REF)					\
+   || (GET_CODE ((OP)) == CONST						\
+       && GET_CODE (XEXP ((OP), 0)) == PLUS				\
+       && GET_CODE (XEXP (XEXP ((OP), 0), 0)) == LABEL_REF		\
+       && CONST_INT_P (XEXP (XEXP ((OP), 0), 1))))
+
+/* Nonzero if OP is a constant other than a CONST_INT or CONST_DOUBLE
+   and, when generating PIC, is additionally a legitimate PIC operand
+   that is neither a PIC address nor a LABEL_REF.  */
+#define IS_NON_EXPLICIT_CONSTANT_P(OP)					\
+  (CONSTANT_P (OP)							\
+   && !CONST_INT_P (OP)							\
+   && GET_CODE (OP) != CONST_DOUBLE					\
+   && (!flag_pic							\
+       || (LEGITIMATE_PIC_OPERAND_P (OP)				\
+           && !PIC_ADDR_P (OP)						\
+           && GET_CODE (OP) != LABEL_REF)))
+
+/* Check whether OP is a datalabel unspec, i.e. an UNSPEC_DATALABEL
+   whose single operand is a LABEL_REF.  */
+#define DATALABEL_REF_NO_CONST_P(OP) \
+  (GET_CODE (OP) == UNSPEC \
+   && XINT ((OP), 1) == UNSPEC_DATALABEL \
+   && XVECLEN ((OP), 0) == 1 \
+   && GET_CODE (XVECEXP ((OP), 0, 0)) == LABEL_REF)
+
+/* The following predicates recognize a CONST wrapping an UNSPEC with
+   the named unspec code (GOT, GOTPLT, GOTOFF, PIC, PCREL_SYMOFF).  */
+#define GOT_ENTRY_P(OP) \
+  (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+   && XINT (XEXP ((OP), 0), 1) == UNSPEC_GOT)
+
+#define GOTPLT_ENTRY_P(OP) \
+  (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+   && XINT (XEXP ((OP), 0), 1) == UNSPEC_GOTPLT)
+
+/* Unlike the others, this one tests a bare UNSPEC, not a CONST.  */
+#define UNSPEC_GOTOFF_P(OP) \
+  (GET_CODE (OP) == UNSPEC && XINT ((OP), 1) == UNSPEC_GOTOFF)
+
+/* A CONST wrapping either a GOTOFF unspec or
+   (PLUS gotoff-unspec CONST_INT).  */
+#define GOTOFF_P(OP) \
+  (GET_CODE (OP) == CONST \
+   && (UNSPEC_GOTOFF_P (XEXP ((OP), 0)) \
+       || (GET_CODE (XEXP ((OP), 0)) == PLUS \
+           && UNSPEC_GOTOFF_P (XEXP (XEXP ((OP), 0), 0)) \
+           && CONST_INT_P (XEXP (XEXP ((OP), 0), 1)))))
+
+#define PIC_ADDR_P(OP) \
+  (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+   && XINT (XEXP ((OP), 0), 1) == UNSPEC_PIC)
+
+#define PCREL_SYMOFF_P(OP) \
+  (GET_CODE (OP) == CONST \
+   && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+   && XINT (XEXP ((OP), 0), 1) == UNSPEC_PCREL_SYMOFF)
+
+/* Nonzero if OP is a LABEL_REF or SYMBOL_REF, a CONST wrapping a
+   LABEL_REF, SYMBOL_REF or datalabel unspec, or a CONST wrapping one of
+   those plus a CONST_INT.  */
+#define NON_PIC_REFERENCE_P(OP) \
+  (GET_CODE (OP) == LABEL_REF || GET_CODE (OP) == SYMBOL_REF \
+   || (GET_CODE (OP) == CONST \
+       && (GET_CODE (XEXP ((OP), 0)) == LABEL_REF \
+           || GET_CODE (XEXP ((OP), 0)) == SYMBOL_REF \
+           || DATALABEL_REF_NO_CONST_P (XEXP ((OP), 0)))) \
+   || (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == PLUS \
+       && (GET_CODE (XEXP (XEXP ((OP), 0), 0)) == SYMBOL_REF \
+           || GET_CODE (XEXP (XEXP ((OP), 0), 0)) == LABEL_REF \
+           || DATALABEL_REF_NO_CONST_P (XEXP (XEXP ((OP), 0), 0))) \
+       && CONST_INT_P (XEXP (XEXP ((OP), 0), 1))))
+
+#define PIC_REFERENCE_P(OP) \
+  (GOT_ENTRY_P (OP) || GOTPLT_ENTRY_P (OP) \
+   || GOTOFF_P (OP) || PIC_ADDR_P (OP))
+
+/* With -fpic this accepts GOT, GOTPLT, GOTOFF and PCREL_SYMOFF
+   operands; without it, any NON_PIC_REFERENCE_P operand.  */
+#define MOVI_SHORI_BASE_OPERAND_P(OP) \
+  (flag_pic \
+   ? (GOT_ENTRY_P (OP) || GOTPLT_ENTRY_P (OP)  || GOTOFF_P (OP) \
+      || PCREL_SYMOFF_P (OP)) \
+   : NON_PIC_REFERENCE_P (OP))
+
+/* Nonzero if X is a REG acceptable as a base register, or a SUBREG of
+   such a REG for which TRULY_NOOP_TRUNCATION holds between the inner
+   and outer mode sizes.  */
+#define MAYBE_BASE_REGISTER_RTX_P(X, STRICT)			\
+  ((REG_P (X) && REG_OK_FOR_BASE_P (X, STRICT))	\
+   || (GET_CODE (X) == SUBREG					\
+       && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE ((X))),	\
+                                 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (X)))) \
+       && REG_P (SUBREG_REG (X))			\
+       && REG_OK_FOR_BASE_P (SUBREG_REG (X), STRICT)))
+
+/* Since this must be r0, which is a single register class, we must check
+   SUBREGs more carefully, to be sure that we don't accept one that extends
+   outside the class.  */
+#define MAYBE_INDEX_REGISTER_RTX_P(X, STRICT)				\
+  ((REG_P (X) && REG_OK_FOR_INDEX_P (X, STRICT))	\
+   || (GET_CODE (X) == SUBREG					\
+       && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE ((X))), \
+                                 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (X)))) \
+       && REG_P (SUBREG_REG (X))		\
+       && SUBREG_OK_FOR_INDEX_P (SUBREG_REG (X), SUBREG_BYTE (X), STRICT)))
+
+/* The strict variants are selected when REG_OK_STRICT is defined.  */
+#ifdef REG_OK_STRICT
+#define BASE_REGISTER_RTX_P(X) MAYBE_BASE_REGISTER_RTX_P(X, true)
+#define INDEX_REGISTER_RTX_P(X) MAYBE_INDEX_REGISTER_RTX_P(X, true)
+#else
+#define BASE_REGISTER_RTX_P(X) MAYBE_BASE_REGISTER_RTX_P(X, false)
+#define INDEX_REGISTER_RTX_P(X) MAYBE_INDEX_REGISTER_RTX_P(X, false)
+#endif
+
+#define ALLOW_INDEXED_ADDRESS \
+  ((!TARGET_SHMEDIA32 && !TARGET_SHCOMPACT) || TARGET_ALLOW_INDEXED_ADDRESS)
+
+/* Jump to label WIN if sh_legitimate_index_p accepts OP for MODE.  */
+#define GO_IF_LEGITIMATE_INDEX(MODE, OP, WIN)  \
+  do {								\
+    if (sh_legitimate_index_p ((MODE), (OP)))	\
+      goto WIN;							\
+  } while (0)
+
+/* A C compound statement that attempts to replace X, which is an address
+   that needs reloading, with a valid memory address for an operand of
+   mode MODE.  WIN is a C statement label elsewhere in the code.
+   IND_LEVELS is not used by the expansion.  */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN)	\
+  do {									\
+    if (sh_legitimize_reload_address (&(X), (MODE), (OPNUM), (TYPE)))	\
+      goto WIN;								\
+  } while (0)
+
+/* Specify the machine mode that this machine uses
+   for the index in the tablejump instruction.  */
+#define CASE_VECTOR_MODE ((! optimize || TARGET_BIGTABLE) ? SImode : HImode)
+
+/* Choose the narrowest table entry mode for offsets in
+   [MIN_OFFSET, MAX_OFFSET].  The two QImode branches also store the
+   signedness into ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned as a side
+   effect.  */
+#define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \
+((MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 127 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, QImode) \
+ : (MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 255 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, QImode) \
+ : (MIN_OFFSET) >= -32768 && (MAX_OFFSET) <= 32767 ? HImode \
+ : SImode)
+
+/* Define as C expression which evaluates to nonzero if the tablejump
+   instruction expects the table to contain offsets from the address of the
+   table.
+   Do not define this if the table should contain absolute addresses.  */
+#define CASE_VECTOR_PC_RELATIVE 1
+
+/* Define it here, so that it doesn't get bumped to 64-bits on SHmedia.  */
+#define FLOAT_TYPE_SIZE 32
+
+/* Since the SH2e has only `float' support, it is desirable to make all
+   floating point types equivalent to `float'.  */
+#define DOUBLE_TYPE_SIZE ((TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH2A_DOUBLE) ? 32 : 64)
+
+/* 'char' is signed by default.  */
+#define DEFAULT_SIGNED_CHAR  1
+
+/* The type of size_t: "long unsigned int" on SH5, "unsigned int"
+   otherwise.  */
+#define SIZE_TYPE (TARGET_SH5 ? "long unsigned int" : "unsigned int")
+
+#undef  PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_SH5 ? "long int" : "int")
+
+#define WCHAR_TYPE "short unsigned int"
+#define WCHAR_TYPE_SIZE 16
+
+#define SH_ELF_WCHAR_TYPE "long int"
+
+/* Max number of bytes we can move from memory to memory
+   in one reasonably fast instruction.  */
+#define MOVE_MAX (TARGET_SHMEDIA ? 8 : 4)
+
+/* Maximum value possibly taken by MOVE_MAX.  Must be defined whenever
+   MOVE_MAX is not a compile-time constant.  */
+#define MAX_MOVE_MAX 8
+
+/* Max number of bytes we want move_by_pieces to be able to copy
+   efficiently.  */
+#define MOVE_MAX_PIECES (TARGET_SH4 || TARGET_SHMEDIA ? 8 : 4)
+
+/* Define if operations between registers always perform the operation
+   on the full register even if a narrower mode is specified.  */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+   will either zero-extend or sign-extend.  The value of this macro should
+   be the code that says which one of the two operations is implicitly
+   done, UNKNOWN if none.  */
+/* For SHmedia, we can truncate to QImode easier using zero extension.
*/
+/* FP registers can load SImode values, but don't implicitly sign-extend
+   them to DImode.  */
+/* Result: ZERO_EXTEND for QImode on SHmedia, UNKNOWN for SImode, and
+   SIGN_EXTEND for every other mode.  */
+#define LOAD_EXTEND_OP(MODE) \
+ (((MODE) == QImode  && TARGET_SHMEDIA) ? ZERO_EXTEND \
+  : (MODE) != SImode ? SIGN_EXTEND : UNKNOWN)
+
+/* Define if loading short immediate values into registers sign extends.  */
+#define SHORT_IMMEDIATES_SIGN_EXTEND
+
+/* Nonzero if access to memory by bytes is no faster than for words.  */
+#define SLOW_BYTE_ACCESS 1
+
+/* Immediate shift counts are truncated by the output routines (or was it
+   the assembler?).  Shift counts in a register are truncated by SH.  Note
+   that the native compiler puts too large (> 32) immediate shift counts
+   into a register and shifts by the register, letting the SH decide what
+   to do instead of doing that itself.  */
+/* ??? The library routines in lib1funcs.asm truncate the shift count.
+   However, the SH3 has hardware shifts that do not truncate exactly as gcc
+   expects - the sign bit is significant - so it appears that we need to
+   leave this zero for correct SH3 code.  */
+/* The macro is also zero for SH2A.  */
+#define SHIFT_COUNT_TRUNCATED (! TARGET_SH3 && ! TARGET_SH2A)
+
+/* All integers have the same format so truncation is easy.  */
+/* But SHmedia must sign-extend DImode when truncating to SImode.  */
+/* Hence on SHmedia a truncation is a no-op only when the input is
+   narrower than 64 bits or the output is at least 64 bits wide.  */
+#define TRULY_NOOP_TRUNCATION(OUTPREC,INPREC) \
+ (!TARGET_SHMEDIA || (INPREC) < 64 || (OUTPREC) >= 64)
+
+/* Define this if addresses of constant functions
+   shouldn't be put through pseudo regs where they can be cse'd.
+   Desirable on machines where ordinary constants are expensive
+   but a CALL with constant address is cheap.  */
+/*#define NO_FUNCTION_CSE 1*/
+
+/* The machine modes of pointers and functions: DImode on SHmedia64,
+   SImode everywhere else.  */
+#define Pmode  (TARGET_SHMEDIA64 ? DImode : SImode)
+#define FUNCTION_MODE  Pmode
+
+/* The multiply insn on the SH1 and the divide insns on the SH1 and SH2
+   are actually function calls with some special constraints on arguments
+   and register usage.
+
+   These macros tell reorg that the references to arguments and
+   register clobbers for insns of type sfunc do not appear to happen
+   until after the millicode call.  This allows reorg to put insns
+   which set the argument registers into the delay slot of the millicode
+   call -- thus they act more like traditional CALL_INSNs.
+
+   get_attr_is_sfunc will try to recognize the given insn, so make sure to
+   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
+   in particular.
+
+   The two macros below have identical expansions.  */
+
+#define INSN_SETS_ARE_DELAYED(X) 		\
+  ((NONJUMP_INSN_P (X)			\
+    && GET_CODE (PATTERN (X)) != SEQUENCE	\
+    && GET_CODE (PATTERN (X)) != USE		\
+    && GET_CODE (PATTERN (X)) != CLOBBER	\
+    && get_attr_is_sfunc (X)))
+
+#define INSN_REFERENCES_ARE_DELAYED(X) 		\
+  ((NONJUMP_INSN_P (X)			\
+    && GET_CODE (PATTERN (X)) != SEQUENCE	\
+    && GET_CODE (PATTERN (X)) != USE		\
+    && GET_CODE (PATTERN (X)) != CLOBBER	\
+    && get_attr_is_sfunc (X)))
+
+
+/* Position Independent Code.  */
+
+/* We can't directly access anything that contains a symbol,
+   nor can we indirect via the constant pool.
+   On SHmedia a LABEL_REF is always accepted.  */
+#define LEGITIMATE_PIC_OPERAND_P(X)				\
+        ((! nonpic_symbol_mentioned_p (X)			\
+          && (GET_CODE (X) != SYMBOL_REF			\
+              || ! CONSTANT_POOL_ADDRESS_P (X)			\
+              || ! nonpic_symbol_mentioned_p (get_pool_constant (X)))) \
+         || (TARGET_SHMEDIA && GET_CODE (X) == LABEL_REF))
+
+/* Nonzero if X is a SYMBOL_REF or LABEL_REF for which
+   nonpic_symbol_mentioned_p holds.  */
+#define SYMBOLIC_CONST_P(X)	\
+((GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == LABEL_REF)	\
+  && nonpic_symbol_mentioned_p (X))
+
+/* Compute extra cost of moving data between one register class
+   and another.  */
+
+/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
+   uses this information.  Hence, the general register <-> floating point
+   register information here is not used for SFmode.  */
+
+/* Nonzero for GENERAL_REGS and R0_REGS, and for SIBCALL_REGS outside
+   SHmedia.  */
+#define REGCLASS_HAS_GENERAL_REG(CLASS) \
+  ((CLASS) == GENERAL_REGS || (CLASS) == R0_REGS \
+   || (! TARGET_SHMEDIA && (CLASS) == SIBCALL_REGS))
+
+/* Nonzero for FP0_REGS, FP_REGS, DF_REGS and DF_HI_REGS.  */
+#define REGCLASS_HAS_FP_REG(CLASS) \
+  ((CLASS) == FP0_REGS || (CLASS) == FP_REGS \
+   || (CLASS) == DF_REGS || (CLASS) == DF_HI_REGS)
+
+/* ??? Perhaps make MEMORY_MOVE_COST depend on compiler option?  This
+   would be so that people with slow memory systems could generate
+   different code that does fewer memory accesses.  */
+
+/* A C expression for the cost of a branch instruction.  A value of 1
+   is the default; other values are interpreted relative to that.
+   The SH1 does not have delay slots, hence we get a pipeline stall
+   at every branch.  The SH4 is superscalar, so the single delay slot
+   is not sufficient to keep both pipelines filled.
+
+   The value is 1 on SH5; otherwise 2 when TARGET_SH2 is off or
+   TARGET_HARD_SH4 is on, else 1.  Neither argument is used.  */
+#define BRANCH_COST(speed_p, predictable_p) \
+        (TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1)
+
+/* Assembler output control.  */
+
+/* A C string constant describing how to begin a comment in the target
+   assembler language.  The compiler assumes that the comment will end at
+   the end of the line.  */
+#define ASM_COMMENT_START "!"
+
+#define ASM_APP_ON		""
+#define ASM_APP_OFF		""
+#define FILE_ASM_OP		"\t.file\n"
+#define SET_ASM_OP		"\t.set\t"
+
+/* How to change between sections.  */
+
+#define TEXT_SECTION_ASM_OP (TARGET_SHMEDIA32 ? "\t.section\t.text..SHmedia32,\"ax\"" : "\t.text")
+#define DATA_SECTION_ASM_OP		"\t.data"
+
+#if defined CRT_BEGIN || defined CRT_END
+/* Arrange for TEXT_SECTION_ASM_OP to be a compile-time constant, chosen
+   from the predefined macros __SHMEDIA__ and __SH5__ instead of the
+   TARGET_SHMEDIA32 flag.  */
+# undef TEXT_SECTION_ASM_OP
+# if __SHMEDIA__ == 1 && __SH5__ == 32
+#  define TEXT_SECTION_ASM_OP "\t.section\t.text..SHmedia32,\"ax\""
+# else
+#  define TEXT_SECTION_ASM_OP "\t.text"
+# endif
+#endif
+
+
+/* If defined, a C expression whose value is a string containing the
+   assembler operation to identify the following data as
+   uninitialized global data.
If not defined, and neither + `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined, + uninitialized global data will be output in the data section if + `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be + used. */ +#ifndef BSS_SECTION_ASM_OP +#define BSS_SECTION_ASM_OP "\t.section\t.bss" +#endif + +/* Like `ASM_OUTPUT_BSS' except takes the required alignment as a + separate, explicit argument. If you define this macro, it is used + in place of `ASM_OUTPUT_BSS', and gives you more flexibility in + handling the required alignment of the variable. The alignment is + specified as the number of bits. + + Try to use function `asm_output_aligned_bss' defined in file + `varasm.c' when defining this macro. */ +#ifndef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) +#endif + +/* Define this so that jump tables go in same section as the current function, + which could be text or it could be a user defined section. 
*/ +#define JUMP_TABLES_IN_TEXT_SECTION 1 + +#undef DO_GLOBAL_CTORS_BODY +#define DO_GLOBAL_CTORS_BODY \ +{ \ + typedef void (*pfunc) (void); \ + extern pfunc __ctors[]; \ + extern pfunc __ctors_end[]; \ + pfunc *p; \ + for (p = __ctors_end; p > __ctors; ) \ + { \ + (*--p)(); \ + } \ +} + +#undef DO_GLOBAL_DTORS_BODY +#define DO_GLOBAL_DTORS_BODY \ +{ \ + typedef void (*pfunc) (void); \ + extern pfunc __dtors[]; \ + extern pfunc __dtors_end[]; \ + pfunc *p; \ + for (p = __dtors; p < __dtors_end; p++) \ + { \ + (*p)(); \ + } \ +} + +#define ASM_OUTPUT_REG_PUSH(file, v) \ +{ \ + if (TARGET_SHMEDIA) \ + { \ + fprintf ((file), "\taddi.l\tr15,-8,r15\n"); \ + fprintf ((file), "\tst.q\tr15,0,r%d\n", (v)); \ + } \ + else \ + fprintf ((file), "\tmov.l\tr%d,@-r15\n", (v)); \ +} + +#define ASM_OUTPUT_REG_POP(file, v) \ +{ \ + if (TARGET_SHMEDIA) \ + { \ + fprintf ((file), "\tld.q\tr15,0,r%d\n", (v)); \ + fprintf ((file), "\taddi.l\tr15,8,r15\n"); \ + } \ + else \ + fprintf ((file), "\tmov.l\t@r15+,r%d\n", (v)); \ +} + +/* DBX register number for a given compiler register number. */ +/* GDB has FPUL at 23 and FP0 at 25, so we must add one to all FP registers + to match gdb. */ +/* expand_builtin_init_dwarf_reg_sizes uses this to test if a + register exists, so we should return -1 for invalid register numbers. */ +#define DBX_REGISTER_NUMBER(REGNO) SH_DBX_REGISTER_NUMBER (REGNO) + +/* SHcompact PR_REG used to use the encoding 241, and SHcompact FP registers + used to use the encodings 245..260, but that doesn't make sense: + PR_REG and PR_MEDIA_REG are actually the same register, and likewise + the FP registers stay the same when switching between compact and media + mode. Hence, we also need to use the same dwarf frame columns. + Likewise, we need to support unwind information for SHmedia registers + even in compact code. */ +#define SH_DBX_REGISTER_NUMBER(REGNO) \ + (IN_RANGE ((REGNO), \ + (unsigned HOST_WIDE_INT) FIRST_GENERAL_REG, \ + FIRST_GENERAL_REG + (TARGET_SH5 ? 
63U :15U)) \ + ? ((unsigned) (REGNO) - FIRST_GENERAL_REG) \ + : ((int) (REGNO) >= FIRST_FP_REG \ + && ((int) (REGNO) \ + <= (FIRST_FP_REG + \ + ((TARGET_SH5 && TARGET_FPU_ANY) ? 63 : TARGET_SH2E ? 15 : -1)))) \ + ? ((unsigned) (REGNO) - FIRST_FP_REG \ + + (TARGET_SH5 ? 77 : 25)) \ + : XD_REGISTER_P (REGNO) \ + ? ((unsigned) (REGNO) - FIRST_XD_REG + (TARGET_SH5 ? 289 : 87)) \ + : TARGET_REGISTER_P (REGNO) \ + ? ((unsigned) (REGNO) - FIRST_TARGET_REG + 68) \ + : (REGNO) == PR_REG \ + ? (TARGET_SH5 ? 18 : 17) \ + : (REGNO) == PR_MEDIA_REG \ + ? (TARGET_SH5 ? 18 : (unsigned) -1) \ + : (REGNO) == GBR_REG \ + ? (TARGET_SH5 ? 238 : 18) \ + : (REGNO) == MACH_REG \ + ? (TARGET_SH5 ? 239 : 20) \ + : (REGNO) == MACL_REG \ + ? (TARGET_SH5 ? 240 : 21) \ + : (REGNO) == T_REG \ + ? (TARGET_SH5 ? 242 : 22) \ + : (REGNO) == FPUL_REG \ + ? (TARGET_SH5 ? 244 : 23) \ + : (REGNO) == FPSCR_REG \ + ? (TARGET_SH5 ? 243 : 24) \ + : (unsigned) -1) + +/* This is how to output a reference to a symbol_ref. On SH5, + references to non-code symbols must be preceded by `datalabel'. */ +#define ASM_OUTPUT_SYMBOL_REF(FILE,SYM) \ + do \ + { \ + if (TARGET_SH5 && !SYMBOL_REF_FUNCTION_P (SYM)) \ + fputs ("datalabel ", (FILE)); \ + assemble_name ((FILE), XSTR ((SYM), 0)); \ + } \ + while (0) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf ((FILE), "\t.align %d\n", (LOG)) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* #define ASM_OUTPUT_CASE_END(STREAM,NUM,TABLE) */ + +/* Output a relative address table. 
*/ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM,BODY,VALUE,REL) \ + switch (GET_MODE (BODY)) \ + { \ + case SImode: \ + if (TARGET_SH5) \ + { \ + asm_fprintf ((STREAM), "\t.long\t%LL%d-datalabel %LL%d\n", \ + (VALUE), (REL)); \ + break; \ + } \ + asm_fprintf ((STREAM), "\t.long\t%LL%d-%LL%d\n", (VALUE),(REL)); \ + break; \ + case HImode: \ + if (TARGET_SH5) \ + { \ + asm_fprintf ((STREAM), "\t.word\t%LL%d-datalabel %LL%d\n", \ + (VALUE), (REL)); \ + break; \ + } \ + asm_fprintf ((STREAM), "\t.word\t%LL%d-%LL%d\n", (VALUE),(REL)); \ + break; \ + case QImode: \ + if (TARGET_SH5) \ + { \ + asm_fprintf ((STREAM), "\t.byte\t%LL%d-datalabel %LL%d\n", \ + (VALUE), (REL)); \ + break; \ + } \ + asm_fprintf ((STREAM), "\t.byte\t%LL%d-%LL%d\n", (VALUE),(REL)); \ + break; \ + default: \ + break; \ + } + +/* Output an absolute table element. */ + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM,VALUE) \ + if (! optimize || TARGET_BIGTABLE) \ + asm_fprintf ((STREAM), "\t.long\t%LL%d\n", (VALUE)); \ + else \ + asm_fprintf ((STREAM), "\t.word\t%LL%d\n", (VALUE)); + + +/* A C statement to be executed just prior to the output of + assembler code for INSN, to modify the extracted operands so + they will be output differently. + + Here the argument OPVEC is the vector containing the operands + extracted from INSN, and NOPERANDS is the number of elements of + the vector which contain meaningful data for this insn. + The contents of this vector are what will be used to convert the insn + template into assembler code, so you can change the assembler output + by changing the contents of the vector. */ + +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + final_prescan_insn ((INSN), (OPVEC), (NOPERANDS)) + + +extern struct rtx_def *sh_compare_op0; +extern struct rtx_def *sh_compare_op1; + +/* Which processor to schedule for. The elements of the enumeration must + match exactly the cpu attribute in the sh.md file. 
*/ + +enum processor_type { + PROCESSOR_SH1, + PROCESSOR_SH2, + PROCESSOR_SH2E, + PROCESSOR_SH2A, + PROCESSOR_SH3, + PROCESSOR_SH3E, + PROCESSOR_SH4, + PROCESSOR_SH4A, + PROCESSOR_SH5 +}; + +#define sh_cpu_attr ((enum attr_cpu)sh_cpu) +extern enum processor_type sh_cpu; + +enum mdep_reorg_phase_e +{ + SH_BEFORE_MDEP_REORG, + SH_INSERT_USES_LABELS, + SH_SHORTEN_BRANCHES0, + SH_FIXUP_PCLOAD, + SH_SHORTEN_BRANCHES1, + SH_AFTER_MDEP_REORG +}; + +extern enum mdep_reorg_phase_e mdep_reorg_phase; + +/* Handle Renesas compiler's pragmas. */ +#define REGISTER_TARGET_PRAGMAS() do { \ + c_register_pragma (0, "interrupt", sh_pr_interrupt); \ + c_register_pragma (0, "trapa", sh_pr_trapa); \ + c_register_pragma (0, "nosave_low_regs", sh_pr_nosave_low_regs); \ +} while (0) + +extern tree sh_deferred_function_attributes; +extern tree *sh_deferred_function_attributes_tail; + +/* Set when processing a function with interrupt attribute. */ + +extern int current_function_interrupt; + + +/* Instructions with unfilled delay slots take up an + extra two bytes for the nop in the delay slot. + sh-dsp parallel processing insns are four bytes long. */ + +#define ADJUST_INSN_LENGTH(X, LENGTH) \ + (LENGTH) += sh_insn_length_adjustment (X); + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. + + Leaving the unsignedp unchanged gives better code than always setting it + to 0. This is despite the fact that we have only signed char and short + load instructions. */ +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4/* ! UNITS_PER_WORD */)\ + (UNSIGNEDP) = ((MODE) == SImode ? 0 : (UNSIGNEDP)), \ + (MODE) = (TARGET_SH1 ? SImode \ + : TARGET_SHMEDIA32 ? 
SImode : DImode); + +#define MAX_FIXED_MODE_SIZE (TARGET_SH5 ? 128 : 64) + +#define SIDI_OFF (TARGET_LITTLE_ENDIAN ? 0 : 4) + +/* Better to allocate once the maximum space for outgoing args in the + prologue rather than duplicate around each call. */ +#define ACCUMULATE_OUTGOING_ARGS TARGET_ACCUMULATE_OUTGOING_ARGS + +#define SH_DYNAMIC_SHIFT_COST \ + (TARGET_HARD_SH4 ? 1 : TARGET_SH3 ? (optimize_size ? 1 : 2) : 20) + + +#define NUM_MODES_FOR_MODE_SWITCHING { FP_MODE_NONE } + +#define OPTIMIZE_MODE_SWITCHING(ENTITY) (TARGET_SH4 || TARGET_SH2A_DOUBLE) + +#define ACTUAL_NORMAL_MODE(ENTITY) \ + (TARGET_FPU_SINGLE ? FP_MODE_SINGLE : FP_MODE_DOUBLE) + +#define NORMAL_MODE(ENTITY) \ + (sh_cfun_interrupt_handler_p () \ + ? (TARGET_FMOVD ? FP_MODE_DOUBLE : FP_MODE_NONE) \ + : ACTUAL_NORMAL_MODE (ENTITY)) + +#define MODE_ENTRY(ENTITY) NORMAL_MODE (ENTITY) + +#define MODE_EXIT(ENTITY) \ + (sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (ENTITY)) + +#define EPILOGUE_USES(REGNO) ((TARGET_SH2E || TARGET_SH4) \ + && (REGNO) == FPSCR_REG) + +#define MODE_NEEDED(ENTITY, INSN) \ + (recog_memoized (INSN) >= 0 \ + ? get_attr_fp_mode (INSN) \ + : FP_MODE_NONE) + +#define MODE_AFTER(MODE, INSN) \ + (TARGET_HITACHI \ + && recog_memoized (INSN) >= 0 \ + && get_attr_fp_set (INSN) != FP_SET_NONE \ + ? (int) get_attr_fp_set (INSN) \ + : (MODE)) + +#define MODE_PRIORITY_TO_MODE(ENTITY, N) \ + ((TARGET_FPU_SINGLE != 0) ^ (N) ? FP_MODE_SINGLE : FP_MODE_DOUBLE) + +#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \ + fpscr_set_from_mem ((MODE), (HARD_REGS_LIVE)) + +#define MD_CAN_REDIRECT_BRANCH(INSN, SEQ) \ + sh_can_redirect_branch ((INSN), (SEQ)) + +#define DWARF_FRAME_RETURN_COLUMN \ + (TARGET_SH5 ? DWARF_FRAME_REGNUM (PR_MEDIA_REG) : DWARF_FRAME_REGNUM (PR_REG)) + +#define EH_RETURN_DATA_REGNO(N) \ + ((N) < 4 ? (N) + (TARGET_SH5 ? 
2U : 4U) : INVALID_REGNUM) + +#define EH_RETURN_STACKADJ_REGNO STATIC_CHAIN_REGNUM +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, EH_RETURN_STACKADJ_REGNO) + +/* We have to distinguish between code and data, so that we apply + datalabel where and only where appropriate. Use sdataN for data. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \ + | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr) \ + | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4))) + +/* Handle special EH pointer encodings. Absolute, pc-relative, and + indirect are handled automatically. */ +#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \ + do { \ + if (((ENCODING) & 0xf) != DW_EH_PE_sdata4 \ + && ((ENCODING) & 0xf) != DW_EH_PE_sdata8) \ + { \ + gcc_assert (GET_CODE (ADDR) == SYMBOL_REF); \ + SYMBOL_REF_FLAGS (ADDR) |= SYMBOL_FLAG_FUNCTION; \ + if (0) goto DONE; \ + } \ + } while (0) + +#if (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__ +/* SH constant pool breaks the devices in crtstuff.c to control section + in where code resides. We have to write it as asm code. */ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\ + mov.l 1f,r1\n\ + mova 2f,r0\n\ + braf r1\n\ + lds r0,pr\n\ +0: .p2align 2\n\ +1: .long " USER_LABEL_PREFIX #FUNC " - 0b\n\ +2:\n" TEXT_SECTION_ASM_OP); +#endif /* (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__ */ + +/* FIXME: middle-end support for highpart optimizations is missing. */ +#define high_life_started reload_in_progress + +#endif /* ! GCC_SH_H */ diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md new file mode 100644 index 000000000..e261d3339 --- /dev/null +++ b/gcc/config/sh/sh.md @@ -0,0 +1,13490 @@ +;;- Machine description for Renesas / SuperH SH. +;; Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +;; 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 +;; Free Software Foundation, Inc. 
+;; Contributed by Steve Chamberlain (sac@cygnus.com). +;; Improved by Jim Wilson (wilson@cygnus.com). + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; ??? Should prepend a * to all pattern names which are not used. +;; This will make the compiler smaller, and rebuilds after changes faster. + +;; ??? Should be enhanced to include support for many more GNU superoptimizer +;; sequences. Especially the sequences for arithmetic right shifts. + +;; ??? Should check all DImode patterns for consistency and usefulness. + +;; ??? The MAC.W and MAC.L instructions are not supported. There is no +;; way to generate them. + +;; ??? The cmp/str instruction is not supported. Perhaps it can be used +;; for a str* inline function. + +;; BSR is not generated by the compiler proper, but when relaxing, it +;; generates .uses pseudo-ops that allow linker relaxation to create +;; BSR. This is actually implemented in bfd/{coff,elf32}-sh.c + +;; Special constraints for SH machine description: +;; +;; t -- T +;; x -- mac +;; l -- pr +;; z -- r0 +;; +;; Special formats used for outputting SH instructions: +;; +;; %. 
-- print a .s if insn needs delay slot +;; %@ -- print rte/rts if is/isn't an interrupt function +;; %# -- output a nop if there is nothing to put in the delay slot +;; %O -- print a constant without the # +;; %R -- print the lsw reg of a double +;; %S -- print the msw reg of a double +;; %T -- print next word of a double REG or MEM +;; +;; Special predicates: +;; +;; arith_operand -- operand is valid source for arithmetic op +;; arith_reg_operand -- operand is valid register for arithmetic op +;; general_movdst_operand -- operand is valid move destination +;; general_movsrc_operand -- operand is valid move source +;; logical_operand -- operand is valid source for logical op + +;; ------------------------------------------------------------------------- +;; Constants +;; ------------------------------------------------------------------------- + +(define_constants [ + (AP_REG 145) + (PR_REG 146) + (T_REG 147) + (GBR_REG 144) + (MACH_REG 148) + (MACL_REG 149) + (FPUL_REG 150) + (RAP_REG 152) + + (FPSCR_REG 151) + + (PIC_REG 12) + (FP_REG 14) + (SP_REG 15) + + (PR_MEDIA_REG 18) + (T_MEDIA_REG 19) + + (R0_REG 0) + (R1_REG 1) + (R2_REG 2) + (R3_REG 3) + (R4_REG 4) + (R5_REG 5) + (R6_REG 6) + (R7_REG 7) + (R8_REG 8) + (R9_REG 9) + (R10_REG 10) + (R20_REG 20) + (R21_REG 21) + (R22_REG 22) + (R23_REG 23) + + (DR0_REG 64) + (DR2_REG 66) + (DR4_REG 68) + (FR23_REG 87) + + (TR0_REG 128) + (TR1_REG 129) + (TR2_REG 130) + + (XD0_REG 136) + + ;; These are used with unspec. 
+ (UNSPEC_COMPACT_ARGS 0) + (UNSPEC_MOVA 1) + (UNSPEC_CASESI 2) + (UNSPEC_DATALABEL 3) + (UNSPEC_BBR 4) + (UNSPEC_SFUNC 5) + (UNSPEC_PIC 6) + (UNSPEC_GOT 7) + (UNSPEC_GOTOFF 8) + (UNSPEC_PLT 9) + (UNSPEC_CALLER 10) + (UNSPEC_GOTPLT 11) + (UNSPEC_ICACHE 12) + (UNSPEC_INIT_TRAMP 13) + (UNSPEC_FCOSA 14) + (UNSPEC_FSRRA 15) + (UNSPEC_FSINA 16) + (UNSPEC_NSB 17) + (UNSPEC_ALLOCO 18) + (UNSPEC_TLSGD 20) + (UNSPEC_TLSLDM 21) + (UNSPEC_TLSIE 22) + (UNSPEC_DTPOFF 23) + (UNSPEC_GOTTPOFF 24) + (UNSPEC_TPOFF 25) + (UNSPEC_RA 26) + (UNSPEC_DIV_INV_M0 30) + (UNSPEC_DIV_INV_M1 31) + (UNSPEC_DIV_INV_M2 32) + (UNSPEC_DIV_INV_M3 33) + (UNSPEC_DIV_INV20 34) + (UNSPEC_DIV_INV_TABLE 37) + (UNSPEC_ASHIFTRT 35) + (UNSPEC_THUNK 36) + (UNSPEC_CHKADD 38) + (UNSPEC_SP_SET 40) + (UNSPEC_SP_TEST 41) + (UNSPEC_MOVUA 42) + + ;; (unspec [VAL SHIFT] UNSPEC_EXTRACT_S16) computes (short) (VAL >> SHIFT). + ;; UNSPEC_EXTRACT_U16 is the unsigned equivalent. + (UNSPEC_EXTRACT_S16 43) + (UNSPEC_EXTRACT_U16 44) + + ;; (unspec [TARGET ANCHOR] UNSPEC_SYMOFF) == TARGET - ANCHOR. + (UNSPEC_SYMOFF 45) + + ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .). + (UNSPEC_PCREL_SYMOFF 46) + + ;; These are used with unspec_volatile. + (UNSPECV_BLOCKAGE 0) + (UNSPECV_ALIGN 1) + (UNSPECV_CONST2 2) + (UNSPECV_CONST4 4) + (UNSPECV_CONST8 6) + (UNSPECV_WINDOW_END 10) + (UNSPECV_CONST_END 11) + (UNSPECV_EH_RETURN 12) +]) + +;; ------------------------------------------------------------------------- +;; Attributes +;; ------------------------------------------------------------------------- + +;; Target CPU. + +(define_attr "cpu" + "sh1,sh2,sh2e,sh2a,sh3,sh3e,sh4,sh4a,sh5" + (const (symbol_ref "sh_cpu_attr"))) + +(define_attr "endian" "big,little" + (const (if_then_else (symbol_ref "TARGET_LITTLE_ENDIAN") + (const_string "little") (const_string "big")))) + +;; Indicate if the default fpu mode is single precision. 
+(define_attr "fpu_single" "yes,no" + (const (if_then_else (symbol_ref "TARGET_FPU_SINGLE") + (const_string "yes") (const_string "no")))) + +(define_attr "fmovd" "yes,no" + (const (if_then_else (symbol_ref "TARGET_FMOVD") + (const_string "yes") (const_string "no")))) +;; pipeline model +(define_attr "pipe_model" "sh1,sh4,sh5media" + (const + (cond [(symbol_ref "TARGET_SHMEDIA") (const_string "sh5media") + (symbol_ref "TARGET_SUPERSCALAR") (const_string "sh4")] + (const_string "sh1")))) + +;; cbranch conditional branch instructions +;; jump unconditional jumps +;; arith ordinary arithmetic +;; arith3 a compound insn that behaves similarly to a sequence of +;; three insns of type arith +;; arith3b like above, but might end with a redirected branch +;; load from memory +;; load_si Likewise, SImode variant for general register. +;; fload Likewise, but load to fp register. +;; store to memory +;; fstore floating point register to memory +;; move general purpose register to register +;; movi8 8-bit immediate to general purpose register +;; mt_group other sh4 mt instructions +;; fmove register to register, floating point +;; smpy word precision integer multiply +;; dmpy longword or doublelongword precision integer multiply +;; return rts +;; pload load of pr reg, which can't be put into delay slot of rts +;; prset copy register to pr reg, ditto +;; pstore store of pr reg, which can't be put into delay slot of jsr +;; prget copy pr to register, ditto +;; pcload pc relative load of constant value +;; pcfload Likewise, but load to fp register. +;; pcload_si Likewise, SImode variant for general register. 
+;; rte return from exception +;; sfunc special function call with known used registers +;; call function call +;; fp floating point +;; fpscr_toggle toggle a bit in the fpscr +;; fdiv floating point divide (or square root) +;; gp_fpul move from general purpose register to fpul +;; fpul_gp move from fpul to general purpose register +;; mac_gp move from mac[lh] to general purpose register +;; gp_mac move from general purpose register to mac[lh] +;; mac_mem move from mac[lh] to memory +;; mem_mac move from memory to mac[lh] +;; dfp_arith,dfp_mul, fp_cmp,dfp_cmp,dfp_conv +;; ftrc_s fix_truncsfsi2_i4 +;; dfdiv double precision floating point divide (or square root) +;; cwb ic_invalidate_line_i +;; movua SH4a unaligned load +;; fsrra square root reciprocal approximate +;; fsca sine and cosine approximate +;; tls_load load TLS related address +;; arith_media SHmedia arithmetic, logical, and shift instructions +;; cbranch_media SHmedia conditional branch instructions +;; cmp_media SHmedia compare instructions +;; dfdiv_media SHmedia double precision divide and square root +;; dfmul_media SHmedia double precision multiply instruction +;; dfparith_media SHmedia double precision floating point arithmetic +;; dfpconv_media SHmedia double precision floating point conversions +;; dmpy_media SHmedia longword multiply +;; fcmp_media SHmedia floating point compare instructions +;; fdiv_media SHmedia single precision divide and square root +;; fload_media SHmedia floating point register load instructions +;; fmove_media SHmedia floating point register moves (inc. 
fabs and fneg) +;; fparith_media SHmedia single precision floating point arithmetic +;; fpconv_media SHmedia single precision floating point conversions +;; fstore_media SHmedia floating point register store instructions +;; gettr_media SHmedia gettr instruction +;; invalidate_line_media SHmedia invalidate_line sequence +;; jump_media SHmedia unconditional branch instructions +;; load_media SHmedia general register load instructions +;; pt_media SHmedia pt instruction (expanded by assembler) +;; ptabs_media SHmedia ptabs instruction +;; store_media SHmedia general register store instructions +;; mcmp_media SHmedia multimedia compare, absolute, saturating ops +;; mac_media SHmedia mac-style fixed point operations +;; d2mpy_media SHmedia: two 32-bit integer multiplies +;; atrans_media SHmedia approximate transcendental functions +;; ustore_media SHmedia unaligned stores +;; nil no-op move, will be deleted. + +(define_attr "type" + "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,fstore,move,movi8,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fpscr_toggle,fdiv,ftrc_s,dfp_arith,dfp_mul,fp_cmp,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,gp_mac,mac_mem,mem_mac,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other" + (const_string "other")) + +;; We define a new attribute namely "insn_class".We use +;; this for the DFA based pipeline description. +;; +;; mt_group SH4 "mt" group instructions. +;; +;; ex_group SH4 "ex" group instructions. +;; +;; ls_group SH4 "ls" group instructions. 
+;; + +(define_attr "insn_class" + "mt_group,ex_group,ls_group,br_group,fe_group,co_group,none" + (cond [(eq_attr "type" "move,mt_group") (const_string "mt_group") + (eq_attr "type" "movi8,arith,dyn_shift") (const_string "ex_group") + (eq_attr "type" "fmove,load,pcload,load_si,pcload_si,fload,pcfload,store,fstore,gp_fpul,fpul_gp") (const_string "ls_group") + (eq_attr "type" "cbranch,jump") (const_string "br_group") + (eq_attr "type" "fp,fp_cmp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv") + (const_string "fe_group") + (eq_attr "type" "jump_ind,smpy,dmpy,mac_gp,return,pload,prset,pstore,prget,rte,sfunc,call,dfp_cmp,mem_fpscr,gp_fpscr,cwb,gp_mac,mac_mem,mem_mac") (const_string "co_group")] + (const_string "none"))) +;; nil are zero instructions, and arith3 / arith3b are multiple instructions, +;; so these do not belong in an insn group, although they are modeled +;; with their own define_insn_reservations. + +;; Indicate what precision must be selected in fpscr for this insn, if any. + +(define_attr "fp_mode" "single,double,none" (const_string "none")) + +;; Indicate if the fpu mode is set by this instruction +;; "unknown" must have the value as "none" in fp_mode, and means +;; that the instruction/abi has left the processor in an unknown +;; state. +;; "none" means that nothing has changed and no mode is set. +;; This attribute is only used for the Renesas ABI. +(define_attr "fp_set" "single,double,unknown,none" (const_string "none")) + +; If a conditional branch destination is within -252..258 bytes away +; from the instruction it can be 2 bytes long. Something in the +; range -4090..4100 bytes can be 6 bytes long. All other conditional +; branches are initially assumed to be 16 bytes long. +; In machine_dependent_reorg, we split all branches that are longer than +; 2 bytes. + +;; The maximum range used for SImode constant pool entries is 1018. 
A final +;; instruction can add 8 bytes while only being 4 bytes in size, thus we +;; can have a total of 1022 bytes in the pool. Add 4 bytes for a branch +;; instruction around the pool table, 2 bytes of alignment before the table, +;; and 30 bytes of alignment after the table. That gives a maximum total +;; pool size of 1058 bytes. +;; Worst case code/pool content size ratio is 1:2 (using asms). +;; Thus, in the worst case, there is one instruction in front of a maximum +;; sized pool, and then there are 1052 bytes of pool for every 508 bytes of +;; code. For the last n bytes of code, there are 2n + 36 bytes of pool. +;; If we have a forward branch, the initial table will be put after the +;; unconditional branch. +;; +;; ??? We could do much better by keeping track of the actual pcloads within +;; the branch range and in the pcload range in front of the branch range. + +;; ??? This looks ugly because genattrtab won't allow if_then_else or cond +;; inside an le. +(define_attr "short_cbranch_p" "no,yes" + (cond [(ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 506)) + (const_string "yes") + (ne (symbol_ref "NEXT_INSN (PREV_INSN (insn)) != insn") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 508)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "med_branch_p" "no,yes" + (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 990)) + (const_int 1988)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 4092)) + (const_int 8186)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "med_cbranch_p" "no,yes" + (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 988)) + (const_int 1986)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= 
SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 4090)) + (const_int 8184)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "braf_branch_p" "no,yes" + (cond [(ne (symbol_ref "! TARGET_SH2") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 10330)) + (const_int 20660)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 32764)) + (const_int 65530)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "braf_cbranch_p" "no,yes" + (cond [(ne (symbol_ref "! TARGET_SH2") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 10328)) + (const_int 20658)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 32762)) + (const_int 65528)) + (const_string "yes") + ] (const_string "no"))) + +; An unconditional jump in the range -4092..4098 can be 2 bytes long. +; For wider ranges, we need a combination of a code and a data part. +; If we can get a scratch register for a long range jump, the code +; part can be 4 bytes long; otherwise, it must be 8 bytes long. +; If the jump is in the range -32764..32770, the data part can be 2 bytes +; long; otherwise, it must be 6 bytes long. + +; All other instructions are two bytes long by default. + +;; ??? This should use something like *branch_p (minus (match_dup 0) (pc)), +;; but getattrtab doesn't understand this. +(define_attr "length" "" + (cond [(eq_attr "type" "cbranch") + (cond [(eq_attr "short_cbranch_p" "yes") + (const_int 2) + (eq_attr "med_cbranch_p" "yes") + (const_int 6) + (eq_attr "braf_cbranch_p" "yes") + (const_int 12) +;; ??? using pc is not computed transitively. 
+ (ne (match_dup 0) (match_dup 0)) + (const_int 14) + (ne (symbol_ref ("flag_pic")) (const_int 0)) + (const_int 24) + ] (const_int 16)) + (eq_attr "type" "jump") + (cond [(eq_attr "med_branch_p" "yes") + (const_int 2) + (and (ne (symbol_ref "prev_nonnote_insn (insn)") + (const_int 0)) + (and (eq (symbol_ref "GET_CODE (prev_nonnote_insn (insn))") + (symbol_ref "INSN")) + (eq (symbol_ref "INSN_CODE (prev_nonnote_insn (insn))") + (symbol_ref "code_for_indirect_jump_scratch")))) + (cond [(eq_attr "braf_branch_p" "yes") + (const_int 6) + (eq (symbol_ref "flag_pic") (const_int 0)) + (const_int 10) + (ne (symbol_ref "TARGET_SH2") (const_int 0)) + (const_int 10)] (const_int 18)) + (eq_attr "braf_branch_p" "yes") + (const_int 10) +;; ??? using pc is not computed transitively. + (ne (match_dup 0) (match_dup 0)) + (const_int 12) + (ne (symbol_ref ("flag_pic")) (const_int 0)) + (const_int 22) + ] (const_int 14)) + (eq_attr "type" "pt_media") + (if_then_else (ne (symbol_ref "TARGET_SHMEDIA64") (const_int 0)) + (const_int 20) (const_int 12)) + (and (eq_attr "type" "jump_media") + (ne (symbol_ref "TARGET_SH5_CUT2_WORKAROUND") (const_int 0))) + (const_int 8) + ] (if_then_else (ne (symbol_ref "TARGET_SHMEDIA") (const_int 0)) + (const_int 4) + (const_int 2)))) + +;; DFA descriptions for the pipelines + +(include "sh1.md") +(include "shmedia.md") +(include "sh4.md") + +(include "predicates.md") +(include "constraints.md") + +;; Definitions for filling delay slots + +(define_attr "needs_delay_slot" "yes,no" (const_string "no")) + +(define_attr "banked" "yes,no" + (cond [(eq (symbol_ref "sh_loads_bankedreg_p (insn)") + (const_int 1)) + (const_string "yes")] + (const_string "no"))) + +;; ??? 
This should be (nil) instead of (const_int 0) +(define_attr "hit_stack" "yes,no" + (cond [(eq (symbol_ref "find_regno_note (insn, REG_INC, SP_REG)") + (const_int 0)) + (const_string "no")] + (const_string "yes"))) + +(define_attr "interrupt_function" "no,yes" + (const (symbol_ref "current_function_interrupt"))) + +(define_attr "in_delay_slot" "yes,no" + (cond [(eq_attr "type" "cbranch") (const_string "no") + (eq_attr "type" "pcload,pcload_si") (const_string "no") + (eq_attr "needs_delay_slot" "yes") (const_string "no") + (eq_attr "length" "2") (const_string "yes") + ] (const_string "no"))) + +(define_attr "cond_delay_slot" "yes,no" + (cond [(eq_attr "in_delay_slot" "yes") (const_string "yes") + ] (const_string "no"))) + +(define_attr "is_sfunc" "" + (if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0))) + +(define_attr "is_mac_media" "" + (if_then_else (eq_attr "type" "mac_media") (const_int 1) (const_int 0))) + +(define_attr "branch_zero" "yes,no" + (cond [(eq_attr "type" "!cbranch") (const_string "no") + (ne (symbol_ref "(next_active_insn (insn)\ + == (prev_active_insn\ + (XEXP (SET_SRC (PATTERN (insn)), 1))))\ + && get_attr_length (next_active_insn (insn)) == 2") + (const_int 0)) + (const_string "yes")] + (const_string "no"))) + +;; SH4 Double-precision computation with double-precision result - +;; the two halves are ready at different times. +(define_attr "dfp_comp" "yes,no" + (cond [(eq_attr "type" "dfp_arith,dfp_mul,dfp_conv,dfdiv") (const_string "yes")] + (const_string "no"))) + +;; Insns for which the latency of a preceding fp insn is decreased by one. +(define_attr "late_fp_use" "yes,no" (const_string "no")) +;; And feeding insns for which this relevant. 
+(define_attr "any_fp_comp" "yes,no" + (cond [(eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv") + (const_string "yes")] + (const_string "no"))) + +(define_attr "any_int_load" "yes,no" + (cond [(eq_attr "type" "load,load_si,pcload,pcload_si") + (const_string "yes")] + (const_string "no"))) + +(define_attr "highpart" "user, ignore, extend, depend, must_split" + (const_string "user")) + +(define_delay + (eq_attr "needs_delay_slot" "yes") + [(eq_attr "in_delay_slot" "yes") (nil) (nil)]) + +;; On the SH and SH2, the rte instruction reads the return pc from the stack, +;; and thus we can't put a pop instruction in its delay slot. +;; ??? On the SH3, the rte instruction does not use the stack, so a pop +;; instruction can go in the delay slot. + +;; Since a normal return (rts) implicitly uses the PR register, +;; we can't allow PR register loads in an rts delay slot. + +(define_delay + (eq_attr "type" "return") + [(and (eq_attr "in_delay_slot" "yes") + (ior (and (eq_attr "interrupt_function" "no") + (eq_attr "type" "!pload,prset")) + (and (eq_attr "interrupt_function" "yes") + (ior + (eq (symbol_ref "TARGET_SH3") (const_int 0)) + (eq_attr "hit_stack" "no") + (eq_attr "banked" "no"))))) (nil) (nil)]) + +;; Since a call implicitly uses the PR register, we can't allow +;; a PR register store in a jsr delay slot. + +(define_delay + (ior (eq_attr "type" "call") (eq_attr "type" "sfunc")) + [(and (eq_attr "in_delay_slot" "yes") + (eq_attr "type" "!pstore,prget")) (nil) (nil)]) + +;; Say that we have annulled true branches, since this gives smaller and +;; faster code when branches are predicted as not taken. + +;; ??? The non-annulled condition should really be "in_delay_slot", +;; but insns that can be filled in non-annulled get priority over insns +;; that can only be filled in anulled. 
+ +(define_delay + (and (eq_attr "type" "cbranch") + (ne (symbol_ref "TARGET_SH2") (const_int 0))) + ;; SH2e has a hardware bug that pretty much prohibits the use of + ;; annulled delay slots. + [(eq_attr "cond_delay_slot" "yes") (and (eq_attr "cond_delay_slot" "yes") + (not (eq_attr "cpu" "sh2e"))) (nil)]) + +;; ------------------------------------------------------------------------- +;; SImode signed integer comparisons +;; ------------------------------------------------------------------------- + +;; T = ((operand 0 & operand 1) == 0), emitted as a single tst. +(define_insn "" + [(set (reg:SI T_REG) + (eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "z,r") + (match_operand:SI 1 "logical_operand" "K08,r")) + (const_int 0)))] + "TARGET_SH1" + "tst %1,%0" + [(set_attr "type" "mt_group")]) + +;; ??? Perhaps should only accept reg/constant if the register is reg 0. +;; That would still allow reload to create cmpi instructions, but would +;; perhaps allow forcing the constant into a register when that is better. +;; Probably should use r0 for mem/imm compares, but force constant into a +;; register for pseudo/imm compares.
+ +(define_insn "cmpeqsi_t" + [(set (reg:SI T_REG) + (eq:SI (match_operand:SI 0 "arith_reg_operand" "r,z,r") + (match_operand:SI 1 "arith_operand" "N,rI08,r")))] + "TARGET_SH1" + "@ + tst %0,%0 + cmp/eq %1,%0 + cmp/eq %1,%0" + [(set_attr "type" "mt_group")]) + +(define_insn "cmpgtsi_t" + [(set (reg:SI T_REG) + (gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r") + (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))] + "TARGET_SH1" + "@ + cmp/gt %1,%0 + cmp/pl %0" + [(set_attr "type" "mt_group")]) + +(define_insn "cmpgesi_t" + [(set (reg:SI T_REG) + (ge:SI (match_operand:SI 0 "arith_reg_operand" "r,r") + (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))] + "TARGET_SH1" + "@ + cmp/ge %1,%0 + cmp/pz %0" + [(set_attr "type" "mt_group")]) + +;; ------------------------------------------------------------------------- +;; SImode compare and branch +;; ------------------------------------------------------------------------- + +(define_expand "cbranchsi4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:SI 1 "arith_operand" "") + (match_operand:SI 2 "arith_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:SI T_REG))] + "" + "if (TARGET_SHMEDIA) + emit_jump_insn (gen_cbranchint4_media (operands[0], operands[1], + operands[2], operands[3])); + else if (TARGET_CBRANCHDI4) + expand_cbranchsi4 (operands, LAST_AND_UNUSED_RTX_CODE, -1); + else + sh_emit_compare_and_branch (operands, SImode); + DONE;") + +;; ------------------------------------------------------------------------- +;; SImode unsigned integer comparisons +;; ------------------------------------------------------------------------- + +;; An unsigned >= against zero always holds, so when operand 1 is zero the +;; insn is split into sett. +(define_insn_and_split "cmpgeusi_t" + [(set (reg:SI T_REG) + (geu:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_or_0_operand" "rN")))] + "TARGET_SH1" + "cmp/hs %1,%0" + "&& operands[1] == CONST0_RTX (SImode)" + [(pc)] + " +{ + emit_insn (gen_sett ()); + DONE; +}" + [(set_attr
"type" "mt_group")]) + +(define_insn "cmpgtusi_t" + [(set (reg:SI T_REG) + (gtu:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_operand" "r")))] + "TARGET_SH1" + "cmp/hi %1,%0" + [(set_attr "type" "mt_group")]) + + +;; ------------------------------------------------------------------------- +;; DImode compare and branch +;; ------------------------------------------------------------------------- + + +;; arith3 patterns don't work well with the sh4-300 branch prediction mechanism. +;; Therefore, we aim to have a set of three branches that go straight to the +;; destination, i.e. only one of them is taken at any one time. +;; This mechanism should also be slightly better for the sh4-200. + +(define_expand "cbranchdi4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:DI 1 "arith_operand" "") + (match_operand:DI 2 "arith_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_dup 4)) + (clobber (reg:SI T_REG))] + "TARGET_CBRANCHDI4 || TARGET_SH2 || TARGET_SHMEDIA" + " +{ + enum rtx_code comparison; + + if (TARGET_SHMEDIA) + { + emit_jump_insn (gen_cbranchint4_media (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + else if (!TARGET_CBRANCHDI4) + { + sh_emit_compare_and_branch (operands, DImode); + DONE; + } + + else + { + if (expand_cbranchdi4 (operands, LAST_AND_UNUSED_RTX_CODE)) + DONE; + + comparison = prepare_cbranch_operands (operands, DImode, + LAST_AND_UNUSED_RTX_CODE); + if (comparison != GET_CODE (operands[0])) + operands[0] + = gen_rtx_fmt_ee (comparison, VOIDmode, operands[1], operands[2]); + operands[4] = gen_rtx_SCRATCH (SImode); + } +}") + +;; Placeholder insn (output template #) that is split after reload through +;; expand_cbranchdi4; the split FAILs when that call returns zero. +(define_insn_and_split "cbranchdi4_i" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:DI 1 "arith_operand" "r,r") + (match_operand:DI 2 "arith_operand" "rN,I08")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 4 "=X,&r")) + (clobber
(reg:SI T_REG))] + "TARGET_CBRANCHDI4" + "#" + "&& reload_completed" + [(pc)] + " +{ + if (!expand_cbranchdi4 (operands, GET_CODE (operands[0]))) + FAIL; + DONE; +}") + +;; ------------------------------------------------------------------------- +;; DImode signed integer comparisons +;; ------------------------------------------------------------------------- + +(define_insn "" + [(set (reg:SI T_REG) + (eq:SI (and:DI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_operand" "r")) + (const_int 0)))] + "TARGET_SH1" + "* return output_branchy_insn (EQ, \"tst\\t%S1,%S0\;bf\\t%l9\;tst\\t%R1,%R0\", + insn, operands);" + [(set_attr "length" "6") + (set_attr "type" "arith3b")]) + +(define_insn "cmpeqdi_t" + [(set (reg:SI T_REG) + (eq:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "N,r")))] + "TARGET_SH1" + "@ + tst %S0,%S0\;bf %,Ldi%=\;tst %R0,%R0\\n%,Ldi%=: + cmp/eq %S1,%S0\;bf %,Ldi%=\;cmp/eq %R1,%R0\\n%,Ldi%=:" + [(set_attr "length" "6") + (set_attr "type" "arith3b")]) + +;; Split the DImode equality test into two SImode compares, with a branch to +;; a fresh label (operand 6) that skips the second compare when the first +;; one already cleared T. Operands 2/3 are the SImode words at +;; true_regnum + (TARGET_LITTLE_ENDIAN ? 1 : 0); operands 4/5 are the low parts. +(define_split + [(set (reg:SI T_REG) + (eq:SI (match_operand:DI 0 "arith_reg_operand" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "")))] +;; If we applied this split when not optimizing, it would only be +;; applied during the machine-dependent reorg, when no new basic blocks +;; may be created. + "TARGET_SH1 && reload_completed && optimize" + [(set (reg:SI T_REG) (eq:SI (match_dup 2) (match_dup 3))) + (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 6)) + (pc))) + (set (reg:SI T_REG) (eq:SI (match_dup 4) (match_dup 5))) + (match_dup 6)] + " +{ + operands[2] + = gen_rtx_REG (SImode, + true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + operands[3] + = (operands[1] == const0_rtx + ? const0_rtx + : gen_rtx_REG (SImode, + true_regnum (operands[1]) + + (TARGET_LITTLE_ENDIAN ?
1 : 0))); + operands[4] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_lowpart (SImode, operands[1]); + operands[6] = gen_label_rtx (); +}") + +(define_insn "cmpgtdi_t" + [(set (reg:SI T_REG) + (gt:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))] + "TARGET_SH2" + "@ + cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/gt\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=: + tst\\t%S0,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/pl\\t%S0\;cmp/hi\\t%S0,%R0\\n%,Ldi%=:" + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +(define_insn "cmpgedi_t" + [(set (reg:SI T_REG) + (ge:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))] + "TARGET_SH2" + "@ + cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/ge\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=: + cmp/pz\\t%S0" + [(set_attr "length" "8,2") + (set_attr "type" "arith3,mt_group")]) + +;; ------------------------------------------------------------------------- +;; DImode unsigned integer comparisons +;; ------------------------------------------------------------------------- + +(define_insn "cmpgeudi_t" + [(set (reg:SI T_REG) + (geu:SI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" + "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hs\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=:" + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +(define_insn "cmpgtudi_t" + [(set (reg:SI T_REG) + (gtu:SI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" + "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hi\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:" + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +;; SHmedia comparisons: the result goes to a general register operand +;; instead of T_REG. +(define_insn "cmpeqsi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (match_operand:SI 1 "logical_operand" "%r") + (match_operand:SI 2 "cmp_operand" "Nr")))] + "TARGET_SHMEDIA" + "cmpeq %1, %N2, %0" + [(set_attr "type"
"cmp_media")]) + +(define_insn "cmpeqdi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (match_operand:DI 1 "register_operand" "%r") + (match_operand:DI 2 "cmp_operand" "Nr")))] + "TARGET_SHMEDIA" + "cmpeq %1, %N2, %0" + [(set_attr "type" "cmp_media")]) + +(define_insn "cmpgtsi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gt:SI (match_operand:SI 1 "cmp_operand" "Nr") + (match_operand:SI 2 "cmp_operand" "rN")))] + "TARGET_SHMEDIA" + "cmpgt %N1, %N2, %0" + [(set_attr "type" "cmp_media")]) + +(define_insn "cmpgtdi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gt:SI (match_operand:DI 1 "arith_reg_or_0_operand" "Nr") + (match_operand:DI 2 "arith_reg_or_0_operand" "rN")))] + "TARGET_SHMEDIA" + "cmpgt %N1, %N2, %0" + [(set_attr "type" "cmp_media")]) + +(define_insn "cmpgtusi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gtu:SI (match_operand:SI 1 "cmp_operand" "Nr") + (match_operand:SI 2 "cmp_operand" "rN")))] + "TARGET_SHMEDIA" + "cmpgtu %N1, %N2, %0" + [(set_attr "type" "cmp_media")]) + +(define_insn "cmpgtudi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gtu:SI (match_operand:DI 1 "arith_reg_or_0_operand" "Nr") + (match_operand:DI 2 "arith_reg_or_0_operand" "rN")))] + "TARGET_SHMEDIA" + "cmpgtu %N1, %N2, %0" + [(set_attr "type" "cmp_media")]) + +; These two patterns are for combine. +; x != 0 is emitted as an unsigned x > r63 compare (presumably r63 reads as +; zero -- confirm). +(define_insn "*cmpne0sisi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (ne:SI (match_operand:SI 1 "arith_reg_operand" "r") (const_int 0)))] + "TARGET_SHMEDIA" + "cmpgtu %1,r63,%0" + [(set_attr "type" "cmp_media")]) + +;; ------------------------------------------------------------------------- +;; Conditional move instructions +;; ------------------------------------------------------------------------- + +;; The insn names may seem reversed, but note that cmveq performs the move +;; if op1 == 0, and cmvne does it if op1 != 0.
+ +(define_insn "movdicc_false" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (if_then_else:DI (eq (match_operand:DI 1 "arith_reg_operand" "r") + (const_int 0)) + (match_operand:DI 2 "arith_reg_or_0_operand" "rN") + (match_operand:DI 3 "arith_reg_operand" "0")))] + "TARGET_SHMEDIA" + "cmveq %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "movdicc_true" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (if_then_else:DI (ne (match_operand:DI 1 "arith_reg_operand" "r") + (const_int 0)) + (match_operand:DI 2 "arith_reg_or_0_operand" "rN") + (match_operand:DI 3 "arith_reg_operand" "0")))] + "TARGET_SHMEDIA" + "cmvne %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +;; A conditional move into operand 0 followed by a copy of operand 0 into +;; operand 2 becomes one conditional move into operand 2 with the condition +;; reversed, provided operand 0 is dead afterwards. +(define_peephole2 + [(set (match_operand:DI 0 "arith_reg_dest" "") + (if_then_else:DI (match_operator 3 "equality_comparison_operator" + [(match_operand:DI 1 "arith_reg_operand" "") + (const_int 0)]) + (match_operand:DI 2 "arith_reg_dest" "") + (match_dup 0))) + (set (match_dup 2) (match_dup 0))] + "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (if_then_else:DI (match_dup 3) (match_dup 0) (match_dup 2)))] + " +{ + operands[3] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[3])), + VOIDmode, operands[1], CONST0_RTX (DImode)); +}") + +;; A move into operand 0 that only feeds the following conditional move is +;; folded into that conditional move, provided operand 0 is dead afterwards. +(define_peephole2 + [(set (match_operand:DI 0 "general_movdst_operand" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "")) + (set (match_operand:DI 2 "arith_reg_dest" "") + (if_then_else:DI (match_operator 4 "equality_comparison_operator" + [(match_operand:DI 3 "arith_reg_operand" "") + (const_int 0)]) + (match_dup 0) + (match_dup 2)))] + "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (if_then_else:DI (match_dup 4) (match_dup 1) (match_dup 2)))] + "") + +(define_expand "movdicc" + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (match_operand 1 "comparison_operator" "") + (match_operand:DI 2 "register_operand" "") + (match_operand:DI 3 "register_operand"
"")))] + "TARGET_SHMEDIA" + " +{ + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && GET_MODE (XEXP (operands[1], 0)) == DImode + && XEXP (operands[1], 1) == const0_rtx) + ; + else + { + if (!can_create_pseudo_p ()) + FAIL; + + operands[1] = sh_emit_cheap_store_flag (GET_MODE (operands[0]), + GET_CODE (operands[1]), + XEXP (operands[1], 0), + XEXP (operands[1], 1)); + if (!operands[1]) + FAIL; + } +}") + +;; Add SImode variants for cmveq / cmvne to compensate for not promoting +;; SImode to DImode. +(define_insn "movsicc_false" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (if_then_else:SI (eq (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 0)) + (match_operand:SI 2 "arith_reg_or_0_operand" "rN") + (match_operand:SI 3 "arith_reg_operand" "0")))] + "TARGET_SHMEDIA" + "cmveq %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "movsicc_true" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (if_then_else:SI (ne (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 0)) + (match_operand:SI 2 "arith_reg_or_0_operand" "rN") + (match_operand:SI 3 "arith_reg_operand" "0")))] + "TARGET_SHMEDIA" + "cmvne %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") + (if_then_else:SI (match_operator 3 "equality_comparison_operator" + [(match_operand:SI 1 "arith_reg_operand" "") + (const_int 0)]) + (match_operand:SI 2 "arith_reg_dest" "") + (match_dup 0))) + (set (match_dup 2) (match_dup 0))] + "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (if_then_else:SI (match_dup 3) (match_dup 0) (match_dup 2)))] + " +{ + operands[3] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[3])), + VOIDmode, operands[1], CONST0_RTX (SImode)); +}") + +(define_peephole2 + [(set (match_operand:SI 0 "general_movdst_operand" "") + (match_operand:SI 1 "arith_reg_or_0_operand" "")) + (set (match_operand:SI 2 "arith_reg_dest" "") + (if_then_else:SI
(match_operator 4 "equality_comparison_operator" + [(match_operand:SI 3 "arith_reg_operand" "") + (const_int 0)]) + (match_dup 0) + (match_dup 2)))] + "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0]) + && (!REG_P (operands[1]) || GENERAL_REGISTER_P (REGNO (operands[1])))" + [(set (match_dup 2) + (if_then_else:SI (match_dup 4) (match_dup 1) (match_dup 2)))] + " +{ + replace_rtx (operands[4], operands[0], operands[1]); +}") + +(define_peephole2 + [(set (match_operand 0 "any_register_operand" "") + (match_operand 1 "any_register_operand" "")) + (set (match_operand 2 "any_register_operand" "") (match_operand 3 "" "")) + (set (match_operand 4 "" "") (match_operand 5 "" ""))] + "(HARD_REGNO_NREGS (REGNO (operands[0]), GET_MODE (operands[2])) + <= HARD_REGNO_NREGS (REGNO (operands[0]), GET_MODE (operands[0]))) + && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[2]) + && ! FIND_REG_INC_NOTE (peep2_next_insn (2), operands[0]) + && ! FIND_REG_INC_NOTE (peep2_next_insn (2), operands[2]) + && ! reg_overlap_mentioned_p (operands[0], operands[3]) + && ! reg_overlap_mentioned_p (operands[2], operands[0]) + && ! reg_overlap_mentioned_p (operands[0], operands[1]) + && (REGNO_REG_CLASS (REGNO (operands[0])) + == REGNO_REG_CLASS (REGNO (operands[2]))) + && (REGNO_REG_CLASS (REGNO (operands[1])) + == REGNO_REG_CLASS (REGNO (operands[0])))" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + " +{ + rtx set1, set2, insn2; + rtx replacements[4]; + + /* We want to replace occurrences of operands[0] with operands[1] and + operands[2] with operands[0] in operands[4]/operands[5]. + Doing just two replace_rtx calls naively would result in the second + replacement undoing all that the first did if operands[1] and operands[2] + are identical, so we must do this simultaneously.
*/ + replacements[0] = operands[0]; + replacements[1] = operands[1]; + replacements[2] = operands[2]; + replacements[3] = operands[0]; + if (!replace_n_hard_rtx (operands[5], replacements, 2, 0) + || !replace_n_hard_rtx (operands[4], replacements, 2, 0) + || !replace_n_hard_rtx (operands[2], replacements, 2, 0)) + FAIL; + + operands[5] = replace_n_hard_rtx (operands[5], replacements, 2, 1); + replace_n_hard_rtx (operands[4], replacements, 2, 1); + operands[2] = replace_n_hard_rtx (operands[2], replacements, 2, 1); + /* The operands array is aliased to recog_data.operand, which gets + clobbered by extract_insn, so finish with it now. */ + set1 = gen_rtx_SET (VOIDmode, operands[2], operands[3]); + set2 = gen_rtx_SET (VOIDmode, operands[4], operands[5]); + /* ??? The last insn might be a jump insn, but the generic peephole2 code + always uses emit_insn. */ + /* Check that we don't violate matching constraints or earlyclobbers. */ + extract_insn (emit_insn (set1)); + if (! constrain_operands (1)) + goto failure; + insn2 = emit (set2); + if (GET_CODE (insn2) == BARRIER) + goto failure; + extract_insn (insn2); + if (! constrain_operands (1)) + { + rtx tmp; + failure: + /* Swap each replacement pair so the calls below reverse the + substitutions made above before giving up. */ + tmp = replacements[0]; + replacements[0] = replacements[1]; + replacements[1] = tmp; + tmp = replacements[2]; + replacements[2] = replacements[3]; + replacements[3] = tmp; + replace_n_hard_rtx (SET_DEST (set1), replacements, 2, 1); + replace_n_hard_rtx (SET_DEST (set2), replacements, 2, 1); + replace_n_hard_rtx (SET_SRC (set2), replacements, 2, 1); + FAIL; + } + DONE; +}") + +;; The register allocator is rather clumsy in handling multi-way conditional +;; moves, so allow the combiner to make them, and we split them up after +;; reload.
+(define_insn_and_split "*movsicc_umin" + [(set (match_operand:SI 0 "arith_reg_dest" "=&r") + (umin:SI (if_then_else:SI + (eq (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 0)) + (match_operand:SI 2 "arith_reg_or_0_operand" "rN") + (match_operand:SI 3 "register_operand" "0")) + (match_operand:SI 4 "arith_reg_or_0_operand" "r"))) + (clobber (match_scratch:SI 5 "=&r"))] + "TARGET_SHMEDIA && !can_create_pseudo_p ()" + "#" + "TARGET_SHMEDIA && reload_completed" + [(pc)] + " +{ + emit_insn (gen_movsicc_false (operands[0], operands[1], operands[2], + operands[3])); + emit_insn (gen_cmpgtusi_media (operands[5], operands[4], operands[0])); + emit_insn (gen_movsicc_false (operands[0], operands[5], operands[4], + operands[0])); + DONE; +}") + +;; Conditional move on T emulated with a branch over a mov (bt 0f / mov). +(define_insn "*movsicc_t_false" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (match_operand:SI 1 "general_movsrc_operand" "r,I08") + (match_operand:SI 2 "arith_reg_operand" "0,0")))] + "TARGET_PRETEND_CMOVE + && (arith_reg_operand (operands[1], SImode) + || (immediate_operand (operands[1], SImode) + && satisfies_constraint_I08 (operands[1])))" + "bt 0f\;mov %1,%0\\n0:" + [(set_attr "type" "mt_group,arith") ;; poor approximation + (set_attr "length" "4")]) + +;; Same as above with the sense of T inverted (bf 0f / mov). +(define_insn "*movsicc_t_true" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (if_then_else (ne (reg:SI T_REG) (const_int 0)) + (match_operand:SI 1 "general_movsrc_operand" "r,I08") + (match_operand:SI 2 "arith_reg_operand" "0,0")))] + "TARGET_PRETEND_CMOVE + && (arith_reg_operand (operands[1], SImode) + || (immediate_operand (operands[1], SImode) + && satisfies_constraint_I08 (operands[1])))" + "bf 0f\;mov %1,%0\\n0:" + [(set_attr "type" "mt_group,arith") ;; poor approximation + (set_attr "length" "4")]) + +(define_expand "movsicc" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "arith_reg_or_0_operand" "") +
(match_operand:SI 3 "arith_reg_operand" "")))] + "TARGET_SHMEDIA || TARGET_PRETEND_CMOVE" + " +{ + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && GET_MODE (XEXP (operands[1], 0)) == SImode + && (TARGET_SHMEDIA + || (REG_P (XEXP (operands[1], 0)) + && REGNO (XEXP (operands[1], 0)) == T_REG)) + && XEXP (operands[1], 1) == const0_rtx) + ; + + else if (TARGET_PRETEND_CMOVE) + { + enum rtx_code code = GET_CODE (operands[1]); + enum rtx_code new_code = code; + rtx op0 = XEXP (operands[1], 0); + rtx op1 = XEXP (operands[1], 1); + + if (! currently_expanding_to_rtl) + FAIL; + switch (code) + { + case LT: case LE: case LEU: case LTU: + if (GET_MODE_CLASS (GET_MODE (op0)) != MODE_INT) + break; + /* Fall through: MODE_INT comparisons are reversed like NE. */ + case NE: + new_code = reverse_condition (code); + break; + case EQ: case GT: case GE: case GEU: case GTU: + break; + default: + FAIL; + } + sh_emit_scc_to_t (new_code, op0, op1); + /* T now holds new_code; test T != 0 if the code was kept and + T == 0 if it was reversed. */ + operands[1] = gen_rtx_fmt_ee (new_code == code ? NE : EQ, VOIDmode, + gen_rtx_REG (SImode, T_REG), const0_rtx); + } + else + { + if (!can_create_pseudo_p ()) + FAIL; + + operands[1] = sh_emit_cheap_store_flag (GET_MODE (operands[0]), + GET_CODE (operands[1]), + XEXP (operands[1], 0), + XEXP (operands[1], 1)); + if (!operands[1]) + FAIL; + } +}") + +;; QImode conditional move: view all three data operands as SImode subregs +;; and reuse movsicc. +(define_expand "movqicc" + [(set (match_operand:QI 0 "register_operand" "") + (if_then_else:QI (match_operand 1 "comparison_operator" "") + (match_operand:QI 2 "register_operand" "") + (match_operand:QI 3 "register_operand" "")))] + "TARGET_SHMEDIA" + " +{ + operands[0] = simplify_gen_subreg (SImode, operands[0], QImode, 0); + operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0); + operands[3] = simplify_gen_subreg (SImode, operands[3], QImode, 0); + emit (gen_movsicc (operands[0], operands[1], operands[2], operands[3])); + DONE; +}") + +;; ------------------------------------------------------------------------- +;; Addition instructions +;; ------------------------------------------------------------------------- +
+;; For TARGET_SH1 operand 2 is forced into a register and adddi3_compact is +;; emitted; otherwise the pattern below is used as written. +(define_expand "adddi3" + [(set (match_operand:DI 0 "arith_reg_operand" "") + (plus:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "arith_operand" "")))] + "" + " +{ + if (TARGET_SH1) + { + if (!can_create_pseudo_p () && ! arith_reg_operand (operands[2], DImode)) + FAIL; + operands[2] = force_reg (DImode, operands[2]); + emit_insn (gen_adddi3_compact (operands[0], operands[1], operands[2])); + DONE; + } +}") + +(define_insn "*adddi3_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (plus:DI (match_operand:DI 1 "arith_reg_operand" "%r,r") + (match_operand:DI 2 "arith_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + add %1, %2, %0 + addi %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "*adddisi3_media" + [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r,r") 0) + (plus:DI (match_operand:DI 1 "arith_reg_operand" "%r,r") + (match_operand:DI 2 "arith_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + add.l %1, %2, %0 + addi.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn "adddi3z_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "extend_reg_operand" "r") + (match_operand:SI 2 "extend_reg_or_0_operand" "rN"))))] + "TARGET_SHMEDIA" + "addz.l %1, %N2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +;; Placeholder (output template #) for the DImode add; it is expanded by +;; the define_split that follows. +(define_insn "adddi3_compact" + [(set (match_operand:DI 0 "arith_reg_dest" "=&r") + (plus:DI (match_operand:DI 1 "arith_reg_operand" "%0") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + [(set_attr "length" "6")]) + +;; After reload, add the two SImode halves with a carry chain: clrt, addc on +;; the low words, then addc1 on the high words. +(define_split + [(set (match_operand:DI 0 "arith_reg_dest" "") + (plus:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "arith_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && reload_completed" + [(const_int 0)] + " +{ + rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]); + high0 =
gen_rtx_REG (SImode, + true_regnum (operands[0]) + + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + high2 = gen_rtx_REG (SImode, + true_regnum (operands[2]) + + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + emit_insn (gen_clrt ()); + emit_insn (gen_addc (low0, low0, gen_lowpart (SImode, operands[2]))); + emit_insn (gen_addc1 (high0, high0, high2)); + DONE; +}") + +(define_insn "addc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI T_REG))) + (set (reg:SI T_REG) + (ltu:SI (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))] + "TARGET_SH1" + "addc %2,%0" + [(set_attr "type" "arith")]) + +;; Same instruction as addc, but T_REG is only clobbered instead of being +;; set to the carry. +(define_insn "addc1" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI T_REG))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "addc %2,%0" + [(set_attr "type" "arith")]) + +(define_expand "addsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (plus:SI (match_operand:SI 1 "arith_operand" "") + (match_operand:SI 2 "arith_operand" "")))] + "" + " +{ + if (TARGET_SHMEDIA) + operands[1] = force_reg (SImode, operands[1]); +}") + +(define_insn "addsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (plus:SI (match_operand:SI 1 "extend_reg_operand" "%r,r") + (match_operand:SI 2 "arith_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + add.l %1, %2, %0 + addi.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn "addsidi3_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (sign_extend:DI (plus:SI (match_operand:SI 1 "extend_reg_operand" + "%r,r") + (match_operand:SI 2 "arith_operand" + "r,I10"))))] + "TARGET_SHMEDIA" + "@ + add.l %1, %2, %0 + addi.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn "*addsi3_compact" + [(set (match_operand:SI 0
"arith_reg_dest" "=r") + (plus:SI (match_operand:SI 1 "arith_operand" "%0") + (match_operand:SI 2 "arith_operand" "rI08")))] + "TARGET_SH1" + "add %2,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Subtraction instructions +;; ------------------------------------------------------------------------- + +;; For TARGET_SH1 operand 1 is forced into a register and subdi3_compact is +;; emitted; otherwise the pattern below is used as written. +(define_expand "subdi3" + [(set (match_operand:DI 0 "arith_reg_operand" "") + (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "") + (match_operand:DI 2 "arith_reg_operand" "")))] + "" + " +{ + if (TARGET_SH1) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_subdi3_compact (operands[0], operands[1], operands[2])); + DONE; + } +}") + +(define_insn "*subdi3_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rN") + (match_operand:DI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "sub %N1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "subdisi3_media" + [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0) + (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rN") + (match_operand:DI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "sub.l %N1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +;; Placeholder (output template #) for the DImode subtract; it is expanded +;; by the define_split that follows. +(define_insn "subdi3_compact" + [(set (match_operand:DI 0 "arith_reg_dest" "=&r") + (minus:DI (match_operand:DI 1 "arith_reg_operand" "0") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + [(set_attr "length" "6")]) + +;; After reload, subtract the two SImode halves with a borrow chain: clrt, +;; subc on the low words, then subc1 on the high words. +(define_split + [(set (match_operand:DI 0 "arith_reg_dest" "") + (minus:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "arith_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && reload_completed" + [(const_int 0)] + " +{ + rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]); + high0 = gen_rtx_REG (SImode, + true_regnum (operands[0]) + +
(TARGET_LITTLE_ENDIAN ? 1 : 0)); + high2 = gen_rtx_REG (SImode, + true_regnum (operands[2]) + + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + emit_insn (gen_clrt ()); + emit_insn (gen_subc (low0, low0, gen_lowpart (SImode, operands[2]))); + emit_insn (gen_subc1 (high0, high0, high2)); + DONE; +}") + +(define_insn "subc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI T_REG))) + (set (reg:SI T_REG) + (gtu:SI (minus:SI (minus:SI (match_dup 1) (match_dup 2)) + (reg:SI T_REG)) + (match_dup 1)))] + "TARGET_SH1" + "subc %2,%0" + [(set_attr "type" "arith")]) + +;; Same instruction as subc, but T_REG is only clobbered instead of being +;; set to the borrow. +(define_insn "subc1" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI T_REG))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "subc %2,%0" + [(set_attr "type" "arith")]) + +;; life_analysis thinks rn is live before subc rn,rn, so make a special +;; pattern for this case. This helps multimedia applications that compute +;; the sum of absolute differences.
+(define_insn "mov_neg_si_t" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") (neg:SI (reg:SI T_REG)))] + "TARGET_SH1" + "subc %0,%0" + [(set_attr "type" "arith")]) + +(define_insn "*subsi3_internal" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")))] + "TARGET_SH1" + "sub %2,%0" + [(set_attr "type" "arith")]) + +;; When operand 1 is -1 the subtraction is split into an xor of operand 2 +;; with operand 1. The insn condition rejects a -1 minuend when operand 2 is +;; a TRUNCATE or SUBREG. +(define_insn_and_split "*subsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (match_operand:SI 1 "minuend_operand" "rN") + (match_operand:SI 2 "extend_reg_operand" "r")))] + "TARGET_SHMEDIA + && (operands[1] != constm1_rtx + || (GET_CODE (operands[2]) != TRUNCATE + && GET_CODE (operands[2]) != SUBREG))" + "sub.l %N1, %2, %0" + "operands[1] == constm1_rtx" + [(set (match_dup 0) (xor:SI (match_dup 2) (match_dup 1)))] + "" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +;; Zero-extended QImode NOT: zero-extend operand 1 first, then xor the +;; result with 255. This variant matches subreg byte 0 (little endian). +(define_split + [(set (match_operand:SI 0 "arith_reg_dest" "") + (zero_extend:SI (subreg:QI (not:SI (subreg:SI (match_operand:QI 1 + "general_extend_operand" + "") 0)) 0)))] + "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN" + [(set (match_dup 0) (zero_extend:SI (match_dup 1))) + (set (match_dup 0) (xor:SI (match_dup 0) (const_int 255)))] + "") + +;; Same split for big endian, where the outer subreg byte is 3. +(define_split + [(set (match_operand:SI 0 "arith_reg_dest" "") + (zero_extend:SI (subreg:QI (not:SI (subreg:SI (match_operand:QI 1 + "general_extend_operand" + "") 0)) 3)))] + "TARGET_SHMEDIA && ! TARGET_LITTLE_ENDIAN" + [(set (match_dup 0) (zero_extend:SI (match_dup 1))) + (set (match_dup 0) (xor:SI (match_dup 0) (const_int 255)))] + "") +;; Convert `constant - reg' to `neg rX; add rX, #const' since this +;; will sometimes save one instruction. Otherwise we might get +;; `mov #const, rY; sub rY,rX; mov rX, rY' if the source and dest regs +;; are the same.
+ +(define_expand "subsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (minus:SI (match_operand:SI 1 "arith_operand" "") + (match_operand:SI 2 "arith_reg_operand" "")))] + "" + " +{ + if (TARGET_SH1 && CONST_INT_P (operands[1])) + { + emit_insn (gen_negsi2 (operands[0], operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[0], operands[1])); + DONE; + } + if (TARGET_SHMEDIA) + { + if (!can_create_pseudo_p () + && ! arith_reg_or_0_operand (operands[1], SImode)) + FAIL; + /* Zero and SUBREG minuends are left alone; anything else is + forced into a register. */ + if (operands[1] != const0_rtx && GET_CODE (operands[1]) != SUBREG) + operands[1] = force_reg (SImode, operands[1]); + } +}") + +;; ------------------------------------------------------------------------- +;; Division instructions +;; ------------------------------------------------------------------------- + +;; We take advantage of the library routines which don't clobber as many +;; registers as a normal function call would. + +;; The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it +;; also has an effect on the register that holds the address of the sfunc. +;; To make this work, we have an extra dummy insn that shows the use +;; of this register for reorg. + +(define_insn "use_sfunc_addr" + [(set (reg:SI PR_REG) + (unspec:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_SFUNC))] + "TARGET_SH1 && check_use_sfunc_addr (insn, operands[0])" + "" + [(set_attr "length" "0")]) + +;; SH2A unsigned divide instruction. The dividend is tied to the destination +;; (constraint 0), the divisor uses constraint z, and the insn is kept out +;; of delay slots. +(define_insn "udivsi3_sh2a" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (udiv:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "z")))] + "TARGET_SH2A" + "divu %2,%1" + [(set_attr "type" "arith") + (set_attr "in_delay_slot" "no")]) + +;; We must use a pseudo-reg forced to reg 0 in the SET_DEST rather than +;; hard register 0. If we used hard register 0, then the next instruction +;; would be a move from hard register 0 to a pseudo-reg.
+;; If the pseudo-reg
+;; gets allocated to a stack slot that needs its address reloaded, then
+;; there is nothing to prevent reload from using r0 to reload the address.
+;; This reload would clobber the value in r0 we are trying to store.
+;; If we let reload allocate r0, then this problem can never happen.
+
+;; Unsigned divide of R4 by R5 through an sfunc call (jsr via operand 1).
+;; Clobbers T, PR and R4.  Enabled for SH1 when TARGET_SH4 is not set.
+(define_insn "udivsi3_i1"
+  [(set (match_operand:SI 0 "register_operand" "=z")
+        (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (clobber (reg:SI T_REG))
+   (clobber (reg:SI PR_REG))
+   (clobber (reg:SI R4_REG))
+   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+  "TARGET_SH1 && ! TARGET_SH4"
+  "jsr @%1%#"
+  [(set_attr "type" "sfunc")
+   (set_attr "needs_delay_slot" "yes")])
+
+; Since shmedia-nofpu code could be linked against shcompact code, and
+; the udivsi3 libcall has the same name, we must consider all registers
+; clobbered that are in the union of the registers clobbered by the
+; shmedia and the shcompact implementation. Note, if the shcompact
+; implementation actually used shcompact code, we'd need to clobber
+; also r23 and fr23.
+(define_insn "udivsi3_i1_media"
+  [(set (match_operand:SI 0 "register_operand" "=z")
+        (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (clobber (reg:SI T_MEDIA_REG))
+   (clobber (reg:SI PR_MEDIA_REG))
+   (clobber (reg:SI R20_REG))
+   (clobber (reg:SI R21_REG))
+   (clobber (reg:SI R22_REG))
+   (clobber (reg:DI TR0_REG))
+   (clobber (reg:DI TR1_REG))
+   (clobber (reg:DI TR2_REG))
+   (use (match_operand 1 "target_reg_operand" "b"))]
+  "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! TARGET_DIVIDE_FP)"
+  "blink %1, r18"
+  [(set_attr "type" "sfunc")
+   (set_attr "needs_delay_slot" "yes")])
+
+;; Unsigned divide for SHmedia with FPU, done in floating point: both
+;; inputs are zero-extended to DImode, converted to DFmode, divided, the
+;; quotient is converted back to DImode and truncated to SImode.
+;; Operands 3 to 8 are fresh pseudos created by the preparation code.
+(define_expand "udivsi3_i4_media"
+  [(set (match_dup 3)
+        (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (set (match_dup 4)
+        (zero_extend:DI (match_operand:SI 2 "register_operand" "")))
+   (set (match_dup 5) (float:DF (match_dup 3)))
+   (set (match_dup 6) (float:DF (match_dup 4)))
+   (set (match_dup 7) (div:DF (match_dup 5) (match_dup 6)))
+   (set (match_dup 8) (fix:DI (match_dup 7)))
+   (set (match_operand:SI 0 "register_operand" "")
+        (truncate:SI (match_dup 8)))]
+  "TARGET_SHMEDIA_FPU"
+  "
+{
+  operands[3] = gen_reg_rtx (DImode);
+  operands[4] = gen_reg_rtx (DImode);
+  operands[5] = gen_reg_rtx (DFmode);
+  operands[6] = gen_reg_rtx (DFmode);
+  operands[7] = gen_reg_rtx (DFmode);
+  operands[8] = gen_reg_rtx (DImode);
+}")
+
+;; Unsigned divide sfunc call for SH4 when TARGET_FPU_SINGLE is not set.
+;; The result uses the y constraint; the insn uses FPSCR, has fp_mode
+;; double, and clobbers T, PR, DR0, DR2, DR4, R0, R1, R4 and R5.
+(define_insn "udivsi3_i4"
+  [(set (match_operand:SI 0 "register_operand" "=y")
+        (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (clobber (reg:SI T_REG))
+   (clobber (reg:SI PR_REG))
+   (clobber (reg:DF DR0_REG))
+   (clobber (reg:DF DR2_REG))
+   (clobber (reg:DF DR4_REG))
+   (clobber (reg:SI R0_REG))
+   (clobber (reg:SI R1_REG))
+   (clobber (reg:SI R4_REG))
+   (clobber (reg:SI R5_REG))
+   (use (reg:PSI FPSCR_REG))
+   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+  "TARGET_SH4 && ! TARGET_FPU_SINGLE"
+  "jsr @%1%#"
+  [(set_attr "type" "sfunc")
+   (set_attr "fp_mode" "double")
+   (set_attr "needs_delay_slot" "yes")])
+
+;; Same call as udivsi3_i4 for TARGET_FPU_SINGLE on hard SH4 or SHcompact;
+;; this variant has no FPSCR use and no fp_mode attribute.
+(define_insn "udivsi3_i4_single"
+  [(set (match_operand:SI 0 "register_operand" "=y")
+        (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (clobber (reg:SI T_REG))
+   (clobber (reg:SI PR_REG))
+   (clobber (reg:DF DR0_REG))
+   (clobber (reg:DF DR2_REG))
+   (clobber (reg:DF DR4_REG))
+   (clobber (reg:SI R0_REG))
+   (clobber (reg:SI R1_REG))
+   (clobber (reg:SI R4_REG))
+   (clobber (reg:SI R5_REG))
+   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+  "(TARGET_HARD_SH4 || TARGET_SHCOMPACT) && TARGET_FPU_SINGLE"
+  "jsr @%1%#"
+  [(set_attr "type" "sfunc")
+   (set_attr "needs_delay_slot" "yes")])
+
+;; Unsigned divide sfunc call used by the udivsi3 expander for
+;; TARGET_DIVIDE_CALL_TABLE.  Clobbers T, R1, PR, MACH and MACL.
+(define_insn "udivsi3_i4_int"
+  [(set (match_operand:SI 0 "register_operand" "=z")
+        (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (clobber (reg:SI T_REG))
+   (clobber (reg:SI R1_REG))
+   (clobber (reg:SI PR_REG))
+   (clobber (reg:SI MACH_REG))
+   (clobber (reg:SI MACL_REG))
+   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+  "TARGET_SH1"
+  "jsr @%1%#"
+  [(set_attr "type" "sfunc")
+   (set_attr "needs_delay_slot" "yes")])
+
+
+;; Expander for unsigned SImode division.  The preparation code picks one
+;; strategy from the target flags, in this order: call-table library
+;; routine, FP-based library routine, inline FP sequence on SHmedia with
+;; FPU, the SH2A divu instruction, the SH5 library routines, and finally
+;; the plain __udivsi3 sfunc.  For the library-call strategies the
+;; dividend is moved to R4 and the divisor to R5 before the call insn.
+(define_expand "udivsi3"
+  [(set (match_dup 3) (symbol_ref:SI "__udivsi3"))
+   (set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
+   (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" ""))
+   (parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (udiv:SI (reg:SI R4_REG)
+                            (reg:SI R5_REG)))
+              (clobber (reg:SI T_REG))
+              (clobber (reg:SI PR_REG))
+              (clobber (reg:SI R4_REG))
+              (use (match_dup 3))])]
+  ""
+  "
+{
+  rtx last;
+
+  operands[3] = gen_reg_rtx (Pmode);
+  /* Emit the move of the address to a pseudo outside of the libcall.  */
+  if (TARGET_DIVIDE_CALL_TABLE)
+    {
+      /* libgcc2:__udivmoddi4 is not supposed to use an actual division, since
+         that causes problems when the divide code is supposed to come from a
+         separate library.  Division by zero is undefined, so dividing 1 can be
+         implemented by comparing with the divisor.  */
+      if (operands[1] == const1_rtx && currently_expanding_to_rtl)
+        {
+          rtx test = gen_rtx_GEU (VOIDmode, operands[1], operands[2]);
+          emit_insn (gen_cstoresi4 (operands[0], test,
+                                    operands[1], operands[2]));
+          DONE;
+        }
+      else if (operands[2] == const0_rtx)
+        {
+          emit_move_insn (operands[0], operands[2]);
+          DONE;
+        }
+      function_symbol (operands[3], \"__udivsi3_i4i\", SFUNC_GOT);
+      last = gen_udivsi3_i4_int (operands[0], operands[3]);
+    }
+  else if (TARGET_DIVIDE_CALL_FP)
+    {
+      function_symbol (operands[3], \"__udivsi3_i4\", SFUNC_STATIC);
+      if (TARGET_FPU_SINGLE)
+        last = gen_udivsi3_i4_single (operands[0], operands[3]);
+      else
+        last = gen_udivsi3_i4 (operands[0], operands[3]);
+    }
+  else if (TARGET_SHMEDIA_FPU)
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      operands[2] = force_reg (SImode, operands[2]);
+      emit_insn (gen_udivsi3_i4_media (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else if (TARGET_SH2A)
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      operands[2] = force_reg (SImode, operands[2]);
+      emit_insn (gen_udivsi3_sh2a (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else if (TARGET_SH5)
+    {
+      function_symbol (operands[3],
+                       TARGET_FPU_ANY ? \"__udivsi3_i4\" : \"__udivsi3\",
+                       SFUNC_STATIC);
+
+      if (TARGET_SHMEDIA)
+        last = gen_udivsi3_i1_media (operands[0], operands[3]);
+      else if (TARGET_FPU_ANY)
+        last = gen_udivsi3_i4_single (operands[0], operands[3]);
+      else
+        last = gen_udivsi3_i1 (operands[0], operands[3]);
+    }
+  else
+    {
+      function_symbol (operands[3], \"__udivsi3\", SFUNC_STATIC);
+      last = gen_udivsi3_i1 (operands[0], operands[3]);
+    }
+  /* Load the argument registers named in the RTL template above; use the
+     R4_REG / R5_REG constants rather than bare register numbers.  */
+  emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
+  emit_move_insn (gen_rtx_REG (SImode, R5_REG), operands[2]);
+  emit_insn (last);
+  DONE;
+}")
+
+;; Signed SImode divide using the SH2A divs instruction.  Operand 1 is
+;; tied to the destination and operand 2 uses the z constraint.
+(define_insn "divsi3_sh2a"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+        (div:SI (match_operand:SI 1 "arith_reg_operand" "0")
+                (match_operand:SI 2 "arith_reg_operand" "z")))]
+  "TARGET_SH2A"
+  "divs %2,%1"
+  [(set_attr "type" "arith")
+   (set_attr "in_delay_slot" "no")])
+
+;; Signed divide of R4 by R5 through an sfunc call (jsr via operand 1).
+;; Clobbers T, PR, R1, R2 and R3.  Enabled for SH1 when TARGET_SH4 is
+;; not set.
+(define_insn "divsi3_i1"
+  [(set (match_operand:SI 0 "register_operand" "=z")
+        (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (clobber (reg:SI T_REG))
+   (clobber (reg:SI PR_REG))
+   (clobber (reg:SI R1_REG))
+   (clobber (reg:SI R2_REG))
+   (clobber (reg:SI R3_REG))
+   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+  "TARGET_SH1 && ! TARGET_SH4"
+  "jsr @%1%#"
+  [(set_attr "type" "sfunc")
+   (set_attr "needs_delay_slot" "yes")])
+
+;; Signed divide call for SHmedia (blink through target register
+;; operand 1).  Clobbers T_MEDIA, PR_MEDIA, R1, R20, R21 and TR0.
+(define_insn "divsi3_i1_media"
+  [(set (match_operand:SI 0 "register_operand" "=z")
+        (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (clobber (reg:SI T_MEDIA_REG))
+   (clobber (reg:SI PR_MEDIA_REG))
+   (clobber (reg:SI R1_REG))
+   (clobber (reg:SI R20_REG))
+   (clobber (reg:SI R21_REG))
+   (clobber (reg:SI TR0_REG))
+   (use (match_operand 1 "target_reg_operand" "b"))]
+  "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! TARGET_DIVIDE_FP)"
+  "blink %1, r18"
+  [(set_attr "type" "sfunc")])
+
+;; Variant of divsi3_i1_media in which R20 is an input (use) rather than
+;; a clobber.
+(define_insn "divsi3_media_2"
+  [(set (match_operand:SI 0 "register_operand" "=z")
+        (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (clobber (reg:SI T_MEDIA_REG))
+   (clobber (reg:SI PR_MEDIA_REG))
+   (clobber (reg:SI R1_REG))
+   (clobber (reg:SI R21_REG))
+   (clobber (reg:SI TR0_REG))
+   (use (reg:SI R20_REG))
+   (use (match_operand 1 "target_reg_operand" "b"))]
+  "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! TARGET_DIVIDE_FP)"
+  "blink %1, r18"
+  [(set_attr "type" "sfunc")])
+
+;; This pattern acts as a placeholder for -mdiv=inv:call to carry
+;; hard reg clobbers and data dependencies that we need when we want
+;; to rematerialize the division into a call.
+;; When split, it becomes a plain copy of operand 3 into operand 0.
+(define_insn_and_split "divsi_inv_call"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (div:SI (match_operand:SI 1 "register_operand" "r")
+                (match_operand:SI 2 "register_operand" "r")))
+   (clobber (reg:SI R4_REG))
+   (clobber (reg:SI R5_REG))
+   (clobber (reg:SI T_MEDIA_REG))
+   (clobber (reg:SI PR_MEDIA_REG))
+   (clobber (reg:SI R1_REG))
+   (clobber (reg:SI R21_REG))
+   (clobber (reg:SI TR0_REG))
+   (clobber (reg:SI R20_REG))
+   (use (match_operand:SI 3 "register_operand" "r"))]
+  "TARGET_SHMEDIA"
+  "#"
+  "&& (high_life_started || reload_completed)"
+  [(set (match_dup 0) (match_dup 3))]
+  ""
+  [(set_attr "highpart" "must_split")])
+
+;; This is the combiner pattern for -mdiv=inv:call .
+;; Matches a division whose use operand is the UNSPEC_DIV_INV_M3
+;; expression.  When split, the division is replaced by a call: the
+;; dividend goes to R4, the divisor to R5, and divsi3_media_2 is emitted
+;; with the chosen library symbol.
+(define_insn_and_split "*divsi_inv_call_combine"
+  [(set (match_operand:SI 0 "register_operand" "=z")
+        (div:SI (match_operand:SI 1 "register_operand" "r")
+                (match_operand:SI 2 "register_operand" "r")))
+   (clobber (reg:SI R4_REG))
+   (clobber (reg:SI R5_REG))
+   (clobber (reg:SI T_MEDIA_REG))
+   (clobber (reg:SI PR_MEDIA_REG))
+   (clobber (reg:SI R1_REG))
+   (clobber (reg:SI R21_REG))
+   (clobber (reg:SI TR0_REG))
+   (clobber (reg:SI R20_REG))
+   (use (unspec:SI [(match_dup 1)
+                    (match_operand:SI 3 "" "")
+                    (unspec:SI [(match_operand:SI 4 "" "")
+                                (match_dup 3)
+                                (match_operand:DI 5 "" "")]
+                     UNSPEC_DIV_INV_M2)
+                    (match_operand:DI 6 "" "")
+                    (const_int 0)
+                    (const_int 0)]
+         UNSPEC_DIV_INV_M3))]
+  "TARGET_SHMEDIA"
+  "#"
+  "&& (high_life_started || reload_completed)"
+  [(pc)]
+  "
+{
+  const char *name = sh_divsi3_libfunc;
+  enum sh_function_kind kind = SFUNC_GOT;
+  rtx sym;
+
+  emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
+  emit_move_insn (gen_rtx_REG (SImode, R5_REG), operands[2]);
+  /* Executed at most once; the loop form only exists so that break can
+     leave the block early.  If operand 3 is an UNSPEC_DIV_INV_M1, its
+     first element is loaded into R20 and __sdivsi3_2 is called instead.  */
+  while (TARGET_DIVIDE_INV_CALL2)
+    {
+      rtx x = operands[3];
+
+      if (GET_CODE (x) != UNSPEC || XINT (x, 1) != UNSPEC_DIV_INV_M1)
+        break;
+      x = XVECEXP (x, 0, 0);
+      name = \"__sdivsi3_2\";
+      kind = SFUNC_STATIC;
+      emit_move_insn (gen_rtx_REG (DImode, R20_REG), x);
+      break;
+    }
+  sym = function_symbol (NULL, name, kind);
+  emit_insn (gen_divsi3_media_2 (operands[0], sym));
+  DONE;
+}"
+  [(set_attr "highpart" "must_split")])
+
+;; Signed divide for SHmedia with FPU, done in floating point: both
+;; inputs are converted to DFmode, divided, and the quotient is converted
+;; back to SImode.  Operands 3 to 5 are fresh DFmode pseudos.
+(define_expand "divsi3_i4_media"
+  [(set (match_dup 3) (float:DF (match_operand:SI 1 "register_operand" "r")))
+   (set (match_dup 4) (float:DF (match_operand:SI 2 "register_operand" "r")))
+   (set (match_dup 5) (div:DF (match_dup 3) (match_dup 4)))
+   (set (match_operand:SI 0 "register_operand" "=r")
+        (fix:SI (match_dup 5)))]
+  "TARGET_SHMEDIA_FPU"
+  "
+{
+  operands[3] = gen_reg_rtx (DFmode);
+  operands[4] = gen_reg_rtx (DFmode);
+  operands[5] = gen_reg_rtx (DFmode);
+}")
+
+;; Signed divide sfunc call for SH4 when TARGET_FPU_SINGLE is not set.
+;; Uses FPSCR, has fp_mode double, and clobbers PR, DR0 and DR2.
+(define_insn "divsi3_i4"
+  [(set (match_operand:SI 0 "register_operand" "=y")
+        (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (clobber (reg:SI PR_REG))
+   (clobber (reg:DF DR0_REG))
+   (clobber (reg:DF DR2_REG))
+   (use (reg:PSI FPSCR_REG))
+   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+  "TARGET_SH4 && ! TARGET_FPU_SINGLE"
+  "jsr @%1%#"
+  [(set_attr "type" "sfunc")
+   (set_attr "fp_mode" "double")
+   (set_attr "needs_delay_slot" "yes")])
+
+;; Signed divide sfunc call for TARGET_FPU_SINGLE on hard SH4 or
+;; SHcompact.  Clobbers PR, DR0, DR2 and R2; no FPSCR use.
+(define_insn "divsi3_i4_single"
+  [(set (match_operand:SI 0 "register_operand" "=y")
+        (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (clobber (reg:SI PR_REG))
+   (clobber (reg:DF DR0_REG))
+   (clobber (reg:DF DR2_REG))
+   (clobber (reg:SI R2_REG))
+   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+  "(TARGET_HARD_SH4 || TARGET_SHCOMPACT) && TARGET_FPU_SINGLE"
+  "jsr @%1%#"
+  [(set_attr "type" "sfunc")
+   (set_attr "needs_delay_slot" "yes")])
+
+;; Signed divide sfunc call used by the divsi3 expander for
+;; TARGET_DIVIDE_CALL_TABLE.  Clobbers T, PR, R1, MACH and MACL.
+(define_insn "divsi3_i4_int"
+  [(set (match_operand:SI 0 "register_operand" "=z")
+        (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (clobber (reg:SI T_REG))
+   (clobber (reg:SI PR_REG))
+   (clobber (reg:SI R1_REG))
+   (clobber (reg:SI MACH_REG))
+   (clobber (reg:SI MACL_REG))
+   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+  "TARGET_SH1"
+  "jsr @%1%#"
+  [(set_attr "type" "sfunc")
+   (set_attr "needs_delay_slot" "yes")])
+
+;; Expander for signed SImode division.  The preparation code picks one
+;; strategy from the target flags, in this order: call-table library
+;; routine, FP-based library routine, the SH2A divs instruction, the
+;; inline divide-by-inverse sequence (TARGET_DIVIDE_INV), the inline FP
+;; sequence on SHmedia, the SH5 library routines, and finally the plain
+;; sfunc call.  For the library-call strategies the dividend is moved to
+;; R4 and the divisor to R5 before the call insn.
+(define_expand "divsi3"
+  [(set (match_dup 3) (symbol_ref:SI "__sdivsi3"))
+   (set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
+   (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" ""))
+   (parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (div:SI (reg:SI R4_REG)
+                           (reg:SI R5_REG)))
+              (clobber (reg:SI T_REG))
+              (clobber (reg:SI PR_REG))
+              (clobber (reg:SI R1_REG))
+              (clobber (reg:SI R2_REG))
+              (clobber (reg:SI R3_REG))
+              (use (match_dup 3))])]
+  ""
+  "
+{
+  rtx last;
+
+  operands[3] = gen_reg_rtx (Pmode);
+  /* Emit the move of the address to a pseudo outside of the libcall.  */
+  if (TARGET_DIVIDE_CALL_TABLE)
+    {
+      function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+      last = gen_divsi3_i4_int (operands[0], operands[3]);
+    }
+  else if (TARGET_DIVIDE_CALL_FP)
+    {
+      function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+      if (TARGET_FPU_SINGLE)
+        last = gen_divsi3_i4_single (operands[0], operands[3]);
+      else
+        last = gen_divsi3_i4 (operands[0], operands[3]);
+    }
+  else if (TARGET_SH2A)
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      operands[2] = force_reg (SImode, operands[2]);
+      emit_insn (gen_divsi3_sh2a (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else if (TARGET_DIVIDE_INV)
+    {
+      rtx dividend = operands[1];
+      rtx divisor = operands[2];
+      rtx tab_base;
+      rtx nsb_res = gen_reg_rtx (DImode);
+      rtx norm64 = gen_reg_rtx (DImode);
+      rtx tab_ix = gen_reg_rtx (DImode);
+      rtx norm32 = gen_reg_rtx (SImode);
+      rtx i92 = force_reg (DImode, GEN_INT (92));
+      rtx scratch0a = gen_reg_rtx (DImode);
+      rtx scratch0b = gen_reg_rtx (DImode);
+      rtx inv0 = gen_reg_rtx (SImode);
+      rtx scratch1a = gen_reg_rtx (DImode);
+      rtx scratch1b = gen_reg_rtx (DImode);
+      rtx shift = gen_reg_rtx (DImode);
+      rtx i2p27, i43;
+      rtx inv1 = gen_reg_rtx (SImode);
+      rtx scratch2a = gen_reg_rtx (DImode);
+      rtx scratch2b = gen_reg_rtx (SImode);
+      rtx inv2 = gen_reg_rtx (SImode);
+      rtx scratch3a = gen_reg_rtx (DImode);
+      rtx scratch3b = gen_reg_rtx (DImode);
+      rtx scratch3c = gen_reg_rtx (DImode);
+      rtx scratch3d = gen_reg_rtx (SImode);
+      rtx scratch3e = gen_reg_rtx (DImode);
+      rtx result = gen_reg_rtx (SImode);
+
+      if (! arith_reg_or_0_operand (dividend, SImode))
+        dividend = force_reg (SImode, dividend);
+      if (! arith_reg_operand (divisor, SImode))
+        divisor = force_reg (SImode, divisor);
+      /* Materialize the address of __div_table in a DImode register,
+         sign-extending it when Pmode is narrower under PIC.  */
+      if (flag_pic && Pmode != DImode)
+        {
+          tab_base = gen_rtx_SYMBOL_REF (Pmode, \"__div_table\");
+          tab_base = gen_datalabel_ref (tab_base);
+          tab_base = force_reg (DImode, gen_rtx_SIGN_EXTEND (DImode, tab_base));
+        }
+      else
+        {
+          tab_base = gen_rtx_SYMBOL_REF (DImode, \"__div_table\");
+          tab_base = gen_datalabel_ref (tab_base);
+          tab_base = force_reg (DImode, tab_base);
+        }
+      /* Shift the positive value and negate afterwards: left-shifting a
+         negative value is undefined behavior in C.  The constant is the
+         same as before, minus two to the power of 28.  */
+      if (TARGET_DIVIDE_INV20U)
+        i2p27 = force_reg (DImode, GEN_INT (-(2 << 27)));
+      else
+        i2p27 = GEN_INT (0);
+      if (TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L)
+        i43 = force_reg (DImode, GEN_INT (43));
+      else
+        i43 = GEN_INT (0);
+      emit_insn (gen_nsbdi (nsb_res,
+                            simplify_gen_subreg (DImode, divisor, SImode, 0)));
+      emit_insn (gen_ashldi3_media (norm64,
+                                    gen_rtx_SUBREG (DImode, divisor, 0),
+                                    nsb_res));
+      emit_insn (gen_ashrdi3_media (tab_ix, norm64, GEN_INT (58)));
+      emit_insn (gen_ashrdisi3_media_high (norm32, norm64, GEN_INT (32)));
+      emit_insn (gen_divsi_inv_m1 (inv1, tab_base, tab_ix, norm32,
+                                   inv0, scratch0a, scratch0b,
+                                   scratch1a, scratch1b));
+      emit_insn (gen_subdi3 (shift, i92, nsb_res));
+      emit_insn (gen_divsi_inv_m2 (inv2, norm32, inv1, i92,
+                                   scratch2a));
+      emit_insn (gen_divsi_inv_m3 (result, dividend, inv1, inv2, shift,
+                                   i2p27, i43,
+                                   scratch3a, scratch3b, scratch3c,
+                                   scratch2a, scratch2b, scratch3d, scratch3e));
+      if (TARGET_DIVIDE_INV_CALL || TARGET_DIVIDE_INV_CALL2)
+        emit_insn (gen_divsi_inv_call (operands[0], dividend, divisor, result));
+      else if (TARGET_DIVIDE_INV_FP)
+        emit_insn (gen_divsi_inv_fp (operands[0], dividend, divisor, result,
+                                     gen_reg_rtx (SImode), gen_reg_rtx (SImode),
+                                     gen_reg_rtx (DFmode), gen_reg_rtx (DFmode),
+                                     gen_reg_rtx (DFmode)));
+      else
+        emit_move_insn (operands[0], result);
+      DONE;
+    }
+  else if (TARGET_SHMEDIA_FPU && TARGET_DIVIDE_FP)
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      operands[2] = force_reg (SImode, operands[2]);
+      emit_insn (gen_divsi3_i4_media (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else if (TARGET_SH5)
+    {
+      if (TARGET_DIVIDE_CALL2)
+        {
+          rtx tab_base = gen_rtx_SYMBOL_REF (Pmode, \"__div_table\");
+          tab_base = gen_datalabel_ref (tab_base);
+          emit_move_insn (gen_rtx_REG (Pmode, R20_REG), tab_base);
+        }
+      if (TARGET_FPU_ANY && TARGET_SH1)
+        function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+      else if (TARGET_DIVIDE_CALL2)
+        function_symbol (operands[3], \"__sdivsi3_2\", SFUNC_STATIC);
+      else
+        function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+
+      if (TARGET_SHMEDIA)
+        last = ((TARGET_DIVIDE_CALL2 ? gen_divsi3_media_2 : gen_divsi3_i1_media)
+                (operands[0], operands[3]));
+      else if (TARGET_FPU_ANY)
+        last = gen_divsi3_i4_single (operands[0], operands[3]);
+      else
+        last = gen_divsi3_i1 (operands[0], operands[3]);
+    }
+  else
+    {
+      function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+      last = gen_divsi3_i1 (operands[0], operands[3]);
+    }
+  /* Load the argument registers named in the RTL template above; use the
+     R4_REG / R5_REG constants rather than bare register numbers.  */
+  emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
+  emit_move_insn (gen_rtx_REG (SImode, R5_REG), operands[2]);
+  emit_insn (last);
+  DONE;
+}")
+
+;; operands: scratch, tab_base, tab_ix
+;; These are unspecs because we could generate an indexed addressing mode
+;; even if -m5-32media, where INDEX_REG_CLASS == NO_REGS, and this would
+;; confuse reload. See PR27117.
+
+;; Indexed unsigned byte load (ldx.ub) from operand 1 plus operand 2,
+;; zero-extended to DImode.
+(define_insn "divsi_inv_qitable"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI (unspec:QI [(match_operand:DI 1 "register_operand" "r")
+                                    (match_operand:DI 2 "register_operand" "r")]
+                         UNSPEC_DIV_INV_TABLE)))]
+  "TARGET_SHMEDIA"
+  "@
+   ldx.ub %1, %2, %0"
+  [(set_attr "type" "load_media")
+   (set_attr "highpart" "user")])
+
+;; operands: scratch, tab_base, tab_ix
+;; Indexed halfword load (ldx.w), sign-extended to DImode.
+(define_insn "divsi_inv_hitable"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (sign_extend:DI (unspec:HI [(match_operand:DI 1 "register_operand" "r")
+                                    (match_operand:DI 2 "register_operand" "r")]
+                         UNSPEC_DIV_INV_TABLE)))]
+  "TARGET_SHMEDIA"
+  "@
+   ldx.w %1, %2, %0"
+  [(set_attr "type" "load_media")
+   (set_attr "highpart" "user")])
+
+;; operands: inv0, tab_base, tab_ix, norm32
+;; scratch equiv in sdivsi3_2: r19, r21
+;; Operands 4 and 5 are DImode scratch registers.  The preparation code
+;; emits the whole sequence itself and ends with DONE.
+(define_expand "divsi_inv_m0"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:DI 1 "register_operand" "r")
+                    (match_operand:DI 2 "register_operand" "r")
+                    (match_operand:SI 3 "register_operand" "r")]
+         UNSPEC_DIV_INV_M0))
+   (clobber (match_operand:DI 4 "register_operand" "=r"))
+   (clobber (match_operand:DI 5 "register_operand" "=r"))]
+  "TARGET_SHMEDIA"
+  "
+{
+/*
+tab_base: r20
+tab_ix: r21
+norm32: r25
+ ldx.ub r20, r21, r19 // u0.8
+ shlli r21, 1, r21
+ muls.l r25, r19, r19 // s2.38
+ ldx.w r20, r21, r21 // s2.14
+ shari r19, 24, r19 // truncate to s2.14
+ sub r21, r19, r19 // some 11 bit inverse in s1.14
+*/
+
+  rtx inv0 = operands[0];
+  rtx tab_base = operands[1];
+  rtx tab_ix = operands[2];
+  rtx norm32 = operands[3];
+  rtx scratch0 = operands[4];
+  rtx scratch0_si = simplify_gen_subreg (SImode, scratch0, DImode, SIDI_OFF);
+  rtx scratch1 = operands[5];
+
+  /* The emitted insns follow the assembly sketch above one for one.  */
+  emit_insn (gen_divsi_inv_qitable (scratch0, tab_base, tab_ix));
+  emit_insn (gen_ashldi3_media (scratch1, tab_ix, GEN_INT (1)));
+  emit_insn (gen_mulsidi3_media (scratch0, norm32, scratch0_si));
+  emit_insn (gen_divsi_inv_hitable (scratch1, tab_base, scratch1));
+  emit_insn (gen_ashrdi3_media (scratch0, scratch0, GEN_INT (24)));
+  emit_insn (gen_subdisi3_media (inv0, scratch1, scratch0));
+  DONE;
+}")
+
+;; operands: inv1, tab_base, tab_ix, norm32
+;; Operand 4 receives inv0; operands 5 and 6 are the scratches handed to
+;; divsi_inv_m0; operands 7 and 8 are scratches for this step.  The insn
+;; is only split once no new pseudos can be created.
+(define_insn_and_split "divsi_inv_m1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:DI 1 "register_operand" "r")
+                    (match_operand:DI 2 "register_operand" "r")
+                    (match_operand:SI 3 "register_operand" "r")]
+         UNSPEC_DIV_INV_M1))
+   (clobber (match_operand:SI 4 "register_operand" "=r"))
+   (clobber (match_operand:DI 5 "register_operand" "=r"))
+   (clobber (match_operand:DI 6 "register_operand" "=r"))
+   (clobber (match_operand:DI 7 "register_operand" "=r"))
+   (clobber (match_operand:DI 8 "register_operand" "=r"))]
+  "TARGET_SHMEDIA"
+  "#"
+  "&& !can_create_pseudo_p ()"
+  [(pc)]
+  "
+{
+/* inv0: r19
+ muls.l r19, r19, r18 // u0.28
+ muls.l r25, r18, r18 // s2.58
+ shlli r19, 45, r0 // multiply by two and convert to s2.58
+ sub r0, r18, r18
+ shari r18, 28, r18 // some 18 bit inverse in s1.30
+*/
+
+  rtx inv1 = operands[0];
+  rtx tab_base = operands[1];
+  rtx tab_ix = operands[2];
+  rtx norm32 = operands[3];
+  rtx inv0 = operands[4];
+  rtx inv0_di = simplify_gen_subreg (DImode, inv0, SImode, 0);
+  rtx scratch0a = operands[5];
+  rtx scratch0b = operands[6];
+  rtx scratch0 = operands[7];
+  rtx scratch1 = operands[8];
+  rtx scratch1_si = simplify_gen_subreg (SImode, scratch1, DImode, SIDI_OFF);
+
+  /* Compute inv0 first, then refine it following the sketch above.  */
+  emit_insn (gen_divsi_inv_m0 (inv0, tab_base, tab_ix, norm32,
+                               scratch0a, scratch0b));
+  emit_insn (gen_mulsidi3_media (scratch1, inv0, inv0));
+  emit_insn (gen_mulsidi3_media (scratch1, norm32, scratch1_si));
+  emit_insn (gen_ashldi3_media (scratch0, inv0_di, GEN_INT (45)));
+  emit_insn (gen_subdi3 (scratch1, scratch0, scratch1));
+  emit_insn (gen_ashrdisi3_media_opaque (inv1, scratch1, GEN_INT (28)));
+  DONE;
+}")
+
+;; operands: inv2, norm32, inv1, i92
+;; Operand 4 is a DImode scratch register.
+(define_insn_and_split "divsi_inv_m2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+                    (match_operand:SI 2 "register_operand" "r")
+                    (match_operand:DI 3 "register_operand" "r")]
+         UNSPEC_DIV_INV_M2))
+   (clobber (match_operand:DI 4 "register_operand" "=r"))]
+  "TARGET_SHMEDIA"
+  "#"
+  "&& !can_create_pseudo_p ()"
+  [(pc)]
+  "
+{
+/*
+ muls.l r18, r25, r0 // s2.60
+ shari r0, 16, r0 // s-16.44
+ sub
+ muls.l r0, r18, r19 // s-16.74
+ shari r19, 30, r19 // s-16.44
+*/
+  rtx inv2 = operands[0];
+  rtx norm32 = operands[1];
+  rtx inv1 = operands[2];
+  rtx i92 = operands[3];
+  rtx scratch0 = operands[4];
+  rtx scratch0_si = simplify_gen_subreg (SImode, scratch0, DImode, SIDI_OFF);
+
+  emit_insn (gen_mulsidi3_media (scratch0, inv1, norm32));
+  emit_insn (gen_ashrdi3_media (scratch0, scratch0, GEN_INT (16)));
+  /* The operand-less sub line in the sketch corresponds to this insn,
+     which subtracts the shifted product from i92.  */
+  emit_insn (gen_subdi3 (scratch0, i92, scratch0));
+  emit_insn (gen_mulsidi3_media (scratch0, scratch0_si, inv1));
+  emit_insn (gen_ashrdisi3_media_opaque (inv2, scratch0, GEN_INT (30)));
+  DONE;
+}")
+
+;; operands: result, dividend, inv1, inv2, shift, then operands 5 and 6,
+;; which the split code below does not read.  Operands 7 to 13 are
+;; scratch registers, of which only 7, 8 and 9 are used by the split.
+(define_insn_and_split "divsi_inv_m3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
+                    (match_operand:SI 2 "register_operand" "r")
+                    (match_operand:SI 3 "register_operand" "r")
+                    (match_operand:DI 4 "register_operand" "r")
+                    (match_operand:DI 5 "arith_reg_or_0_operand" "rN")
+                    (match_operand:DI 6 "arith_reg_or_0_operand" "rN")]
+         UNSPEC_DIV_INV_M3))
+   (clobber (match_operand:DI 7 "register_operand" "=r"))
+   (clobber (match_operand:DI 8 "register_operand" "=r"))
+   (clobber (match_operand:DI 9 "register_operand" "=r"))
+   (clobber (match_operand:DI 10 "register_operand" "=r"))
+   (clobber (match_operand:SI 11 "register_operand" "=r"))
+   (clobber (match_operand:SI 12 "register_operand" "=r"))
+   (clobber (match_operand:DI 13 "register_operand" "=r"))]
+  "TARGET_SHMEDIA"
+  "#"
+  "&& !can_create_pseudo_p ()"
+  [(pc)]
+  "
+{
+/*
+ r0: result r1: shift r4: dividend r18: inv1 r19: inv2
+ r0: scratch0 r19: scratch1 r21: scratch2
+
+ muls.l r18, r4, r25 // s32.30
+ muls.l r19, r4, r19 // s15.30
+ shari r25, 63, r21
+ shari r19, 14, r19 // s18.-14
+ sub r25, r19, r0
+ shard r0, r1, r0
+ sub r0, r21, r0
+*/
+
+  rtx result = operands[0];
+  rtx dividend = operands[1];
+  rtx inv1 = operands[2];
+  rtx inv2 = operands[3];
+  rtx shift = operands[4];
+  rtx scratch0 = operands[7];
+  rtx scratch1 = operands[8];
+  rtx scratch2 = operands[9];
+
+  /* A dividend satisfying constraint N is simply copied to the result.  */
+  if (satisfies_constraint_N (dividend))
+    {
+      emit_move_insn (result, dividend);
+      DONE;
+    }
+
+  emit_insn (gen_mulsidi3_media (scratch0, inv1, dividend));
+  emit_insn (gen_mulsidi3_media (scratch1, inv2, dividend));
+  emit_insn (gen_ashrdi3_media (scratch2, scratch0, GEN_INT (63)));
+  emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (14)));
+  /* NOTE(review): the sketch above shows a sub at this step while an add
+     is emitted here; presumably the sign is already folded into inv2 --
+     confirm.  */
+  emit_insn (gen_adddi3 (scratch0, scratch0, scratch1));
+  emit_insn (gen_ashrdi3_media (scratch0, scratch0, shift));
+  emit_insn (gen_subdisi3_media (result, scratch0, scratch2));
+  DONE;
+}")
+
+;; operands: quotient, dividend, inv1, inv2, shift, i2p27, i43
+;; inv1: tab_base, tab_ix, norm32
+;; inv2: norm32, inv1, i92
+;; Matches the M3 unspec with the M1 and M2 unspecs nested inside it and
+;; expands all steps in one split.  Operands 9 to 15 are scratch registers.
+(define_insn_and_split "divsi_inv_m1_3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
+                    (unspec:SI [(match_operand:DI 2 "register_operand" "r")
+                                (match_operand:DI 3 "register_operand" "r")
+                                (match_operand:SI 4 "register_operand" "r")]
+                     UNSPEC_DIV_INV_M1)
+                    (unspec:SI [(match_dup 4)
+                                (unspec:SI [(match_dup 2)
+                                            (match_dup 3)
+                                            (match_dup 4)] UNSPEC_DIV_INV_M1)
+                                (match_operand:SI 5 "" "")]
+                     UNSPEC_DIV_INV_M2)
+                    (match_operand:DI 6 "register_operand" "r")
+                    (match_operand:DI 7 "arith_reg_or_0_operand" "rN")
+                    (match_operand:DI 8 "arith_reg_or_0_operand" "rN")]
+         UNSPEC_DIV_INV_M3))
+   (clobber (match_operand:DI 9 "register_operand" "=r"))
+   (clobber (match_operand:DI 10 "register_operand" "=r"))
+   (clobber (match_operand:DI 11 "register_operand" "=r"))
+   (clobber (match_operand:DI 12 "register_operand" "=r"))
+   (clobber (match_operand:SI 13 "register_operand" "=r"))
+   (clobber (match_operand:SI 14 "register_operand" "=r"))
+   (clobber (match_operand:DI 15 "register_operand" "=r"))]
+  "TARGET_SHMEDIA
+   && (TARGET_DIVIDE_INV_MINLAT
+       || TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L)"
+  "#"
+  "&& !can_create_pseudo_p ()"
+  [(pc)]
+  "
+{
+  rtx result = operands[0];
+  rtx dividend = operands[1];
+  rtx tab_base = operands[2];
+  rtx tab_ix = operands[3];
+  rtx norm32 = operands[4];
+  /* rtx i92 = operands[5]; */
+  rtx shift = operands[6];
+  rtx i2p27 = operands[7];
+  rtx i43 = operands[8];
+  rtx scratch0 = operands[9];
+  rtx scratch0_si = simplify_gen_subreg (SImode, scratch0, DImode, SIDI_OFF);
+  rtx scratch1 = operands[10];
+  rtx scratch1_si = simplify_gen_subreg (SImode, scratch1, DImode, SIDI_OFF);
+  rtx scratch2 = operands[11];
+  rtx scratch3 = operands[12];
+  rtx scratch4 = operands[13];
+  rtx scratch4_di = simplify_gen_subreg (DImode, scratch4, SImode, 0);
+  rtx scratch5 = operands[14];
+  rtx scratch5_di = simplify_gen_subreg (DImode, scratch5, SImode, 0);
+  rtx scratch6 = operands[15];
+
+  emit_insn (gen_divsi_inv_m0 (scratch4, tab_base, tab_ix, norm32,
+                               scratch0, scratch1));
+  /* inv0 == scratch4 */
+  if (! TARGET_DIVIDE_INV20U)
+    {
+      emit_insn (gen_mulsidi3_media (scratch0, scratch4, scratch4));
+      /* From here on i2p27 refers to scratch0 instead of operand 7.  */
+      i2p27 = scratch0;
+      emit_insn (gen_mulsidi3_media (scratch1, norm32, scratch0_si));
+    }
+  else
+    {
+      emit_insn (gen_mulsidi3_media (scratch1, scratch4, scratch4));
+      emit_insn (gen_mulsidi3_media (scratch1, norm32, scratch1_si));
+    }
+  emit_insn (gen_ashldi3_media (scratch2, scratch4_di, GEN_INT (45)));
+  emit_insn (gen_subdi3 (scratch1, scratch2, scratch1));
+  emit_insn (gen_ashrdisi3_media_opaque (scratch4, scratch1, GEN_INT (28)));
+  /* inv1 == scratch4 */
+
+  if (TARGET_DIVIDE_INV_MINLAT)
+    {
+      emit_insn (gen_mulsidi3_media (scratch1, scratch4, norm32));
+      emit_insn (gen_mulsidi3_media (scratch2, dividend, scratch4));
+      emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (16)));
+      emit_insn (gen_mulsidi3_media (scratch1, scratch1_si, scratch4));
+      emit_insn (gen_ashrdi3_media (scratch3, scratch2, GEN_INT (63)));
+      emit_insn (gen_ashrsi3_media (scratch5, dividend, GEN_INT (14)));
+      emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (30)));
+      emit_insn (gen_mulsidi3_media (scratch1, scratch1_si, scratch5));
+      emit_insn (gen_xordi3 (scratch0, scratch3, i2p27));
+      emit_insn (gen_adddi3 (scratch2, scratch2, scratch0));
+      emit_insn (gen_subdi3 (scratch2, scratch2, scratch1));
+    }
+  else
+    {
+      /* The same label reference is passed twice to divsi_inv20, once for
+         the target-register operand and once for the immediate operand.  */
+      rtx label = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
+      /* Use separate scratch regs for nsb and sign to allow scheduling.  */
+      emit_insn (gen_nsbdi (scratch6,
+                            simplify_gen_subreg (DImode, dividend, SImode, 0)));
+      emit_insn (gen_xorsi3 (scratch5, dividend, norm32));
+      emit_insn (gen_ashrdi3_media (scratch3, scratch5_di, GEN_INT (63)));
+      emit_insn (gen_divsi_inv20 (scratch2,
+                                  norm32, scratch4, dividend,
+                                  scratch6, scratch3, i43,
+                                  /* scratch0 may be shared with i2p27.  */
+                                  scratch0, scratch1, scratch5,
+                                  label, label, i2p27));
+    }
+  emit_insn (gen_ashrdi3_media (scratch2, scratch2, shift));
+  emit_insn (gen_subdisi3_media (result, scratch2, scratch3));
+  DONE;
+}")
+
+;; Multi-instruction output for the INV20U / INV20L variants.  The
+;; operand roles are listed in the comment inside the output code; the
+;; likely flag (TARGET_DIVIDE_INV20L) selects between a bge/l and a bge/u
+;; branch and the matching instruction order.
+(define_insn "divsi_inv20"
+  [(set (match_operand:DI 0 "register_operand" "=&r")
+        (unspec:DI [(match_operand:SI 1 "register_operand" "r")
+                    (match_operand:SI 2 "register_operand" "r")
+                    (match_operand:SI 3 "register_operand" "r")
+                    (match_operand:DI 4 "register_operand" "r")
+                    (match_operand:DI 5 "register_operand" "r")
+                    (match_operand:DI 6 "register_operand" "r")
+                    (match_operand:DI 12 "register_operand" "r")
+                    (match_operand 10 "target_operand" "b")
+                    (match_operand 11 "immediate_operand" "i")]
+         UNSPEC_DIV_INV20))
+   (clobber (match_operand:DI 7 "register_operand" "=&r"))
+   (clobber (match_operand:DI 8 "register_operand" "=&r"))
+   (clobber (match_operand:SI 9 "register_operand" "=r"))]
+  "TARGET_SHMEDIA
+   && (TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L)"
+  "*
+{
+/* operands: %0 div_result, %1 norm32, %2 inv1, %3 dividend,
+ %4 dividend_nsb, %5 result_sign, %6 i43, %12 i2p27,
+ %7 round_scratch, %8 scratch0 (di), %9 scratch1 (si)
+ %10 label (tr), %11 label (imm)
+
+ muls.l inv1, norm32, scratch0 // s2.60
+ muls.l inv1, dividend, result // s32.30
+ xor i2p27, result_sign, round_scratch
+ bge/u dividend_nsb, i43, tr.. (label)
+ shari scratch0, 16, scratch0 // s-16.44
+ muls.l scratch0_si, inv1, scratch0 // s-16.74
+ sub result, round_scratch, result
+ shari dividend, 14, scratch1 // s19.-14
+ shari scratch0, 30, scratch0 // s-16.44
+ muls.l scratch0, scratch1, round_scratch // s15.30
+label:
+ sub result, round_scratch, result */
+
+  int likely = TARGET_DIVIDE_INV20L;
+
+  if (! likely) output_asm_insn (\"muls.l\t%2, %1 , %8\", operands);
+  output_asm_insn (\"muls.l\t%2, %3, %0\;xor\t%12, %5, %7\", operands);
+  output_asm_insn (likely
+                   ? \"bge/l\t%4, %6, %10\;muls.l\t%2, %1 , %8\"
+                   : \"bge/u\t%4, %6, %10\", operands);
+  output_asm_insn (\"shari\t%8, 16, %8\;muls.l\t%8, %2, %8\", operands);
+  if (! likely) output_asm_insn (\"sub\t%0, %7, %0\", operands);
+  output_asm_insn (\"shari\t%3, 14, %9\;shari\t%8, 30, %8\", operands);
+  return (likely
+          ? \"muls.l\t%8, %9, %8\;sub\t%0, %8, %0\n%11:\tadd\t%0, %7, %0\"
+          : \"muls.l\t%8, %9, %7\n%11:\tsub\t%0, %7, %0\");
+}")
+
+;; Division pattern that carries the already computed result in operand 3
+;; (a use) plus SImode and DFmode scratch clobbers.  When split, it
+;; becomes a plain copy of operand 3 into operand 0.
+(define_insn_and_split "divsi_inv_fp"
+  [(set (match_operand:SI 0 "general_movdst_operand" "=rf")
+        (div:SI (match_operand:SI 1 "general_movsrc_operand" "rf")
+                (match_operand:SI 2 "register_operand" "rf")))
+   (use (match_operand:SI 3 "general_movsrc_operand" "r"))
+   (clobber (match_operand:SI 4 "register_operand" "=r"))
+   (clobber (match_operand:SI 5 "register_operand" "=r"))
+   (clobber (match_operand:DF 6 "register_operand" "=r"))
+   (clobber (match_operand:DF 7 "register_operand" "=r"))
+   (clobber (match_operand:DF 8 "register_operand" "=r"))]
+  "TARGET_SHMEDIA_FPU"
+  "#"
+  "&& (high_life_started || reload_completed)"
+  [(set (match_dup 0) (match_dup 3))]
+  ""
+  [(set_attr "highpart" "must_split")])
+
+;; If a matching group of divide-by-inverse instructions is in the same
+;; basic block after gcse & loop optimizations, we want to transform them
+;; to a straight division using floating point for TARGET_DIVIDE_INV_FP.
+/* Matches only once no new pseudos can be created and is always split (split condition && 1) into: float both inputs to DFmode, divide in DFmode, fix back to SImode in scratch 8, then copy scratch 8 to operand 0. The preparation code first moves operand 1 or 2 into scratch 7 or 8 when it is not an fp_arith_reg_operand. */ (define_insn_and_split "*divsi_inv_fp_combine" + [(set (match_operand:SI 0 "register_operand" "=f") + (div:SI (match_operand:SI 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f"))) + (use (unspec:SI [(match_dup 1) + (match_operand:SI 3 "" "") + (unspec:SI [(match_operand:SI 4 "" "") + (match_dup 3) + (match_operand:DI 5 "" "")] UNSPEC_DIV_INV_M2) + (match_operand:DI 6 "" "") + (const_int 0) + (const_int 0)] UNSPEC_DIV_INV_M3)) + (clobber (match_operand:SI 7 "fp_arith_reg_operand" "")) + (clobber (match_operand:SI 8 "fp_arith_reg_operand" "")) + (clobber (match_operand:DF 9 "fp_arith_reg_operand" "")) + (clobber (match_operand:DF 10 "fp_arith_reg_operand" "")) + (clobber (match_operand:DF 11 "fp_arith_reg_operand" ""))] + "TARGET_SHMEDIA_FPU && TARGET_DIVIDE_INV_FP && !can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 9) (float:DF (match_dup 1))) + (set (match_dup 10) (float:DF (match_dup 2))) + (set (match_dup 11) (div:DF (match_dup 9) (match_dup 10))) + (set (match_dup 8) + (fix:SI (match_dup 11))) + (set (match_dup 0) (match_dup 8))] + " +{ + if (! fp_arith_reg_operand (operands[1], SImode)) + { + emit_move_insn (operands[7], operands[1]); + operands[1] = operands[7]; + } + if (! 
fp_arith_reg_operand (operands[2], SImode)) + { + /* Operand 2 is not an FP arithmetic register: copy it into scratch operand 8 and use the scratch instead. */ emit_move_insn (operands[8], operands[2]); + operands[2] = operands[8]; + } +}" + [(set_attr "highpart" "must_split")]) + +;; ------------------------------------------------------------------------- +;; Multiplication instructions +;; ------------------------------------------------------------------------- + +(define_insn "umulhisi3_i" + [(set (reg:SI MACL_REG) + (mult:SI (zero_extend:SI + (match_operand:HI 0 "arith_reg_operand" "r")) + (zero_extend:SI + (match_operand:HI 1 "arith_reg_operand" "r"))))] + "TARGET_SH1" + "mulu.w %1,%0" + [(set_attr "type" "smpy")]) + +(define_insn "mulhisi3_i" + [(set (reg:SI MACL_REG) + (mult:SI (sign_extend:SI + (match_operand:HI 0 "arith_reg_operand" "r")) + (sign_extend:SI + (match_operand:HI 1 "arith_reg_operand" "r"))))] + "TARGET_SH1" + "muls.w %1,%0" + [(set_attr "type" "smpy")]) + +(define_expand "mulhisi3" + [(set (reg:SI MACL_REG) + (mult:SI (sign_extend:SI + (match_operand:HI 1 "arith_reg_operand" "")) + (sign_extend:SI + (match_operand:HI 2 "arith_reg_operand" "")))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI MACL_REG))] + "TARGET_SH1" + " +{ + rtx insn, macl; + + macl = gen_rtx_REG (SImode, MACL_REG); + /* Collect the signed multiply in a detached insn sequence so that it can be passed to emit_libcall_block. */ start_sequence (); + emit_insn (gen_mulhisi3_i (operands[1], operands[2])); + insn = get_insns (); + end_sequence (); + /* expand_binop can't find a suitable code in umul_widen_optab to + make a REG_EQUAL note from, so make one here. + See also smulsi3_highpart. + ??? Alternatively, we could put this at the calling site of expand_binop, + i.e. expand_expr. */ + /* Use emit_libcall_block for loop invariant code motion and to make + a REG_EQUAL note. 
*/ + /* The detached multiply is wrapped as a libcall block: operands[0] is the target, MACL the register holding the result, and the source of the multiply is the value recorded for the REG_EQUAL note. */ emit_libcall_block (insn, operands[0], macl, SET_SRC (single_set (insn))); + + DONE; +}") + +(define_expand "umulhisi3" + [(set (reg:SI MACL_REG) + (mult:SI (zero_extend:SI + (match_operand:HI 1 "arith_reg_operand" "")) + (zero_extend:SI + (match_operand:HI 2 "arith_reg_operand" "")))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI MACL_REG))] + "TARGET_SH1" + " +{ + rtx insn, macl; + + macl = gen_rtx_REG (SImode, MACL_REG); + /* Collect the unsigned multiply in a detached insn sequence, which is then handed to emit_libcall_block. */ start_sequence (); + emit_insn (gen_umulhisi3_i (operands[1], operands[2])); + insn = get_insns (); + end_sequence (); + /* expand_binop can't find a suitable code in umul_widen_optab to + make a REG_EQUAL note from, so make one here. + See also smulsi3_highpart. + ??? Alternatively, we could put this at the calling site of expand_binop, + i.e. expand_expr. */ + /* Use emit_libcall_block for loop invariant code motion and to make + a REG_EQUAL note. */ + emit_libcall_block (insn, operands[0], macl, SET_SRC (single_set (insn))); + + DONE; +}") + +;; mulsi3 on the SH2 can be done in one instruction, on the SH1 we generate +;; a call to a routine which clobbers known registers. 
+ +/* Unnamed call insn with the same shape as the parallel generated by mulsi3_call: operand 1 receives the product of r4 and r5 through a jsr to the address in operand 0, with MACL, T, PR, r3, r2 and r1 clobbered. */ (define_insn "" + [(set (match_operand:SI 1 "register_operand" "=z") + (mult:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI MACL_REG)) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R3_REG)) + (clobber (reg:SI R2_REG)) + (clobber (reg:SI R1_REG)) + (use (match_operand:SI 0 "arith_reg_operand" "r"))] + "TARGET_SH1" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +/* Loads the two factors into r4 and r5, then emits the multiply as a parallel that uses the routine address in operand 3. */ (define_expand "mulsi3_call" + [(set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" "")) + (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" "")) + (parallel[(set (match_operand:SI 0 "register_operand" "") + (mult:SI (reg:SI R4_REG) + (reg:SI R5_REG))) + (clobber (reg:SI MACL_REG)) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R3_REG)) + (clobber (reg:SI R2_REG)) + (clobber (reg:SI R1_REG)) + (use (match_operand:SI 3 "register_operand" ""))])] + "TARGET_SH1" + "") + +(define_insn "mul_r" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (mult:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "z")))] + "TARGET_SH2A" + "mulr %2,%0" + [(set_attr "type" "dmpy")]) + +(define_insn "mul_l" + [(set (reg:SI MACL_REG) + (mult:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" + "mul.l %1,%0" + [(set_attr "type" "dmpy")]) + +/* Without TARGET_SH2 this expands through mulsi3_call, otherwise mul_l leaves the product in MACL and it is then copied to operand 0. */ (define_expand "mulsi3" + [(set (reg:SI MACL_REG) + (mult:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" ""))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI MACL_REG))] + "TARGET_SH1" + " +{ + if (!TARGET_SH2) + { + /* The address must be set outside the libcall, + since it goes into a pseudo. 
*/ + /* Path taken without TARGET_SH2: put the address of the __mulsi3 helper in a register and expand through mulsi3_call. */ rtx sym = function_symbol (NULL, \"__mulsi3\", SFUNC_STATIC); + rtx addr = force_reg (SImode, sym); + rtx insns = gen_mulsi3_call (operands[0], operands[1], + operands[2], addr); + emit_insn (insns); + } + else + { + rtx macl = gen_rtx_REG (SImode, MACL_REG); + + emit_insn (gen_mul_l (operands[1], operands[2])); + /* consec_sets_giv can only recognize the first insn that sets a + giv as the giv insn. So we must tag this also with a REG_EQUAL + note. */ + emit_insn (gen_movsi_i ((operands[0]), macl)); + } + DONE; +}") + +(define_insn "mulsidi3_i" + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (set (reg:SI MACL_REG) + (mult:SI (match_dup 0) + (match_dup 1)))] + "TARGET_SH2" + "dmuls.l %1,%0" + [(set_attr "type" "dmpy")]) + +/* With TARGET_SH2 this expands through mulsidi3_compact, otherwise the preparation code falls through and the pattern of the expander itself is emitted. */ (define_expand "mulsidi3" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "r"))))] + "TARGET_SH2 || TARGET_SHMEDIA" + " +{ + if (TARGET_SH2) + { + emit_insn (gen_mulsidi3_compact (operands[0], operands[1], + operands[2])); + DONE; + } +}") + +(define_insn "mulsidi3_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "extend_reg_operand" "%r")) + (sign_extend:DI (match_operand:SI 2 "extend_reg_operand" "r"))))] + "TARGET_SHMEDIA" + "muls.l %1, %2, %0" + [(set_attr "type" "dmpy_media") + (set_attr "highpart" "ignore")]) + +(define_insn "mulsidi3_compact" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (mult:DI + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "r")))) + (clobber (reg:SI MACH_REG)) + (clobber (reg:SI MACL_REG))] + "TARGET_SH2" + "#") + +(define_split + [(set (match_operand:DI 0 "arith_reg_dest" "") + 
(mult:DI + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))) + (clobber (reg:SI MACH_REG)) + (clobber (reg:SI MACL_REG))] + "TARGET_SH2" + [(const_int 0)] + " +{ + rtx low_dst = gen_lowpart (SImode, operands[0]); + rtx high_dst = gen_highpart (SImode, operands[0]); + + emit_insn (gen_mulsidi3_i (operands[1], operands[2])); + + emit_move_insn (low_dst, gen_rtx_REG (SImode, MACL_REG)); + emit_move_insn (high_dst, gen_rtx_REG (SImode, MACH_REG)); + /* We need something to tag the possible REG_EQUAL notes on to. */ + emit_move_insn (operands[0], operands[0]); + DONE; +}") + +/* dmulu.l: MACH is set to the upper 32 bits of the zero-extended 64-bit product and MACL to the SImode product of the same two operands. */ (define_insn "umulsidi3_i" + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (set (reg:SI MACL_REG) + (mult:SI (match_dup 0) + (match_dup 1)))] + "TARGET_SH2" + "dmulu.l %1,%0" + [(set_attr "type" "dmpy")]) + +/* With TARGET_SH2 this expands through umulsidi3_compact, otherwise the preparation code falls through and the pattern of the expander itself is emitted. */ (define_expand "umulsidi3" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "r"))))] + "TARGET_SH2 || TARGET_SHMEDIA" + " +{ + if (TARGET_SH2) + { + emit_insn (gen_umulsidi3_compact (operands[0], operands[1], + operands[2])); + DONE; + } +}") + +(define_insn "umulsidi3_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_operand" "%r")) + (zero_extend:DI (match_operand:SI 2 "extend_reg_operand" "r"))))] + "TARGET_SHMEDIA" + "mulu.l %1, %2, %0" + [(set_attr "type" "dmpy_media") + (set_attr "highpart" "ignore")]) + +(define_insn "umulsidi3_compact" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (mult:DI + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "r")))) + (clobber (reg:SI 
MACH_REG)) + (clobber (reg:SI MACL_REG))] + "TARGET_SH2" + "#") + +/* Split for the unsigned widening multiply with MACH and MACL clobbered: emit umulsidi3_i, then copy MACL into the low SImode half and MACH into the high SImode half of the DImode destination. */ (define_split + [(set (match_operand:DI 0 "arith_reg_dest" "") + (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))) + (clobber (reg:SI MACH_REG)) + (clobber (reg:SI MACL_REG))] + "TARGET_SH2" + [(const_int 0)] + " +{ + rtx low_dst = gen_lowpart (SImode, operands[0]); + rtx high_dst = gen_highpart (SImode, operands[0]); + + emit_insn (gen_umulsidi3_i (operands[1], operands[2])); + + emit_move_insn (low_dst, gen_rtx_REG (SImode, MACL_REG)); + emit_move_insn (high_dst, gen_rtx_REG (SImode, MACH_REG)); + /* We need something to tag the possible REG_EQUAL notes on to. */ + emit_move_insn (operands[0], operands[0]); + DONE; +}") + +/* dmuls.l used for the high word only: MACH is set to the upper 32 bits of the sign-extended 64-bit product and MACL is merely clobbered. */ (define_insn "smulsi3_highpart_i" + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (clobber (reg:SI MACL_REG))] + "TARGET_SH2" + "dmuls.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "smulsi3_highpart" + [(parallel + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))) + (const_int 32)))) + (clobber (reg:SI MACL_REG))]) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI MACH_REG))] + "TARGET_SH2" + " +{ + rtx insn, mach; + + mach = gen_rtx_REG (SImode, MACH_REG); + start_sequence (); + emit_insn (gen_smulsi3_highpart_i (operands[1], operands[2])); + insn = get_insns (); + end_sequence (); + /* expand_binop can't find a suitable code in mul_highpart_optab to + make a REG_EQUAL note from, so make one here. + See also {,u}mulhisi. + ??? Alternatively, we could put this at the calling site of expand_binop, + i.e. expand_mult_highpart. 
*/ + /* Use emit_libcall_block for loop invariant code motion and to make + a REG_EQUAL note. */ + emit_libcall_block (insn, operands[0], mach, SET_SRC (single_set (insn))); + + DONE; +}") + +(define_insn "umulsi3_highpart_i" + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (clobber (reg:SI MACL_REG))] + "TARGET_SH2" + "dmulu.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "umulsi3_highpart" + [(parallel + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))) + (const_int 32)))) + (clobber (reg:SI MACL_REG))]) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI MACH_REG))] + "TARGET_SH2" + " +{ + rtx insn, mach; + + /* mach names the MACH register, and the unsigned high-part multiply is generated into a detached sequence between start_sequence and end_sequence. */ mach = gen_rtx_REG (SImode, MACH_REG); + start_sequence (); + emit_insn (gen_umulsi3_highpart_i (operands[1], operands[2])); + insn = get_insns (); + end_sequence (); + /* Use emit_libcall_block for loop invariant code motion and to make + a REG_EQUAL note. 
*/ + emit_libcall_block (insn, operands[0], mach, SET_SRC (single_set (insn))); + + DONE; +}") + +(define_insn_and_split "muldi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (mult:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (match_scratch:DI 3 "=&r")) + (clobber (match_scratch:DI 4 "=r"))] + "TARGET_SHMEDIA" + "#" + "reload_completed" + [(const_int 0)] + " +{ + rtx op3_v2si, op2_v2si; + + /* Build V2SImode views of scratch operand 3 and of operand 2, looking through a SIGN_EXTEND wrapper if one is present. */ op3_v2si = operands[3]; + if (GET_CODE (op3_v2si) == SIGN_EXTEND) + { + op3_v2si = XEXP (op3_v2si, 0); + op3_v2si = simplify_gen_subreg (DImode, op3_v2si, GET_MODE (op3_v2si), 0); + } + op3_v2si = simplify_gen_subreg (V2SImode, op3_v2si, DImode, 0); + op2_v2si = operands[2]; + if (GET_CODE (op2_v2si) == SIGN_EXTEND) + { + op2_v2si = XEXP (op2_v2si, 0); + op2_v2si = simplify_gen_subreg (DImode, op2_v2si, GET_MODE (op2_v2si), 0); + } + op2_v2si = simplify_gen_subreg (V2SImode, op2_v2si, DImode, 0); + /* operands[3] becomes operands[1] rotated by 32 bits, i.e. with its two 32-bit halves exchanged. */ emit_insn (gen_rotldi3 (operands[3], operands[1], GEN_INT (32))); + /* Multiply the V2SI views of operands[3] and operands[2] in place (presumably lane by lane, which would give the two cross products of the halves - confirm against mulv2si3). */ emit_insn (gen_mulv2si3 (op3_v2si, op3_v2si, op2_v2si)); + emit_insn (gen_umulsidi3_media (operands[4], + sh_gen_truncate (SImode, operands[1], 0), + sh_gen_truncate (SImode, operands[2], 0))); + /* Result = upper 32 bits of operands[3], plus its lower 32 bits shifted up by 32, plus operands[4] (the unsigned widening product of the truncated inputs). */ emit_insn (gen_anddi3 (operands[0], operands[3], GEN_INT (0xffffffff00000000LL))); + emit_insn (gen_ashldi3_media (operands[3], operands[3], GEN_INT (32))); + emit_insn (gen_adddi3 (operands[0], operands[3], operands[0])); + emit_insn (gen_adddi3 (operands[0], operands[4], operands[0])); + DONE; +}") + + +;; ------------------------------------------------------------------------- +;; Logical operations +;; ------------------------------------------------------------------------- + +(define_insn "*andsi3_compact" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,z") + (and:SI (match_operand:SI 1 "arith_reg_operand" "%0,0") + (match_operand:SI 2 "logical_operand" "r,K08")))] + "TARGET_SH1" + "and %2,%0" + [(set_attr "type" "arith")]) + +(define_insn 
"*andsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (and:SI (match_operand:SI 1 "logical_reg_operand" "%r,r") + (match_operand:SI 2 "logical_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + and %1, %2, %0 + andi %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "*andsi3_bclr" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (and:SI (match_operand:SI 1 "arith_reg_operand" "%0") + (match_operand:SI 2 "const_int_operand" "Psz")))] + "TARGET_SH2A && satisfies_constraint_Psz (operands[2])" + "bclr\\t%W2,%0" + [(set_attr "type" "arith")]) + +;; If the constant is 255, then emit an extu.b instruction instead of an +;; and, since that will give better code. + +(define_expand "andsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (and:SI (match_operand:SI 1 "logical_reg_operand" "") + (match_operand:SI 2 "logical_operand" "")))] + "" + " +{ + if (TARGET_SH1 + && CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 255) + { + emit_insn (gen_zero_extendqisi2 (operands[0], + gen_lowpart (QImode, operands[1]))); + DONE; + } +}") + +(define_insn_and_split "anddi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r,r") + (and:DI (match_operand:DI 1 "arith_reg_operand" "%r,r,r") + (match_operand:DI 2 "and_operand" "r,I10,J16")))] + "TARGET_SHMEDIA" + "@ + and %1, %2, %0 + andi %1, %2, %0 + #" + "reload_completed + && ! 
logical_operand (operands[2], DImode)" + [(const_int 0)] + " +{ + /* Only constants that fail logical_operand reach this split (see the split condition). When the constant, converted to unsigned, equals 0xffffffff the result is formed with mshflo.l from operand 1 and zero, otherwise with mshfhi.l from zero and operand 1. NOTE(review): presumably the only other such and_operand value is the high-word mask, confirm against and_operand. */ if ((unsigned)INTVAL (operands[2]) == (unsigned) 0xffffffff) + emit_insn (gen_mshflo_l_di (operands[0], operands[1], CONST0_RTX (DImode))); + else + emit_insn (gen_mshfhi_l_di (operands[0], CONST0_RTX (DImode), operands[1])); + DONE; +}" + [(set_attr "type" "arith_media")]) + +(define_insn "andcsi3" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (and:SI (match_operand:SI 1 "arith_reg_operand" "r") + (not:SI (match_operand:SI 2 "arith_reg_operand" "r"))))] + "TARGET_SHMEDIA" + "andc %1,%2,%0" + [(set_attr "type" "arith_media")]) + +(define_insn "andcdi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (and:DI (match_operand:DI 1 "arith_reg_operand" "r") + (not:DI (match_operand:DI 2 "arith_reg_operand" "r"))))] + "TARGET_SHMEDIA" + "andc %1,%2,%0" + [(set_attr "type" "arith_media")]) + +(define_expand "iorsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (ior:SI (match_operand:SI 1 "logical_reg_operand" "") + (match_operand:SI 2 "logical_operand" "")))] + "" + "") + +(define_insn "*iorsi3_compact" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,z") + (ior:SI (match_operand:SI 1 "arith_reg_operand" "%0,0") + (match_operand:SI 2 "logical_operand" "r,K08")))] + "TARGET_SH1 + && !(TARGET_SH2A && satisfies_constraint_Pso (operands[2]))" + "or %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "*iorsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (ior:SI (match_operand:SI 1 "logical_reg_operand" "%r,r") + (match_operand:SI 2 "logical_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + or %1, %2, %0 + ori %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "*iorsi3_bset" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (match_operand:SI 1 "arith_reg_operand" "%0") + (match_operand:SI 2 "const_int_operand" "Pso")))] + "TARGET_SH2A && satisfies_constraint_Pso (operands[2])" + "bset\\t%V2,%0" + [(set_attr "type" "arith")]) + +(define_insn "iordi3" + 
[(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (ior:DI (match_operand:DI 1 "arith_reg_operand" "%r,r") + (match_operand:DI 2 "logical_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + or %1, %2, %0 + ori %1, %2, %0" + [(set_attr "type" "arith_media")]) + +/* After reload, a sign-extended SImode logical operation is rewritten as the same operation code done in DImode on DImode subregs of both inputs. */ (define_insn_and_split "*logical_sidi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (sign_extend:DI (match_operator:SI 3 "logical_operator" + [(match_operand:SI 1 "arith_reg_operand" "%r,r") + (match_operand:SI 2 "logical_operand" "r,I10")])))] + "TARGET_SHMEDIA" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 3))] + " +{ + operands[3] + = gen_rtx_fmt_ee (GET_CODE (operands[3]), DImode, + simplify_gen_subreg (DImode, operands[1], SImode, 0), + simplify_gen_subreg (DImode, operands[2], SImode, 0)); +}") + +/* A truncate of a sign_extend of an SImode logical operation is always split (condition && 1) into the bare SImode operation. */ (define_insn_and_split "*logical_sidisi3" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (truncate:SI (sign_extend:DI + (match_operator:SI 3 "logical_operator" + [(match_operand:SI 1 "arith_reg_operand" "%r,r") + (match_operand:SI 2 "logical_operand" "r,I10")]))))] + "TARGET_SHMEDIA" + "#" + "&& 1" + [(set (match_dup 0) (match_dup 3))]) + +/* A sign_extend of a truncate of a sign_extend is always split into a single sign_extend of the SImode logical operation. */ (define_insn_and_split "*logical_sidi3_2" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (sign_extend:DI (truncate:SI (sign_extend:DI + (match_operator:SI 3 "logical_operator" + [(match_operand:SI 1 "arith_reg_operand" "%r,r") + (match_operand:SI 2 "logical_operand" "r,I10")])))))] + "TARGET_SHMEDIA" + "#" + "&& 1" + [(set (match_dup 0) (sign_extend:DI (match_dup 3)))]) + +(define_expand "xorsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (xor:SI (match_operand:SI 1 "logical_reg_operand" "") + (match_operand:SI 2 "xor_operand" "")))] + "" + "") + +(define_insn "*xorsi3_compact" + [(set (match_operand:SI 0 "arith_reg_dest" "=z,r") + (xor:SI (match_operand:SI 1 "arith_reg_operand" "%0,0") + (match_operand:SI 2 "logical_operand" "K08,r")))] + "TARGET_SH1" + "xor %2,%0" + [(set_attr "type" "arith")]) + +(define_insn 
"*xorsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (xor:SI (match_operand:SI 1 "logical_reg_operand" "%r,r") + (match_operand:SI 2 "xor_operand" "r,I06")))] + "TARGET_SHMEDIA" + "@ + xor %1, %2, %0 + xori %1, %2, %0" + [(set_attr "type" "arith_media")]) + +;; Store the complements of the T bit in a register. +(define_insn "xorsi3_movrt" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (xor:SI (reg:SI T_REG) + (const_int 1)))] + "TARGET_SH2A" + "movrt\\t%0" + [(set_attr "type" "arith")]) + +(define_insn "xordi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (xor:DI (match_operand:DI 1 "arith_reg_operand" "%r,r") + (match_operand:DI 2 "xor_operand" "r,I06")))] + "TARGET_SHMEDIA" + "@ + xor %1, %2, %0 + xori %1, %2, %0" + [(set_attr "type" "arith_media")]) + +;; Combiner bridge pattern for 2 * sign extend -> logical op -> truncate. +;; converts 2 * sign extend -> logical op into logical op -> sign extend +(define_split + [(set (match_operand:DI 0 "arith_reg_dest" "") + (sign_extend:DI (match_operator 4 "binary_logical_operator" + [(match_operand 1 "any_register_operand" "") + (match_operand 2 "any_register_operand" "")])))] + "TARGET_SHMEDIA" + [(set (match_dup 5) (match_dup 4)) + (set (match_dup 0) (sign_extend:DI (match_dup 5)))] +" +{ + enum machine_mode inmode = GET_MODE (operands[1]); + int offset = 0; + + if (GET_CODE (operands[0]) == SUBREG) + { + offset = SUBREG_BYTE (operands[0]); + operands[0] = SUBREG_REG (operands[0]); + } + gcc_assert (REG_P (operands[0])); + if (! 
TARGET_LITTLE_ENDIAN) + offset += 8 - GET_MODE_SIZE (inmode); + /* operands[5] views the destination register in the mode of the inputs, at the byte offset computed above (advanced by 8 minus that mode size when not little endian). */ operands[5] = gen_rtx_SUBREG (inmode, operands[0], offset); +}") + +;; ------------------------------------------------------------------------- +;; Shifts and rotates +;; ------------------------------------------------------------------------- + +(define_expand "rotldi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (rotate:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:HI 2 "mextr_bit_offset" "i")))] + "TARGET_SHMEDIA" + "if (! mextr_bit_offset (operands[2], HImode)) FAIL;") + +(define_insn "rotldi3_mextr" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (rotate:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:HI 2 "mextr_bit_offset" "i")))] + "TARGET_SHMEDIA" + "* +{ + /* Left rotate: the emitted mnemonic is mextr followed by 8 minus the rotate count in bytes. NOTE(review): templ[16] holds exactly mextrN, a tab, the three operand references and the terminating NUL, so this presumes a one-digit number - confirm that mextr_bit_offset guarantees it. */ static char templ[16]; + + sprintf (templ, \"mextr%d\\t%%1,%%1,%%0\", + 8 - (int) (INTVAL (operands[2]) >> 3)); + return templ; +}" + [(set_attr "type" "arith_media")]) + +(define_expand "rotrdi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (rotatert:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:HI 2 "mextr_bit_offset" "i")))] + "TARGET_SHMEDIA" + "if (! mextr_bit_offset (operands[2], HImode)) FAIL;") + +/* Right rotate: the mextr number is the rotate count in bytes (operand 2 shifted right by 3), formatted into a static 16-byte template buffer. */ (define_insn "rotrdi3_mextr" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (rotatert:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:HI 2 "mextr_bit_offset" "i")))] + "TARGET_SHMEDIA" + "* +{ + static char templ[16]; + + sprintf (templ, \"mextr%d\\t%%1,%%1,%%0\", (int) INTVAL (operands[2]) >> 3); + return templ; +}" + [(set_attr "type" "arith_media")]) + +(define_split + [(set (match_operand:DI 0 "arith_reg_dest" "") + (ior:DI (zero_extend:DI (mem:QI (match_operand 1 + "ua_address_operand" ""))) + (ashift:DI (match_operand:DI 2 "arith_reg_operand" "") + (const_int 8)))) + (clobber (match_operand:DI 3 "register_operand" ""))] + "TARGET_SHMEDIA" + [(match_dup 4) (match_dup 5)] + " +{ + operands[4] = ((TARGET_LITTLE_ENDIAN ? 
gen_ldhi_q : gen_ldlo_q) + (operands[3], operands[1])); + operands[5] = gen_mextr_rl (operands[0], operands[3], operands[2], + GEN_INT (56), GEN_INT (8)); +}") + +(define_insn "rotlsi3_1" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1))) + (set (reg:SI T_REG) + (lshiftrt:SI (match_dup 1) (const_int 31)))] + "TARGET_SH1" + "rotl %0" + [(set_attr "type" "arith")]) + +(define_insn "rotlsi3_31" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 31))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "rotr %0" + [(set_attr "type" "arith")]) + +(define_insn "rotlsi3_16" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)))] + "TARGET_SH1" + "swap.w %1,%0" + [(set_attr "type" "arith")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "immediate_operand" "")))] + "TARGET_SH1" + " +{ + /* rot_tab is indexed by the rotate count. The low three bits choose the starting point in the switch below (0: plain copy, 3: swap.w for 16 bits, 1 and 2: a rotate built from swap.w, two 8-bit shifts and an or), and bit 010 marks counts for which the expander FAILs when SH_DYNAMIC_SHIFT_COST is at most 1. */ static const char rot_tab[] = { + 000, 000, 000, 000, 000, 000, 010, 001, + 001, 001, 011, 013, 003, 003, 003, 003, + 003, 003, 003, 003, 003, 013, 012, 002, + 002, 002, 010, 000, 000, 000, 000, 000, + }; + + int count, choice; + + if (!CONST_INT_P (operands[2])) + FAIL; + count = INTVAL (operands[2]); + /* NOTE(review): count indexes the 32-entry table without a range check - presumably only counts 0..31 reach this expander, confirm. */ choice = rot_tab[count]; + if (choice & 010 && SH_DYNAMIC_SHIFT_COST <= 1) + FAIL; + choice &= 7; + switch (choice) + { + case 0: + /* Plain copy. For counts with bit 4 set the remaining count becomes count - 32, i.e. negative, which selects the rotr loop after the switch. */ emit_move_insn (operands[0], operands[1]); + count -= (count & 16) * 2; + break; + case 3: + emit_insn (gen_rotlsi3_16 (operands[0], operands[1])); + count -= 16; + break; + case 1: + case 2: + { + rtx parts[2]; + parts[0] = gen_reg_rtx (SImode); + parts[1] = gen_reg_rtx (SImode); + emit_insn (gen_rotlsi3_16 (parts[2-choice], operands[1])); + emit_move_insn (parts[choice-1], operands[1]); + emit_insn (gen_ashlsi3 (parts[0], parts[0], GEN_INT 
(8))); + emit_insn (gen_lshrsi3 (parts[1], parts[1], GEN_INT (8))); + emit_insn (gen_iorsi3 (operands[0], parts[0], parts[1])); + count = (count & ~16) - 8; + } + } + + /* Finish with single-bit rotates: rotlsi3_1 (rotl) while the remaining count is positive, rotlsi3_31 (rotr) while it is negative. */ for (; count > 0; count--) + emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); + for (; count < 0; count++) + emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); + + DONE; +}") + +(define_insn "*rotlhi3_8" + [(set (match_operand:HI 0 "arith_reg_dest" "=r") + (rotate:HI (match_operand:HI 1 "arith_reg_operand" "r") + (const_int 8)))] + "TARGET_SH1" + "swap.b %1,%0" + [(set_attr "type" "arith")]) + +(define_expand "rotlhi3" + [(set (match_operand:HI 0 "arith_reg_operand" "") + (rotate:HI (match_operand:HI 1 "arith_reg_operand" "") + (match_operand:HI 2 "immediate_operand" "")))] + "TARGET_SH1" + " +{ + /* Only a constant rotate by exactly 8 is supported, every other count makes the expander FAIL. */ if (!CONST_INT_P (operands[2]) || INTVAL (operands[2]) != 8) + FAIL; +}") + +;; +;; shift left + +(define_insn "ashlsi3_sh2a" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")))] + "TARGET_SH2A" + "shad %2,%0" + [(set_attr "type" "arith") + (set_attr "length" "4")]) + +;; This pattern is used by init_expmed for computing the costs of shift +;; insns. + +(define_insn_and_split "ashlsi3_std" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r,r,r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0,0,0,0") + (match_operand:SI 2 "nonmemory_operand" "r,M,P27,?ri"))) + (clobber (match_scratch:SI 3 "=X,X,X,&r"))] + "TARGET_SH3 + || (TARGET_SH1 && satisfies_constraint_P27 (operands[2]))" + "@ + shld %2,%0 + add %0,%0 + shll%O2 %0 + #" + "TARGET_SH3 + && reload_completed + && CONST_INT_P (operands[2]) + && ! 
satisfies_constraint_P27 (operands[2])" + [(set (match_dup 3) (match_dup 2)) + (parallel + [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 3))) + (clobber (match_dup 4))])] + "/* Operand 3, the matched scratch, now receives the shift count, so the new shift insn gets a fresh SCRATCH for its own clobber. */ operands[4] = gen_rtx_SCRATCH (SImode);" + [(set_attr "length" "*,*,*,4") + (set_attr "type" "dyn_shift,arith,arith,arith")]) + +(define_insn "ashlhi3_k" + [(set (match_operand:HI 0 "arith_reg_dest" "=r,r") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0,0") + (match_operand:HI 2 "const_int_operand" "M,P27")))] + "TARGET_SH1 && satisfies_constraint_P27 (operands[2])" + "@ + add %0,%0 + shll%O2 %0" + [(set_attr "type" "arith")]) + +(define_insn "ashlsi3_n" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && ! sh_dynamicalize_shift_p (operands[2])" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1)) + (const_string "2") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3)) + (const_string "6")] + (const_string "8"))) + (set_attr "type" "arith")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_dest" "") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && reload_completed" + [(use (reg:SI R0_REG))] + " +{ + /* All output is left to gen_shifty_op, and because of DONE the replacement pattern listed above is never emitted. */ gen_shifty_op (ASHIFT, operands); + DONE; +}") + +(define_insn "ashlsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (ashift:SI (match_operand:SI 1 "extend_reg_operand" "r,r") + (match_operand:SI 2 "shift_count_operand" "r,n")))] + "TARGET_SHMEDIA" + "@ + shlld.l %1, %2, %0 + shlli.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_expand "ashlsi3" + [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "") + (ashift:SI 
(match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:SI T_REG))])] + "" + " +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2])); + DONE; + } + if (CONST_INT_P (operands[2]) + && sh_dynamicalize_shift_p (operands[2])) + operands[2] = force_reg (SImode, operands[2]); + if (TARGET_SH3) + { + emit_insn (gen_ashlsi3_std (operands[0], operands[1], operands[2])); + DONE; + } + if (! immediate_operand (operands[2], GET_MODE (operands[2]))) + FAIL; +}") + +/* HImode left shift by a constant, emitted as # so that it has to be split. The length attribute is 2, 4 or 6 depending on shift_insns_rtx. */ (define_insn "*ashlhi3_n" + [(set (match_operand:HI 0 "arith_reg_dest" "=r") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0") + (match_operand:HI 2 "const_int_operand" "n"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1)) + (const_string "2") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2)) + (const_string "4")] + (const_string "6"))) + (set_attr "type" "arith")]) + +/* HImode left shift expander: it FAILs unless the count is a CONST_INT, and copies operand 1 into a HImode register when it is not already an arith_reg_operand. */ (define_expand "ashlhi3" + [(parallel [(set (match_operand:HI 0 "arith_reg_operand" "") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:SI T_REG))])] + "TARGET_SH1" + " +{ + if (!CONST_INT_P (operands[2])) + FAIL; + /* It may be possible to call gen_ashlhi3 directly with more generic + operands. Make sure operands[1] is a HImode register here. 
*/ + if (!arith_reg_operand (operands[1], HImode)) + operands[1] = copy_to_mode_reg (HImode, operands[1]); +}") + +(define_split + [(set (match_operand:HI 0 "arith_reg_dest" "") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "") + (match_operand:HI 2 "const_int_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && reload_completed" + [(use (reg:SI R0_REG))] + " +{ + /* All output is left to gen_shifty_hi_op, and because of DONE the replacement pattern listed above is never emitted. */ gen_shifty_hi_op (ASHIFT, operands); + DONE; +}") + +; +; arithmetic shift right +; + +(define_insn "ashrsi3_sh2a" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))] + "TARGET_SH2A" + "shad %2,%0" + [(set_attr "type" "dyn_shift") + (set_attr "length" "4")]) + +(define_insn "ashrsi3_k" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "M"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && INTVAL (operands[2]) == 1" + "shar %0" + [(set_attr "type" "arith")]) + +;; We can't do HImode right shifts correctly unless we start out with an +;; explicit zero / sign extension; doing that would result in worse overall +;; code, so just let the machine independent code widen the mode. +;; That's why we don't have ashrhi3_k / lshrhi3_k / lshrhi3_m / lshrhi3 . + + +;; ??? This should be a define expand. + +(define_insn "ashrsi2_16" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)))] + "TARGET_SH1" + "#" + [(set_attr "length" "4")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_dest" "") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "") + (const_int 16)))] + "TARGET_SH1" + [(set (match_dup 0) (rotate:SI (match_dup 1) (const_int 16))) + (set (match_dup 0) (sign_extend:SI (match_dup 2)))] + "/* Operand 2 is the HImode low part of the destination: after the rotate by 16 it holds the former upper half, which the second set sign-extends. */ operands[2] = gen_lowpart (HImode, operands[0]);") + +;; ??? This should be a define expand. 
+
+(define_insn "ashrsi2_31" /* Arithmetic right shift by 31; the output template is #, and T is clobbered.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+		     (const_int 31)))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  [(set_attr "length" "4")])
+
+(define_split /* Emits ashlsi_c (operand 0 from operand 1) followed by mov_neg_si_t on operand 0.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+		     (const_int 31)))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  [(const_int 0)]
+  "
+{
+  emit_insn (gen_ashlsi_c (operands[0], operands[1]));
+  emit_insn (gen_mov_neg_si_t (copy_rtx (operands[0])));
+  DONE;
+}")
+
+(define_peephole2 /* Zeroing operand 0 and then setting T = (operand 0 > operand 1) becomes ashlsi_c on operand 1 when both registers are dead afterwards.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "") (const_int 0))
+   (set (reg:SI T_REG)
+	(gt:SI (match_dup 0) (match_operand:SI 1 "arith_reg_operand" "")))]
+  "TARGET_SH1
+   && peep2_reg_dead_p (2, operands[0])
+   && peep2_reg_dead_p (2, operands[1])"
+  [(const_int 0)]
+  "
+{
+  emit_insn (gen_ashlsi_c (operands[1], operands[1]));
+  DONE;
+}")
+
+(define_insn "ashlsi_c" /* shll: shift left by 1 and set T = (operand 1 < 0).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") (const_int 1)))
+   (set (reg:SI T_REG)
+	(lt:SI (match_dup 1) (const_int 0)))]
+  "TARGET_SH1"
+  "shll %0"
+  [(set_attr "type" "arith")])
+
+(define_insn "*ashlsi_c_void" /* shll used for its T result only; the register result is a clobbered scratch tied to operand 0.  Only matches when cse_not_expected.  */
+  [(set (reg:SI T_REG)
+	(lt:SI (match_operand:SI 0 "arith_reg_operand" "r") (const_int 0)))
+   (clobber (match_scratch:SI 1 "=0"))]
+  "TARGET_SH1 && cse_not_expected"
+  "shll %0"
+  [(set_attr "type" "arith")])
+
+(define_insn "ashrsi3_d" /* SH3: arithmetic right shift by the negation of register operand 2 (shad).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+		     (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+  "TARGET_SH3"
+  "shad %2,%0"
+  [(set_attr "type" "dyn_shift")])
+
+(define_insn "ashrsi3_n" /* Arithmetic right shift of r4 by a constant, done with a jsr through register operand 1; clobbers T and PR.  */
+  [(set (reg:SI R4_REG)
+	(ashiftrt:SI (reg:SI R4_REG)
+		     (match_operand:SI 0 "const_int_operand" "i")))
+   (clobber (reg:SI T_REG))
+   (clobber (reg:SI PR_REG))
+   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+  "TARGET_SH1"
+  "jsr @%1%#"
+  [(set_attr "type" "sfunc")
+   (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "ashrsi3_media" /* SHmedia: count in a register (shard.l) or as an immediate (shari.l).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+	(ashiftrt:SI (match_operand:SI 1 "extend_reg_operand" "r,r")
+		     (match_operand:SI 2 "shift_count_operand" "r,n")))]
+  "TARGET_SHMEDIA"
+  "@
+ shard.l %1, %2, %0
+ shari.l %1, %2, %0"
+  [(set_attr "type" "arith_media")
+   (set_attr "highpart" "ignore")])
+
+(define_expand "ashrsi3" /* Expander: SHmedia uses ashrsi3_media; otherwise the return value of expand_ashiftrt selects DONE or FAIL.  */
+  [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "")
+		   (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+				(match_operand:SI 2 "nonmemory_operand" "")))
+	      (clobber (reg:SI T_REG))])]
+  ""
+  "
+{
+  if (TARGET_SHMEDIA)
+    {
+      emit_insn (gen_ashrsi3_media (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  if (expand_ashiftrt (operands))
+    DONE;
+  else
+    FAIL;
+}")
+
+;; logical shift right
+
+(define_insn "lshrsi3_sh2a" /* SH2A: logical right shift by the negation of register operand 2 (shld).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+		     (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+  "TARGET_SH2A"
+  "shld %2,%0"
+  [(set_attr "type" "dyn_shift")
+   (set_attr "length" "4")])
+
+(define_insn "lshrsi3_d" /* SH3: logical right shift by the negation of register operand 2 (shld).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+		     (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+  "TARGET_SH3"
+  "shld %2,%0"
+  [(set_attr "type" "dyn_shift")])
+
+;; Only the single bit shift clobbers the T bit.
+
+(define_insn "lshrsi3_m" /* Logical right shift by a constant satisfying constraint M (shlr); T is clobbered.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+		     (match_operand:SI 2 "const_int_operand" "M")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1 && satisfies_constraint_M (operands[2])"
+  "shlr %0"
+  [(set_attr "type" "arith")])
+
+(define_insn "lshrsi3_k" /* Logical right shift by a P27 constant that is not an M constant (shlr%O2); no T clobber in this pattern.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+		     (match_operand:SI 2 "const_int_operand" "P27")))]
+  "TARGET_SH1 && satisfies_constraint_P27 (operands[2])
+   && ! satisfies_constraint_M (operands[2])"
+  "shlr%O2 %0"
+  [(set_attr "type" "arith")])
+
+(define_insn "lshrsi3_n" /* Constant count for which sh_dynamicalize_shift_p is false; the output template is #, length 2, 4, 6 or 8 from shift_insns_rtx.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+		     (match_operand:SI 2 "const_int_operand" "n")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1 && ! sh_dynamicalize_shift_p (operands[2])"
+  "#"
+  [(set (attr "length")
+	(cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1))
+	       (const_string "2")
+	       (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2))
+	       (const_string "4")
+	       (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3))
+	       (const_string "6")]
+	      (const_string "8")))
+   (set_attr "type" "arith")])
+
+(define_split /* After reload, expand the constant logical right shift through gen_shifty_op.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+		     (match_operand:SI 2 "const_int_operand" "")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1 && reload_completed"
+  [(use (reg:SI R0_REG))]
+  "
+{
+  gen_shifty_op (LSHIFTRT, operands);
+  DONE;
+}")
+
+(define_insn "lshrsi3_media" /* SHmedia: count in a register (shlrd.l) or as an immediate (shlri.l).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+	(lshiftrt:SI (match_operand:SI 1 "extend_reg_operand" "r,r")
+		     (match_operand:SI 2 "shift_count_operand" "r,n")))]
+  "TARGET_SHMEDIA"
+  "@
+ shlrd.l %1, %2, %0
+ shlri.l %1, %2, %0"
+  [(set_attr "type" "arith_media")
+   (set_attr "highpart" "ignore")])
+
+(define_expand "lshrsi3" /* Expander: SHmedia uses lshrsi3_media; on SH3 a register count is negated and passed to lshrsi3_d; otherwise a non-immediate count FAILs.  */
+  [(parallel [(set (match_operand:SI 0 "arith_reg_dest" "")
+		   (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+				(match_operand:SI 2 "nonmemory_operand" "")))
+	      (clobber (reg:SI T_REG))])]
+  ""
+  "
+{
+  if (TARGET_SHMEDIA)
+    {
+      emit_insn (gen_lshrsi3_media (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  if (CONST_INT_P (operands[2])
+      && sh_dynamicalize_shift_p (operands[2]))
+    operands[2] = force_reg (SImode, operands[2]);
+  if (TARGET_SH3 && arith_reg_operand (operands[2], GET_MODE (operands[2])))
+    {
+      rtx count = copy_to_mode_reg (SImode, operands[2]);
+      emit_insn (gen_negsi2 (count, count));
+      emit_insn (gen_lshrsi3_d (operands[0], operands[1], count));
+      DONE;
+    }
+  if (! immediate_operand (operands[2], GET_MODE (operands[2])))
+    FAIL;
+}")
+
+;; ??? This should be a define expand.
+
+(define_insn "ashldi3_k" /* DImode shift left by 1: shll on %R0 followed by rotcl on %S0; T is clobbered.  */
+  [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+	(ashift:DI (match_operand:DI 1 "arith_reg_operand" "0")
+		   (const_int 1)))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "shll %R0\;rotcl %S0"
+  [(set_attr "length" "4")
+   (set_attr "type" "arith")])
+
+;; Expander for DImode shift left with SImode operations.
+
+(define_expand "ashldi3_std" /* Builds the result in a fresh DImode pseudo from SImode shifts of the two source words plus an ior, then moves it to operand 0.  NOTE(review): the condition has no lower bound, so a count of 0 would ask gen_lshrsi3 for a shift by 32 - presumably never reached; confirm.  */
+  [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+	(ashift:DI (match_operand:DI 1 "arith_reg_operand" "r")
+		   (match_operand:DI 2 "const_int_operand" "n")))]
+  "TARGET_SH1 && INTVAL (operands[2]) < 32"
+  "
+{
+  int low_word = (TARGET_LITTLE_ENDIAN ? 0 : 1);
+  int high_word = (TARGET_LITTLE_ENDIAN ? 1 : 0);
+  rtx low_src = operand_subword (operands[1], low_word, 0, DImode);
+  rtx high_src = operand_subword (operands[1], high_word, 0, DImode);
+  rtx dst = gen_reg_rtx (DImode);
+  rtx low_dst = operand_subword (dst, low_word, 1, DImode);
+  rtx high_dst = operand_subword (dst, high_word, 1, DImode);
+  rtx tmp0, tmp1;
+
+  tmp0 = gen_reg_rtx (SImode);
+  tmp1 = gen_reg_rtx (SImode);
+  emit_insn (gen_lshrsi3 (tmp0, low_src, GEN_INT (32 - INTVAL (operands[2]))));
+  emit_insn (gen_ashlsi3 (low_dst, low_src, operands[2]));
+  emit_insn (gen_ashlsi3 (tmp1, high_src, operands[2]));
+  emit_insn (gen_iorsi3 (high_dst, tmp0, tmp1));
+  emit_move_insn (operands[0], dst);
+  DONE;
+}")
+
+(define_insn "ashldi3_media" /* SHmedia: count in a register (shlld) or as an immediate (shlli).  */
+  [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+	(ashift:DI (match_operand:DI 1 "arith_reg_operand" "r,r")
+		   (match_operand:DI 2 "shift_count_operand" "r,n")))]
+  "TARGET_SHMEDIA"
+  "@
+ shlld %1, %2, %0
+ shlli %1, %2, %0"
+  [(set_attr "type" "arith_media")])
+
+(define_insn "*ashldisi3_media" /* SHmedia: DImode shift left by a constant below 32, stored through a DImode subreg of an SImode register (shlli.l).  */
+  [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0)
+	(ashift:DI (match_operand:DI 1 "arith_reg_operand" "r")
+		   (match_operand:DI 2 "const_int_operand" "n")))]
+  "TARGET_SHMEDIA && INTVAL (operands[2]) < 32"
+  "shlli.l %1, %2, %0"
+  [(set_attr "type" "arith_media")
+   (set_attr "highpart" "ignore")])
+
+(define_expand "ashldi3" /* Expander: SHmedia uses ashldi3_media; a constant count of 1 uses ashldi3_k; other constant counts below 32 use ashldi3_std; anything else FAILs.  */
+  [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+		   (ashift:DI (match_operand:DI 1 "arith_reg_operand" "")
+			      (match_operand:DI 2 "immediate_operand" "")))
+	      (clobber (reg:SI T_REG))])]
+  ""
+  "
+{
+  if (TARGET_SHMEDIA)
+    {
+      emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  if (CONST_INT_P (operands[2])
+      && INTVAL (operands[2]) == 1)
+    {
+      emit_insn (gen_ashldi3_k (operands[0], operands[1]));
+      DONE;
+    }
+  else if (CONST_INT_P (operands[2])
+	   && INTVAL (operands[2]) < 32)
+    {
+      emit_insn (gen_ashldi3_std (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else
+    FAIL;
+}")
+
+;; ??? This should be a define expand.
+
+(define_insn "lshrdi3_k" /* DImode logical right shift by 1: shlr on %S0 followed by rotcr on %R0; T is clobbered.  */
+  [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+	(lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0")
+		     (const_int 1)))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "shlr %S0\;rotcr %R0"
+  [(set_attr "length" "4")
+   (set_attr "type" "arith")])
+
+(define_insn "lshrdi3_media" /* SHmedia shlrd / shlri; a destination that is not an arith_reg_dest is only accepted for a constant count above 32.  */
+  [(set (match_operand:DI 0 "ext_dest_operand" "=r,r")
+	(lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r,r")
+		     (match_operand:DI 2 "shift_count_operand" "r,n")))]
+  "TARGET_SHMEDIA
+   && (arith_reg_dest (operands[0], DImode)
+       || (CONST_INT_P (operands[2]) && INTVAL (operands[2]) > 32))"
+  "@
+ shlrd %1, %2, %0
+ shlri %1, %2, %0"
+  [(set_attr "type" "arith_media")])
+
+(define_insn "*lshrdisi3_media" /* SHmedia: DImode logical right shift by a constant below 32, stored through a DImode subreg of an SImode register (shlri.l).  */
+  [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0)
+	(lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r")
+		     (match_operand:DI 2 "const_int_operand" "n")))]
+  "TARGET_SHMEDIA && INTVAL (operands[2]) < 32"
+  "shlri.l %1, %2, %0"
+  [(set_attr "type" "arith_media")
+   (set_attr "highpart" "ignore")])
+
+(define_expand "lshrdi3" /* Expander: SHmedia uses lshrdi3_media; otherwise FAILs unless the count is the constant 1.  */
+  [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+		   (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "")
+				(match_operand:DI 2 "immediate_operand" "")))
+	      (clobber (reg:SI T_REG))])]
+  ""
+  "
+{
+  if (TARGET_SHMEDIA)
+    {
+      emit_insn (gen_lshrdi3_media (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  if (!CONST_INT_P (operands[2])
+      || INTVAL (operands[2]) != 1)
+    FAIL;
+}")
+
+;; ??? This should be a define expand.
+
+(define_insn "ashrdi3_k" /* DImode arithmetic right shift by 1: shar on %S0 followed by rotcr on %R0; T is clobbered.  */
+  [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+	(ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0")
+		     (const_int 1)))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "shar %S0\;rotcr %R0"
+  [(set_attr "length" "4")
+   (set_attr "type" "arith")])
+
+(define_insn "ashrdi3_media" /* SHmedia shard / shari; a destination that is not an arith_reg_dest is only accepted for a constant count of at least 32.  */
+  [(set (match_operand:DI 0 "ext_dest_operand" "=r,r")
+	(ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r,r")
+		     (match_operand:DI 2 "shift_count_operand" "r,n")))]
+  "TARGET_SHMEDIA
+   && (arith_reg_dest (operands[0], DImode)
+       || (CONST_INT_P (operands[2]) && INTVAL (operands[2]) >= 32))"
+  "@
+ shard %1, %2, %0
+ shari %1, %2, %0"
+  [(set_attr "type" "arith_media")])
+
+(define_insn "*ashrdisi3_media" /* SHmedia: DImode arithmetic right shift by a constant below 32, stored through a DImode subreg of an SImode register (shari.l).  */
+  [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0)
+	(ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r")
+		     (match_operand:DI 2 "const_int_operand" "n")))]
+  "TARGET_SHMEDIA && INTVAL (operands[2]) < 32"
+  "shari.l %1, %2, %0"
+  [(set_attr "type" "arith_media")
+   (set_attr "highpart" "ignore")])
+
+(define_insn "ashrdisi3_media_high" /* SHmedia: SImode truncation of a DImode arithmetic right shift by a constant of at least 32 (shari).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(truncate:SI
+	   (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r")
+			(match_operand:DI 2 "const_int_operand" "n"))))]
+  "TARGET_SHMEDIA && INTVAL (operands[2]) >= 32"
+  "shari %1, %2, %0"
+  [(set_attr "type" "arith_media")])
+
+(define_insn "ashrdisi3_media_opaque" /* SHmedia: the same shari instruction, described as an UNSPEC_ASHIFTRT unspec with no condition on the count.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(unspec:SI [(match_operand:DI 1 "arith_reg_operand" "r")
+		    (match_operand:DI 2 "const_int_operand" "n")]
+	 UNSPEC_ASHIFTRT))]
+  "TARGET_SHMEDIA"
+  "shari %1, %2, %0"
+  [(set_attr "type" "arith_media")])
+
+(define_expand "ashrdi3" /* Expander: SHmedia uses ashrdi3_media; otherwise FAILs unless the count is the constant 1.  */
+  [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+		   (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "")
+				(match_operand:DI 2 "immediate_operand" "")))
+	      (clobber (reg:SI T_REG))])]
+  ""
+  "
+{
+  if (TARGET_SHMEDIA)
+    {
+      emit_insn (gen_ashrdi3_media (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  if (!CONST_INT_P (operands[2])
+      || INTVAL (operands[2]) != 1)
+    FAIL;
+}")
+
+;; combined left/right shift
+
+(define_split /* After reload, for a shift count in 0..31: FAIL when gen_shl_and returns nonzero, otherwise DONE.  */
+  [(set (match_operand:SI 0 "register_operand" "")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "")
+			   (match_operand:SI 2 "const_int_operand" ""))
+		(match_operand:SI 3 "const_int_operand" "")))]
+  "TARGET_SH1 && reload_completed && (unsigned)INTVAL (operands[2]) < 32"
+  [(use (reg:SI R0_REG))]
+  "if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) FAIL;
+   DONE;")
+
+(define_split /* The same split for the form that carries a T clobber.  */
+  [(set (match_operand:SI 0 "register_operand" "")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "")
+			   (match_operand:SI 2 "const_int_operand" ""))
+		(match_operand:SI 3 "const_int_operand" "")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1 && reload_completed && (unsigned)INTVAL (operands[2]) < 32"
+  [(use (reg:SI R0_REG))]
+  "if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) FAIL;
+   DONE;")
+
+(define_insn "" /* Shift-left-and-mask accepted when shl_and_kind returns 1; the output template is #, length 4..18 from shl_and_length.  */
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+			   (match_operand:SI 2 "const_int_operand" "n"))
+		(match_operand:SI 3 "const_int_operand" "n")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1 && shl_and_kind (operands[2], operands[3], 0) == 1"
+  "#"
+  [(set (attr "length")
+	(cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2))
+	       (const_string "4")
+	       (eq (symbol_ref "shl_and_length (insn)") (const_int 3))
+	       (const_string "6")
+	       (eq (symbol_ref "shl_and_length (insn)") (const_int 4))
+	       (const_string "8")
+	       (eq (symbol_ref "shl_and_length (insn)") (const_int 5))
+	       (const_string "10")
+	       (eq (symbol_ref "shl_and_length (insn)") (const_int 6))
+	       (const_string "12")
+	       (eq (symbol_ref "shl_and_length (insn)") (const_int 7))
+	       (const_string "14")
+	       (eq (symbol_ref "shl_and_length (insn)") (const_int 8))
+	       (const_string "16")]
+	      (const_string "18")))
+   (set_attr "type" "arith")])
+
+(define_insn "" /* Shift-left-and-mask accepted when shl_and_kind returns 2; operand 0 uses constraint z; length 4..10 from shl_and_length.  */
+  [(set (match_operand:SI 0 "register_operand" "=z")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+			   (match_operand:SI 2 "const_int_operand" "n"))
+		(match_operand:SI 3 "const_int_operand" "n")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1 && shl_and_kind (operands[2], operands[3], 0) == 2"
+  "#"
+  [(set (attr "length")
+	(cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2))
+	       (const_string "4")
+	       (eq (symbol_ref "shl_and_length (insn)") (const_int 3))
+	       (const_string "6")
+	       (eq (symbol_ref "shl_and_length (insn)") (const_int 4))
+	       (const_string "8")]
+	      (const_string "10")))
+   (set_attr "type" "arith")])
+
+;; shift left / and combination with a scratch register: The combine pass
+;; does not accept the individual instructions, even though they are
+;; cheap.  But it needs a precise description so that it is usable after
+;; reload.
+(define_insn "and_shl_scratch" /* Output template is #; length 4..12 from shl_and_scr_length.  */
+  [(set (match_operand:SI 0 "register_operand" "=r,&r")
+	(lshiftrt:SI
+	 (ashift:SI
+	  (and:SI
+	   (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,0")
+			(match_operand:SI 2 "const_int_operand" "N,n"))
+	   (match_operand:SI 3 "" "0,r"))
+	  (match_operand:SI 4 "const_int_operand" "n,n"))
+	 (match_operand:SI 5 "const_int_operand" "n,n")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  [(set (attr "length")
+	(cond [(eq (symbol_ref "shl_and_scr_length (insn)") (const_int 2))
+	       (const_string "4")
+	       (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 3))
+	       (const_string "6")
+	       (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 4))
+	       (const_string "8")
+	       (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 5))
+	       (const_string "10")]
+	      (const_string "12")))
+   (set_attr "type" "arith")])
+
+(define_split /* Expands the and_shl_scratch shape step by step: optional right shift by operand 2, and with whichever of operands 1 / 3 is not operand 0, left shift by operand 4, optional right shift by operand 5.  */
+  [(set (match_operand:SI 0 "register_operand" "")
+	(lshiftrt:SI
+	 (ashift:SI
+	  (and:SI
+	   (lshiftrt:SI (match_operand:SI 1 "register_operand" "")
+			(match_operand:SI 2 "const_int_operand" ""))
+	   (match_operand:SI 3 "register_operand" ""))
+	  (match_operand:SI 4 "const_int_operand" ""))
+	 (match_operand:SI 5 "const_int_operand" "")))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  [(use (reg:SI R0_REG))]
+  "
+{
+  rtx and_source = operands[rtx_equal_p (operands[0], operands[1]) ? 3 : 1];
+
+  if (INTVAL (operands[2]))
+    {
+      gen_shifty_op (LSHIFTRT, operands);
+    }
+  emit_insn (gen_andsi3 (operands[0], operands[0], and_source));
+  operands[2] = operands[4];
+  gen_shifty_op (ASHIFT, operands);
+  if (INTVAL (operands[5]))
+    {
+      operands[2] = operands[5];
+      gen_shifty_op (LSHIFTRT, operands);
+    }
+  DONE;
+}")
+
+;; signed left/right shift combination.
+(define_split /* FAILs when gen_shl_sext returns nonzero, otherwise DONE.  */
+  [(set (match_operand:SI 0 "register_operand" "")
+	(sign_extract:SI
+	 (ashift:SI (match_operand:SI 1 "register_operand" "")
+		    (match_operand:SI 2 "const_int_operand" ""))
+	 (match_operand:SI 3 "const_int_operand" "")
+	 (const_int 0)))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1"
+  [(use (reg:SI R0_REG))]
+  "if (gen_shl_sext (operands[0], operands[2], operands[3], operands[1])) FAIL;
+   DONE;")
+
+(define_insn "shl_sext_ext" /* Accepted when shl_sext_kind returns 1 through 5; the output template is #, length 2..18 from shl_sext_length.  */
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(sign_extract:SI
+	 (ashift:SI (match_operand:SI 1 "register_operand" "0")
+		    (match_operand:SI 2 "const_int_operand" "n"))
+	 (match_operand:SI 3 "const_int_operand" "n")
+	 (const_int 0)))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1 && (unsigned)shl_sext_kind (operands[2], operands[3], 0) - 1 < 5"
+  "#"
+  [(set (attr "length")
+	(cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 1))
+	       (const_string "2")
+	       (eq (symbol_ref "shl_sext_length (insn)") (const_int 2))
+	       (const_string "4")
+	       (eq (symbol_ref "shl_sext_length (insn)") (const_int 3))
+	       (const_string "6")
+	       (eq (symbol_ref "shl_sext_length (insn)") (const_int 4))
+	       (const_string "8")
+	       (eq (symbol_ref "shl_sext_length (insn)") (const_int 5))
+	       (const_string "10")
+	       (eq (symbol_ref "shl_sext_length (insn)") (const_int 6))
+	       (const_string "12")
+	       (eq (symbol_ref "shl_sext_length (insn)") (const_int 7))
+	       (const_string "14")
+	       (eq (symbol_ref "shl_sext_length (insn)") (const_int 8))
+	       (const_string "16")]
+	      (const_string "18")))
+   (set_attr "type" "arith")])
+
+(define_insn "shl_sext_sub" /* Accepted when shl_sext_kind returns 6 or 7; operand 0 uses constraint z; length 6..14 from shl_sext_length.  */
+  [(set (match_operand:SI 0 "register_operand" "=z")
+	(sign_extract:SI
+	 (ashift:SI (match_operand:SI 1 "register_operand" "0")
+		    (match_operand:SI 2 "const_int_operand" "n"))
+	 (match_operand:SI 3 "const_int_operand" "n")
+	 (const_int 0)))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1 && (shl_sext_kind (operands[2], operands[3], 0) & ~1) == 6"
+  "#"
+  [(set (attr "length")
+	(cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 3))
+	       (const_string "6")
+	       (eq (symbol_ref "shl_sext_length (insn)") (const_int 4))
+	       (const_string "8")
+	       (eq (symbol_ref "shl_sext_length (insn)") (const_int 5))
+	       (const_string "10")
+	       (eq (symbol_ref "shl_sext_length (insn)") (const_int 6))
+	       (const_string "12")]
+	      (const_string "14")))
+   (set_attr "type" "arith")])
+
+;; These patterns are found in expansions of DImode shifts by 16, and
+;; allow the xtrct instruction to be generated from C source.
+
+(define_insn "xtrct_left" /* (operand 1 << 16) | (operand 2 >> 16), with operand 2 tied to the destination.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r")
+			   (const_int 16))
+		(lshiftrt:SI (match_operand:SI 2 "arith_reg_operand" "0")
+			     (const_int 16))))]
+  "TARGET_SH1"
+  "xtrct %1,%0"
+  [(set_attr "type" "arith")])
+
+(define_insn "xtrct_right" /* (operand 1 >> 16) | (operand 2 << 16), with operand 1 tied to the destination.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+			     (const_int 16))
+		(ashift:SI (match_operand:SI 2 "arith_reg_operand" "r")
+			   (const_int 16))))]
+  "TARGET_SH1"
+  "xtrct %2,%0"
+  [(set_attr "type" "arith")])
+
+;; -------------------------------------------------------------------------
+;; Unary arithmetic
+;; -------------------------------------------------------------------------
+
+(define_insn "negc" /* operand 0 = -(T + operand 1); T = ((T | operand 1) != 0).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(neg:SI (plus:SI (reg:SI T_REG)
+			 (match_operand:SI 1 "arith_reg_operand" "r"))))
+   (set (reg:SI T_REG)
+	(ne:SI (ior:SI (reg:SI T_REG) (match_dup 1))
+	       (const_int 0)))]
+  "TARGET_SH1"
+  "negc %1,%0"
+  [(set_attr "type" "arith")])
+
+(define_insn "*negdi_media" /* SHmedia DImode negate, emitted as sub r63, %1, %0.  */
+  [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+	(neg:DI (match_operand:DI 1 "arith_reg_operand" "r")))]
+  "TARGET_SHMEDIA"
+  "sub r63, %1, %0"
+  [(set_attr "type" "arith_media")])
+
+(define_expand "negdi2" /* Expander: on SH1 emits clrt, then negc on the low word, then negc on the high word; otherwise the C body does nothing.  */
+  [(set (match_operand:DI 0 "arith_reg_operand" "")
+	(neg:DI (match_operand:DI 1 "arith_reg_operand" "")))]
+  ""
+  "
+{
+  if (TARGET_SH1)
+    {
+      int low_word = (TARGET_LITTLE_ENDIAN ? 0 : 1);
+      int high_word = (TARGET_LITTLE_ENDIAN ? 1 : 0);
+
+      rtx low_src = operand_subword (operands[1], low_word, 0, DImode);
+      rtx high_src = operand_subword (operands[1], high_word, 0, DImode);
+
+      rtx low_dst = operand_subword (operands[0], low_word, 1, DImode);
+      rtx high_dst = operand_subword (operands[0], high_word, 1, DImode);
+
+      emit_insn (gen_clrt ());
+      emit_insn (gen_negc (low_dst, low_src));
+      emit_insn (gen_negc (high_dst, high_src));
+      DONE;
+    }
+}")
+
+(define_insn "negsi2" /* SImode negate (neg).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(neg:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
+  "TARGET_SH1"
+  "neg %1,%0"
+  [(set_attr "type" "arith")])
+
+(define_insn "one_cmplsi2" /* SImode bitwise complement (not).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(not:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
+  "TARGET_SH1"
+  "not %1,%0"
+  [(set_attr "type" "arith")])
+
+(define_expand "one_cmpldi2" /* SHmedia only: DImode complement expressed as xor with -1.  */
+  [(set (match_operand:DI 0 "arith_reg_dest" "")
+	(xor:DI (match_operand:DI 1 "arith_reg_operand" "")
+		(const_int -1)))]
+  "TARGET_SHMEDIA" "")
+
+/* The SH4 202 can do zero-offset branches without pipeline stalls.
+   This can be used as some kind of conditional execution, which is useful
+   for abs.  */
+(define_split /* Emits movsi_i to copy operand 1 into operand 0, then cneg with operand 0 in all three positions.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(plus:SI (xor:SI (neg:SI (reg:SI T_REG))
+			 (match_operand:SI 1 "arith_reg_operand" ""))
+		 (reg:SI T_REG)))]
+  "TARGET_HARD_SH4"
+  [(const_int 0)]
+  "emit_insn (gen_movsi_i (operands[0], operands[1]));
+   emit_insn (gen_cneg (operands[0], operands[0], operands[0]));
+   DONE;")
+
+(define_insn "cneg" /* Yields operand 1 (tied to the destination) when T is 0, otherwise the negation of operand 2: a bf over a neg.  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(if_then_else:SI (eq:SI (reg:SI T_REG) (const_int 0))
+		      (match_operand:SI 1 "arith_reg_operand" "0")
+		      (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+  "TARGET_HARD_SH4"
+  "bf 0f\;neg %2,%0\\n0:"
+  [(set_attr "type" "arith") ;; poor approximation
+   (set_attr "length" "4")])
+
+
+;; -------------------------------------------------------------------------
+;; Zero extension instructions
+;; -------------------------------------------------------------------------
+
+(define_insn "zero_extendsidi2" /* SHmedia: SImode to DImode zero extension, emitted as addz.l with r63.  */
+  [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+	(zero_extend:DI (match_operand:SI 1 "extend_reg_operand" "r")))]
+  "TARGET_SHMEDIA"
+  "addz.l %1, r63, %0"
+  [(set_attr "type" "arith_media")
+   (set_attr "highpart" "extend")])
+
+(define_insn "zero_extendhidi2" /* SHmedia: the register alternative has template #; the memory alternative loads with ld%M1.uw.  */
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+  "TARGET_SHMEDIA"
+  "@
+ #
+ ld%M1.uw %m1, %0"
+  [(set_attr "type" "*,load_media")
+   (set (attr "highpart")
+	(cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+	       (const_string "user")]
+	      (const_string "ignore")))])
+
+(define_split /* After reload: shift left by 48, then logical right shift by 48; a TRUNCATE around operand 1 is stripped first.  */
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI (match_operand:HI 1 "extend_reg_operand" "")))]
+  "TARGET_SHMEDIA && reload_completed"
+  [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 48)))
+   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
+  "
+{
+  if (GET_CODE (operands[1]) == TRUNCATE)
+    operands[1] = XEXP (operands[1], 0);
+}")
+
+;; ??? when a truncated input to a zero_extend is reloaded, reload will
+;; reload the entire truncate expression.
+(define_insn_and_split "*loaddi_trunc" /* After reload, becomes a plain set of the DImode register numbered true_regnum (operands[0]) from the memory operand.  */
+  [(set (match_operand 0 "any_register_operand" "=r")
+	(truncate (match_operand:DI 1 "memory_operand" "m")))]
+  "TARGET_SHMEDIA && reload_completed"
+  "#"
+  "TARGET_SHMEDIA && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+  "operands[0] = gen_rtx_REG (DImode, true_regnum (operands[0]));")
+
+(define_insn "zero_extendqidi2" /* SHmedia: andi with 255 for a register source, ld%M1.ub for a memory source.  */
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+  "TARGET_SHMEDIA"
+  "@
+ andi %1, 255, %0
+ ld%M1.ub %m1, %0"
+  [(set_attr "type" "arith_media,load_media")
+   (set (attr "highpart")
+	(cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+	       (const_string "user")]
+	      (const_string "ignore")))])
+
+(define_expand "zero_extendhisi2" /* Expander: outside SHmedia, operand 1 is copied into a HImode register unless it already is an arith_reg_operand.  */
+  [(set (match_operand:SI 0 "arith_reg_operand" "")
+	(zero_extend:SI (match_operand:HI 1 "general_extend_operand" "")))]
+  ""
+  "
+{
+  if (! TARGET_SHMEDIA && ! arith_reg_operand (operands[1], HImode))
+    operands[1] = copy_to_mode_reg (HImode, operands[1]);
+}")
+
+(define_insn "*zero_extendhisi2_compact" /* SH1: HImode to SImode zero extension of a register (extu.w).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(zero_extend:SI (match_operand:HI 1 "arith_reg_operand" "r")))]
+  "TARGET_SH1"
+  "extu.w %1,%0"
+  [(set_attr "type" "arith")])
+
+(define_insn "*zero_extendhisi2_media" /* SHmedia: the register alternative has template #; the memory alternative loads with ld%M1.uw.  */
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(zero_extend:SI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+  "TARGET_SHMEDIA"
+  "@
+ #
+ ld%M1.uw %m1, %0"
+  [(set_attr "type" "arith_media,load_media")
+   (set (attr "highpart")
+	(cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+	       (const_string "user")]
+	      (const_string "ignore")))])
+
+(define_split /* After reload: shift the SImode low part of the source left by 16, then logical right shift by 16; a TRUNCATE around operand 1 is stripped first.  */
+  [(set (match_operand:SI 0 "register_operand" "")
+	(zero_extend:SI (match_operand:HI 1 "extend_reg_operand" "")))]
+  "TARGET_SHMEDIA && reload_completed"
+  [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
+   (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 16)))]
+  "
+{
+  rtx op1 = operands[1];
+
+  if (GET_CODE (op1) == TRUNCATE)
+    op1 = XEXP (op1, 0);
+  operands[2]
+    = simplify_gen_subreg (SImode, op1, GET_MODE (op1),
+			   subreg_lowpart_offset (SImode, GET_MODE (op1)));
+}")
+
+(define_expand "zero_extendqisi2" /* Expander: outside SHmedia, operand 1 is copied into a QImode register unless it already is an arith_reg_operand.  */
+  [(set (match_operand:SI 0 "arith_reg_operand" "")
+	(zero_extend:SI (match_operand:QI 1 "general_extend_operand" "")))]
+  ""
+  "
+{
+  if (! TARGET_SHMEDIA && ! arith_reg_operand (operands[1], QImode))
+    operands[1] = copy_to_mode_reg (QImode, operands[1]);
+}")
+
+(define_insn "*zero_extendqisi2_compact" /* SH1: QImode to SImode zero extension of a register (extu.b).  */
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(zero_extend:SI (match_operand:QI 1 "arith_reg_operand" "r")))]
+  "TARGET_SH1"
+  "extu.b %1,%0"
+  [(set_attr "type" "arith")])
+
+(define_insn "*zero_extendqisi2_media" /* SHmedia: andi with 255 for a register source, ld%M1.ub for a memory source.  */
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(zero_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+  "TARGET_SHMEDIA"
+  "@
+ andi %1, 255, %0
+ ld%M1.ub %m1, %0"
+  [(set_attr "type" "arith_media,load_media")
+   (set (attr "highpart")
+	(cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+	       (const_string "user")]
+	      (const_string "ignore")))])
+
+(define_insn "zero_extendqihi2" /* SH1: QImode to HImode zero extension of a register (extu.b).  */
+  [(set (match_operand:HI 0 "arith_reg_dest" "=r")
+	(zero_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))]
+  "TARGET_SH1"
+  "extu.b %1,%0"
+  [(set_attr "type" "arith")])
+
+;; -------------------------------------------------------------------------
+;; Sign extension instructions
+;; -------------------------------------------------------------------------
+
+;; ??? This should be a define expand.
+;; ??? Or perhaps it should be dropped?
+
+;; convert_move generates good code for SH[1-4].
+(define_insn "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,?f")))] + "TARGET_SHMEDIA" + "@ + add.l %1, r63, %0 + ld%M1.l %m1, %0 + fmov.sl %1, %0" + [(set_attr "type" "arith_media,load_media,fpconv_media") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "extend")))]) + +(define_insn "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI (match_operand:HI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + # + ld%M1.w %m1, %0" + [(set_attr "type" "*,load_media") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:HI 1 "extend_reg_operand" "")))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 48))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))] + " +{ + if (GET_CODE (operands[1]) == TRUNCATE) + operands[1] = XEXP (operands[1], 0); +}") + +(define_insn "extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI (match_operand:QI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + # + ld%M1.b %m1, %0" + [(set_attr "type" "*,load_media") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:QI 1 "extend_reg_operand" "")))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 56))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))] + " +{ + if (GET_CODE (operands[1]) == 
TRUNCATE) + operands[1] = XEXP (operands[1], 0); +}") + +(define_expand "extendhisi2" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (sign_extend:SI (match_operand:HI 1 "general_extend_operand" "r,m")))] + "" + "") + +(define_insn "*extendhisi2_compact" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (sign_extend:SI (match_operand:HI 1 "general_movsrc_operand" "r,m")))] + "TARGET_SH1" + "@ + exts.w %1,%0 + mov.w %1,%0" + [(set_attr "type" "arith,load")]) + +(define_insn "*extendhisi2_media" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + # + ld%M1.w %m1, %0" + [(set_attr "type" "arith_media,load_media") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "extend_reg_operand" "")))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))] + " +{ + rtx op1 = operands[1]; + if (GET_CODE (op1) == TRUNCATE) + op1 = XEXP (op1, 0); + operands[2] + = simplify_gen_subreg (SImode, op1, GET_MODE (op1), + subreg_lowpart_offset (SImode, GET_MODE (op1))); +}") + +(define_expand "extendqisi2" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (sign_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))] + "" + "") + +(define_insn "*extendqisi2_compact" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (sign_extend:SI (match_operand:QI 1 "general_movsrc_operand" "r,m")))] + "TARGET_SH1" + "@ + exts.b %1,%0 + mov.b %1,%0" + [(set_attr "type" "arith,load") + (set_attr_alternative "length" + [(const_int 2) + (if_then_else + (ne (symbol_ref "TARGET_SH2A") (const_int 0)) + (const_int 4) (const_int 2))])]) + +(define_insn 
"*extendqisi2_media" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + # + ld%M1.b %m1, %0" + [(set_attr "type" "arith_media,load_media") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "extend_reg_operand" "")))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 24)))] + " +{ + rtx op1 = operands[1]; + if (GET_CODE (op1) == TRUNCATE) + op1 = XEXP (op1, 0); + operands[2] + = simplify_gen_subreg (SImode, op1, GET_MODE (op1), + subreg_lowpart_offset (SImode, GET_MODE (op1))); +}") + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "arith_reg_dest" "=r,r") + (sign_extend:HI (match_operand:QI 1 "general_movsrc_operand" "r,m")))] + "TARGET_SH1" + "@ + exts.b %1,%0 + mov.b %1,%0" + [(set_attr "type" "arith,load") + (set_attr_alternative "length" + [(const_int 2) + (if_then_else + (ne (symbol_ref "TARGET_SH2A") (const_int 0)) + (const_int 4) (const_int 2))])]) + +/* It would seem useful to combine the truncXi patterns into the movXi + patterns, but unary operators are ignored when matching constraints, + so we need separate patterns. 
*/ +/* truncdisi2: SHmedia truncation of a DImode register to SImode. The six alternatives cover a general register destination, memory stores from a general or floating-point register, and floating-point register destinations. */ +(define_insn "truncdisi2" + [(set (match_operand:SI 0 "general_movdst_operand" "=r,m,m,f,r,f") + (truncate:SI (match_operand:DI 1 "register_operand" "r,r,f,r,f,f")))] + "TARGET_SHMEDIA" + "@ + add.l %1, r63, %0 + st%M0.l %m0, %1 + fst%M0.s %m0, %T1 + fmov.ls %1, %0 + fmov.sl %T1, %0 + fmov.s %T1, %0" + [(set_attr "type" "arith_media,store_media,fstore_media,fload_media,fpconv_media,fmove_media") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "extend")))]) + +/* truncdihi2: SHmedia truncation of a DImode register to HImode. The register alternative is a two-instruction shlli/shlri pair (length 8); the memory alternative is a single st.w (length 4). */ +(define_insn "truncdihi2" + [(set (match_operand:HI 0 "general_movdst_operand" "=?r,m") + (truncate:HI (match_operand:DI 1 "register_operand" "r,r")))] + "TARGET_SHMEDIA" + "@ + shlli\\t%1,48,%0\;shlri\\t%0,48,%0 + st%M0.w %m0, %1" + [(set_attr "type" "arith_media,store_media") + (set_attr "length" "8,4") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "extend")))]) + +; N.B. This should agree with LOAD_EXTEND_OP and movqi. +; Because we use zero extension, we can't provide signed QImode compares +; using a simple compare or conditional branch insn. +/* truncdiqi2: SHmedia truncation of a DImode register to QImode (andi with 255 for a register destination, st.b for memory). The store alternative is typed store_media, like the SHmedia stores in truncdisi2 and truncdihi2, rather than the SH-compact store type. */ +(define_insn "truncdiqi2" + [(set (match_operand:QI 0 "general_movdst_operand" "=r,m") + (truncate:QI (match_operand:DI 1 "register_operand" "r,r")))] + "TARGET_SHMEDIA" + "@ + andi %1, 255, %0 + st%M0.b %m0, %1" + [(set_attr "type" "arith_media,store_media") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "extend")))]) +;; ------------------------------------------------------------------------- +;; Move instructions +;; ------------------------------------------------------------------------- + +;; define push and pop so it is easy for sh.c +;; We can't use push and pop on SHcompact because the stack must always +;; be 8-byte aligned. 
+ +(define_expand "push" + [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) + (match_operand:SI 0 "register_operand" "r,l,x"))] + "TARGET_SH1 && ! TARGET_SH5" + "") + +(define_expand "pop" + [(set (match_operand:SI 0 "register_operand" "=r,l,x") + (mem:SI (post_inc:SI (reg:SI SP_REG))))] + "TARGET_SH1 && ! TARGET_SH5" + "") + +(define_expand "push_e" + [(parallel [(set (mem:SF (pre_dec:SI (reg:SI SP_REG))) + (match_operand:SF 0 "" "")) + (use (reg:PSI FPSCR_REG)) + (clobber (scratch:SI))])] + "TARGET_SH1 && ! TARGET_SH5" + "") + +(define_insn "push_fpul" + [(set (mem:SF (pre_dec:SI (reg:SI SP_REG))) (reg:SF FPUL_REG))] + "TARGET_SH2E && ! TARGET_SH5" + "sts.l fpul,@-r15" + [(set_attr "type" "fstore") + (set_attr "late_fp_use" "yes") + (set_attr "hit_stack" "yes")]) + +;; DFmode pushes for sh4 require a lot of what is defined for movdf_i4, +;; so use that. +(define_expand "push_4" + [(parallel [(set (mem:DF (pre_dec:SI (reg:SI SP_REG))) + (match_operand:DF 0 "" "")) + (use (reg:PSI FPSCR_REG)) + (clobber (scratch:SI))])] + "TARGET_SH1 && ! TARGET_SH5" + "") + +(define_expand "pop_e" + [(parallel [(set (match_operand:SF 0 "" "") + (mem:SF (post_inc:SI (reg:SI SP_REG)))) + (use (reg:PSI FPSCR_REG)) + (clobber (scratch:SI))])] + "TARGET_SH1 && ! TARGET_SH5" + "") + +(define_insn "pop_fpul" + [(set (reg:SF FPUL_REG) (mem:SF (post_inc:SI (reg:SI SP_REG))))] + "TARGET_SH2E && ! TARGET_SH5" + "lds.l @r15+,fpul" + [(set_attr "type" "load") + (set_attr "hit_stack" "yes")]) + +(define_expand "pop_4" + [(parallel [(set (match_operand:DF 0 "" "") + (mem:DF (post_inc:SI (reg:SI SP_REG)))) + (use (reg:PSI FPSCR_REG)) + (clobber (scratch:SI))])] + "TARGET_SH1 && ! 
TARGET_SH5" + "") + +(define_expand "push_fpscr" + [(const_int 0)] + "TARGET_SH2E" + " +{ + rtx insn = emit_insn (gen_fpu_switch (gen_frame_mem (PSImode, + gen_rtx_PRE_DEC (Pmode, + stack_pointer_rtx)), + get_fpscr_rtx ())); + add_reg_note (insn, REG_INC, stack_pointer_rtx); + DONE; +}") + +(define_expand "pop_fpscr" + [(const_int 0)] + "TARGET_SH2E" + " +{ + rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (), + gen_frame_mem (PSImode, + gen_rtx_POST_INC (Pmode, + stack_pointer_rtx)))); + add_reg_note (insn, REG_INC, stack_pointer_rtx); + DONE; +}") + +;; These two patterns can happen as the result of optimization, when +;; comparisons get simplified to a move of zero or 1 into the T reg. +;; They don't disappear completely, because the T reg is a fixed hard reg. + +(define_insn "clrt" + [(set (reg:SI T_REG) (const_int 0))] + "TARGET_SH1" + "clrt") + +(define_insn "sett" + [(set (reg:SI T_REG) (const_int 1))] + "TARGET_SH1" + "sett") + +;; Define additional pop for SH1 and SH2 so it does not get +;; placed in the delay slot. +(define_insn "*movsi_pop" + [(set (match_operand:SI 0 "register_operand" "=r,x,l") + (match_operand:SI 1 "sh_no_delay_pop_operand" ">,>,>"))] + "(TARGET_SH1 || TARGET_SH2E || TARGET_SH2A) + && ! TARGET_SH3" + "@ + mov.l %1,%0 + lds.l %1,%0 + lds.l %1,%0" + [(set_attr "type" "load_si,mem_mac,pload") + (set_attr "length" "2,2,2") + (set_attr "in_delay_slot" "no,no,no")]) + +;; t/r must come after r/r, lest reload will try to reload stuff like +;; (set (subreg:SI (mem:QI (plus:SI (reg:SI SP_REG) (const_int 12)) 0) 0) +;; (made from (set (subreg:SI (reg:QI ###) 0) ) into T. +(define_insn "movsi_i" + [(set (match_operand:SI 0 "general_movdst_operand" + "=r,r,r,t,r,r,r,r,m,<,<,x,l,x,l,r") + (match_operand:SI 1 "general_movsrc_operand" + "Q,r,I08,r,mr,x,l,t,r,x,l,r,r,>,>,i"))] + "TARGET_SH1 + && ! TARGET_SH2E + && ! 
TARGET_SH2A + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov.l %1,%0 + mov %1,%0 + mov %1,%0 + cmp/pl %1 + mov.l %1,%0 + sts %1,%0 + sts %1,%0 + movt %0 + mov.l %1,%0 + sts.l %1,%0 + sts.l %1,%0 + lds %1,%0 + lds %1,%0 + lds.l %1,%0 + lds.l %1,%0 + fake %1,%0" + [(set_attr "type" "pcload_si,move,movi8,mt_group,load_si,mac_gp,prget,arith,store,mac_mem,pstore,gp_mac,prset,mem_mac,pload,pcload_si") + (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")]) + +;; t/r must come after r/r, lest reload will try to reload stuff like +;; (subreg:SI (reg:SF FR14_REG) 0) into T (compiling stdlib/strtod.c -m3e -O2) +;; ??? This allows moves from macl to fpul to be recognized, but these moves +;; will require a reload. +;; ??? We can't include f/f because we need the proper FPSCR setting when +;; TARGET_FMOVD is in effect, and mode switching is done before reload. +(define_insn "movsi_ie" + [(set (match_operand:SI 0 "general_movdst_operand" + "=r,r,r,r,r,t,r,r,r,r,m,<,<,x,l,x,l,y,<,r,y,r,*f,y,*f,y") + (match_operand:SI 1 "general_movsrc_operand" + "Q,r,I08,I20,I28,r,mr,x,l,t,r,x,l,r,r,>,>,>,y,i,r,y,y,*f,*f,y"))] + "(TARGET_SH2E || TARGET_SH2A) + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov.l %1,%0 + mov %1,%0 + mov %1,%0 + movi20 %1,%0 + movi20s %1,%0 + cmp/pl %1 + mov.l %1,%0 + sts %1,%0 + sts %1,%0 + movt %0 + mov.l %1,%0 + sts.l %1,%0 + sts.l %1,%0 + lds %1,%0 + lds %1,%0 + lds.l %1,%0 + lds.l %1,%0 + lds.l %1,%0 + sts.l %1,%0 + fake %1,%0 + lds %1,%0 + sts %1,%0 + fsts fpul,%0 + flds %1,fpul + fmov %1,%0 + ! 
move optimized away" + [(set_attr "type" "pcload_si,move,movi8,move,move,*,load_si,mac_gp,prget,arith,store,mac_mem,pstore,gp_mac,prset,mem_mac,pload,load,fstore,pcload_si,gp_fpul,fpul_gp,fmove,fmove,fmove,nil") + (set_attr "late_fp_use" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes,*,*,yes,*,*,*,*") + (set_attr_alternative "length" + [(const_int 2) + (const_int 2) + (const_int 2) + (const_int 4) + (const_int 4) + (const_int 2) + (if_then_else + (ne (symbol_ref "TARGET_SH2A") (const_int 0)) + (const_int 4) (const_int 2)) + (const_int 2) + (const_int 2) + (const_int 2) + (if_then_else + (ne (symbol_ref "TARGET_SH2A") (const_int 0)) + (const_int 4) (const_int 2)) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 0)])]) + +(define_insn "movsi_i_lowpart" + [(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "+r,r,r,r,r,r,r,m,r")) + (match_operand:SI 1 "general_movsrc_operand" "Q,r,I08,mr,x,l,t,r,i"))] + "TARGET_SH1 + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov.l %1,%0 + mov %1,%0 + mov %1,%0 + mov.l %1,%0 + sts %1,%0 + sts %1,%0 + movt %0 + mov.l %1,%0 + fake %1,%0" + [(set_attr "type" "pcload,move,arith,load,mac_gp,prget,arith,store,pcload")]) + +(define_insn_and_split "load_ra" + [(set (match_operand:SI 0 "general_movdst_operand" "") + (unspec:SI [(match_operand:SI 1 "register_operand" "")] UNSPEC_RA))] + "TARGET_SH1" + "#" + "&& ! currently_expanding_to_rtl" + [(set (match_dup 0) (match_dup 1))] + " +{ + if (TARGET_SHCOMPACT && crtl->saves_all_registers) + operands[1] = gen_frame_mem (SImode, return_address_pointer_rtx); +}") + +;; The '?'s in the following constraints may not reflect the time taken +;; to perform the move. 
They are there to discourage the use of floating- +;; point registers for storing integer values. +(define_insn "*movsi_media" + [(set (match_operand:SI 0 "general_movdst_operand" + "=r,r,r,r,m,f?,m,f?,r,f?,*b,r,b") + (match_operand:SI 1 "general_movsrc_operand" + "r,I16Css,nCpg,m,rZ,m,f?,rZ,f?,f?,r,*b,Csy"))] + "TARGET_SHMEDIA_FPU + && (register_operand (operands[0], SImode) + || sh_register_operand (operands[1], SImode) + || GET_CODE (operands[1]) == TRUNCATE)" + "@ + add.l %1, r63, %0 + movi %1, %0 + # + ld%M1.l %m1, %0 + st%M0.l %m0, %N1 + fld%M1.s %m1, %0 + fst%M0.s %m0, %1 + fmov.ls %N1, %0 + fmov.sl %1, %0 + fmov.s %1, %0 + ptabs %1, %0 + gettr %1, %0 + pt %1, %0" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,fload_media,fstore_media,fload_media,fpconv_media,fmove_media,ptabs_media,gettr_media,pt_media") + (set_attr "length" "4,4,8,4,4,4,4,4,4,4,4,4,12") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "ignore")))]) + +(define_insn "*movsi_media_nofpu" + [(set (match_operand:SI 0 "general_movdst_operand" + "=r,r,r,r,m,*b,r,*b") + (match_operand:SI 1 "general_movsrc_operand" + "r,I16Css,nCpg,m,rZ,r,*b,Csy"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], SImode) + || sh_register_operand (operands[1], SImode) + || GET_CODE (operands[1]) == TRUNCATE)" + "@ + add.l %1, r63, %0 + movi %1, %0 + # + ld%M1.l %m1, %0 + st%M0.l %m0, %N1 + ptabs %1, %0 + gettr %1, %0 + pt %1, %0" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,ptabs_media,gettr_media,pt_media") + (set_attr "length" "4,4,8,4,4,4,4,12") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "ignore")))]) + +(define_expand "movsi_const" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (const:SI (unspec:SI [(match_operand:DI 1 "immediate_operand" "s") + (const_int 16)] 
UNSPEC_EXTRACT_S16))) + (set (match_dup 0) + (ior:SI (ashift:SI (match_dup 0) (const_int 16)) + (const:SI (unspec:SI [(match_dup 1) + (const_int 0)] UNSPEC_EXTRACT_U16))))] + "TARGET_SHMEDIA && reload_completed + && MOVI_SHORI_BASE_OPERAND_P (operands[1])" + " +{ + if (GET_CODE (operands[1]) == LABEL_REF + && GET_CODE (XEXP (operands[1], 0)) == CODE_LABEL) + LABEL_NUSES (XEXP (operands[1], 0)) += 2; + else if (GOTOFF_P (operands[1])) + { + rtx unspec = XEXP (operands[1], 0); + + if (! UNSPEC_GOTOFF_P (unspec)) + { + unspec = XEXP (unspec, 0); + if (! UNSPEC_GOTOFF_P (unspec)) + abort (); + } + if (GET_CODE (XVECEXP (unspec , 0, 0)) == LABEL_REF + && (GET_CODE (XEXP (XVECEXP (unspec, 0, 0), 0)) == CODE_LABEL)) + LABEL_NUSES (XEXP (XVECEXP (unspec, 0, 0), 0)) += 2; + } +}") + +(define_expand "movsi_const_16bit" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (const:SI (unspec:SI [(match_operand:DI 1 "immediate_operand" "s") + (const_int 0)] UNSPEC_EXTRACT_S16)))] + "TARGET_SHMEDIA && flag_pic && reload_completed + && GET_CODE (operands[1]) == SYMBOL_REF" + "") + +(define_split + [(set (match_operand:SI 0 "arith_reg_dest" "") + (match_operand:SI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && MOVI_SHORI_BASE_OPERAND_P (operands[1])" + [(const_int 0)] + " +{ + rtx insn = emit_insn (gen_movsi_const (operands[0], operands[1])); + + set_unique_reg_note (insn, REG_EQUAL, copy_rtx (operands[1])); + + DONE; +}") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && ((CONST_INT_P (operands[1]) + && ! 
satisfies_constraint_I16 (operands[1])) + || GET_CODE (operands[1]) == CONST_DOUBLE)" + [(set (subreg:DI (match_dup 0) 0) (match_dup 1))]) + +(define_expand "movsi" + [(set (match_operand:SI 0 "general_movdst_operand" "") + (match_operand:SI 1 "general_movsrc_operand" ""))] + "" + "{ if (prepare_move_operands (operands, SImode)) DONE; }") + +(define_expand "ic_invalidate_line" + [(parallel [(unspec_volatile [(match_operand:SI 0 "register_operand" "+r") + (match_dup 1)] UNSPEC_ICACHE) + (clobber (scratch:SI))])] + "TARGET_HARD_SH4 || TARGET_SH5" + " +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_ic_invalidate_line_media (operands[0])); + DONE; + } + else if (TARGET_SHCOMPACT) + { + operands[1] = function_symbol (NULL, \"__ic_invalidate\", SFUNC_STATIC); + operands[1] = force_reg (Pmode, operands[1]); + emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1])); + DONE; + } + else if (TARGET_SH4A_ARCH || TARGET_SH4_300) + { + emit_insn (gen_ic_invalidate_line_sh4a (operands[0])); + DONE; + } + operands[0] = force_reg (Pmode, operands[0]); + operands[1] = force_reg (Pmode, GEN_INT (trunc_int_for_mode (0xf0000008, + Pmode))); +}") + +;; The address %0 is assumed to be 4-aligned at least. Thus, by ORing +;; 0xf0000008, we get the low-order bits *1*00 (binary), which fits +;; the requirement *1*00 for associative address writes. The alignment of +;; %0 implies that its least significant bit is cleared, +;; thus we clear the V bit of a matching entry if there is one. 
+(define_insn "ic_invalidate_line_i" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r")] + UNSPEC_ICACHE) + (clobber (match_scratch:SI 2 "=&r"))] + "TARGET_HARD_SH4" + "ocbwb\\t@%0\;extu.w\\t%0,%2\;or\\t%1,%2\;mov.l\\t%0,@%2" + [(set_attr "length" "8") + (set_attr "type" "cwb")]) + +(define_insn "ic_invalidate_line_sh4a" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_ICACHE)] + "TARGET_SH4A_ARCH || TARGET_SH4_300" + "ocbwb\\t@%0\;synco\;icbi\\t@%0" + [(set_attr "length" "16") + (set_attr "type" "cwb")]) + +;; ??? could make arg 0 an offsettable memory operand to allow to save +;; an add in the code that calculates the address. +(define_insn "ic_invalidate_line_media" + [(unspec_volatile [(match_operand 0 "any_register_operand" "r")] + UNSPEC_ICACHE)] + "TARGET_SHMEDIA" + "ocbwb %0,0\;synco\;icbi %0, 0\;synci" + [(set_attr "length" "16") + (set_attr "type" "invalidate_line_media")]) + +(define_insn "ic_invalidate_line_compact" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "z") + (match_operand:SI 1 "register_operand" "r")] + UNSPEC_ICACHE) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "initialize_trampoline" + [(match_operand:SI 0 "" "") + (match_operand:SI 1 "" "") + (match_operand:SI 2 "" "")] + "TARGET_SHCOMPACT" + " +{ + rtx sfun, tramp; + + tramp = force_reg (Pmode, operands[0]); + sfun = force_reg (Pmode, function_symbol (NULL, \"__init_trampoline\", + SFUNC_STATIC)); + emit_move_insn (gen_rtx_REG (SImode, R2_REG), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, R3_REG), operands[2]); + + emit_insn (gen_initialize_trampoline_compact (tramp, sfun)); + DONE; +}") + +(define_insn "initialize_trampoline_compact" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "z") + (match_operand:SI 1 "register_operand" "r") + (reg:SI R2_REG) (reg:SI R3_REG)] 
+ UNSPEC_INIT_TRAMP) + + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "movqi_i" + [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m,r,r,l") + (match_operand:QI 1 "general_movsrc_operand" "r,i,m,r,t,l,r"))] + "TARGET_SH1 + && (arith_reg_operand (operands[0], QImode) + || arith_reg_operand (operands[1], QImode))" + "@ + mov %1,%0 + mov %1,%0 + mov.b %1,%0 + mov.b %1,%0 + movt %0 + sts %1,%0 + lds %1,%0" + [(set_attr "type" "move,movi8,load,store,arith,prget,prset") + (set_attr_alternative "length" + [(const_int 2) + (const_int 2) + (if_then_else + (ne (symbol_ref "TARGET_SH2A") (const_int 0)) + (const_int 4) (const_int 2)) + (if_then_else + (ne (symbol_ref "TARGET_SH2A") (const_int 0)) + (const_int 4) (const_int 2)) + (const_int 2) + (const_int 2) + (const_int 2)])]) + +(define_insn "*movqi_media" + [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m") + (match_operand:QI 1 "general_movsrc_operand" "r,I16Css,m,rZ"))] + "TARGET_SHMEDIA + && (arith_reg_operand (operands[0], QImode) + || extend_reg_or_0_operand (operands[1], QImode))" + "@ + add.l %1, r63, %0 + movi %1, %0 + ld%M1.ub %m1, %0 + st%M0.b %m0, %N1" + [(set_attr "type" "arith_media,arith_media,load_media,store_media") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "ignore")))]) + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + "{ if (prepare_move_operands (operands, QImode)) DONE; }") + +(define_expand "reload_inqi" + [(set (match_operand:SI 2 "" "=&r") + (match_operand:QI 1 "inqhi_operand" "")) + (set (match_operand:QI 0 "arith_reg_operand" "=r") + (truncate:QI (match_dup 3)))] + "TARGET_SHMEDIA" + " +{ + rtx inner = XEXP (operands[1], 0); + int regno = REGNO (inner); + + regno += HARD_REGNO_NREGS (regno, GET_MODE 
(inner)) - 1; + operands[1] = gen_rtx_REG (SImode, regno); + operands[3] = gen_rtx_REG (DImode, REGNO (operands[2])); +}") + +/* When storing r0, we have to avoid reg+reg addressing. */ +(define_insn "movhi_i" + [(set (match_operand:HI 0 "general_movdst_operand" "=r,r,r,r,m,r,l,r") + (match_operand:HI 1 "general_movsrc_operand" "Q,rI08,m,t,r,l,r,i"))] + "TARGET_SH1 + && (arith_reg_operand (operands[0], HImode) + || arith_reg_operand (operands[1], HImode)) + && (!MEM_P (operands[0]) + || GET_CODE (XEXP (operands[0], 0)) != PLUS + || !REG_P (XEXP (XEXP (operands[0], 0), 1)) + || ! refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0))" + "@ + mov.w %1,%0 + mov %1,%0 + mov.w %1,%0 + movt %0 + mov.w %1,%0 + sts %1,%0 + lds %1,%0 + fake %1,%0" + [(set_attr "type" "pcload,move,load,move,store,move,move,pcload")]) + +(define_insn "*movhi_media" + [(set (match_operand:HI 0 "general_movdst_operand" "=r,r,r,r,m") + (match_operand:HI 1 "general_movsrc_operand" "r,I16Css,n,m,rZ"))] + "TARGET_SHMEDIA + && (arith_reg_operand (operands[0], HImode) + || arith_reg_or_0_operand (operands[1], HImode))" + "@ + add.l %1, r63, %0 + movi %1, %0 + # + ld%M1.w %m1, %0 + st%M0.w %m0, %N1" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (match_operand:HI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && ! 
satisfies_constraint_I16 (operands[1])" + [(set (subreg:DI (match_dup 0) 0) (match_dup 1))]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_movdst_operand" "") + (match_operand:HI 1 "general_movsrc_operand" ""))] + "" + "{ if (prepare_move_operands (operands, HImode)) DONE; }") + +(define_expand "reload_inhi" + [(set (match_operand:SI 2 "" "=&r") + (match_operand:HI 1 "inqhi_operand" "")) + (set (match_operand:HI 0 "arith_reg_operand" "=r") + (truncate:HI (match_dup 3)))] + "TARGET_SHMEDIA" + " +{ + rtx inner = XEXP (operands[1], 0); + int regno = REGNO (inner); + + regno += HARD_REGNO_NREGS (regno, GET_MODE (inner)) - 1; + operands[1] = gen_rtx_REG (SImode, regno); + operands[3] = gen_rtx_REG (DImode, REGNO (operands[2])); +}") + +;; x/r can be created by inlining/cse, e.g. for execute/961213-1.c +;; compiled with -m2 -ml -O3 -funroll-loops +(define_insn "*movdi_i" + [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,m,r,r,r,*!x") + (match_operand:DI 1 "general_movsrc_operand" "Q,r,m,r,I08,i,x,r"))] + "TARGET_SH1 + && (arith_reg_operand (operands[0], DImode) + || arith_reg_operand (operands[1], DImode))" + "* return output_movedouble (insn, operands, DImode);" + [(set_attr "length" "4") + (set_attr "type" "pcload,move,load,store,move,pcload,move,move")]) + +;; If the output is a register and the input is memory or a register, we have +;; to be careful and see which word needs to be loaded first. 
+ +/* Split a DImode move into two word-sized moves (operands 2/3, then operands 4/5) on TARGET_SH1 once reload has completed. */ +(define_split + [(set (match_operand:DI 0 "general_movdst_operand" "") + (match_operand:DI 1 "general_movsrc_operand" ""))] + "TARGET_SH1 && reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + " +{ + int regno; + + /* Do not split a store through a pre-decrement address or a load through a post-increment address. */ + if ((MEM_P (operands[0]) + && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC) + || (MEM_P (operands[1]) + && GET_CODE (XEXP (operands[1], 0)) == POST_INC)) + FAIL; + + /* regno becomes the destination's register number, or -1 when the destination is memory. */ + switch (GET_CODE (operands[0])) + { + case REG: + regno = REGNO (operands[0]); + break; + case SUBREG: + regno = subreg_regno (operands[0]); + break; + case MEM: + regno = -1; + break; + default: + gcc_unreachable (); + } + + /* If the source does not refer to register regno (or the destination is memory), move subword 0 first and subword 1 second; otherwise move subword 1 first so that regno is written last. */ + if (regno == -1 + || ! refers_to_regno_p (regno, regno + 1, operands[1], 0)) + { + operands[2] = operand_subword (operands[0], 0, 0, DImode); + operands[3] = operand_subword (operands[1], 0, 0, DImode); + operands[4] = operand_subword (operands[0], 1, 0, DImode); + operands[5] = operand_subword (operands[1], 1, 0, DImode); + } + else + { + operands[2] = operand_subword (operands[0], 1, 0, DImode); + operands[3] = operand_subword (operands[1], 1, 0, DImode); + operands[4] = operand_subword (operands[0], 0, 0, DImode); + operands[5] = operand_subword (operands[1], 0, 0, DImode); + } + + /* Give up if operand_subword returned 0 for any of the four word operands. */ + if (operands[2] == 0 || operands[3] == 0 + || operands[4] == 0 || operands[5] == 0) + FAIL; +}") + +;; The '?'s in the following constraints may not reflect the time taken +;; to perform the move. They are there to discourage the use of floating- +;; point registers for storing integer values. 
+(define_insn "*movdi_media" + [(set (match_operand:DI 0 "general_movdst_operand" + "=r,r,r,rl,m,f?,m,f?,r,f?,*b,r,*b") + (match_operand:DI 1 "general_movsrc_operand" + "r,I16Css,nCpgF,m,rlZ,m,f?,rZ,f?,f?,r,*b,Csy"))] + "TARGET_SHMEDIA_FPU + && (register_operand (operands[0], DImode) + || sh_register_operand (operands[1], DImode))" + "@ + add %1, r63, %0 + movi %1, %0 + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1 + fld%M1.d %m1, %0 + fst%M0.d %m0, %1 + fmov.qd %N1, %0 + fmov.dq %1, %0 + fmov.d %1, %0 + ptabs %1, %0 + gettr %1, %0 + pt %1, %0" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,fload_media,fstore_media,fload_media,dfpconv_media,fmove_media,ptabs_media,gettr_media,pt_media") + (set_attr "length" "4,4,16,4,4,4,4,4,4,4,4,4,*")]) + +(define_insn "*movdi_media_nofpu" + [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,rl,m,*b,r,*b"); + (match_operand:DI 1 "general_movsrc_operand" "r,I16Css,nCpgF,m,rlZ,r,*b,Csy"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], DImode) + || sh_register_operand (operands[1], DImode))" + "@ + add %1, r63, %0 + movi %1, %0 + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1 + ptabs %1, %0 + gettr %1, %0 + pt %1, %0" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,ptabs_media,gettr_media,pt_media") + (set_attr "length" "4,4,16,4,4,4,4,*")]) + +(define_insn "*movdi_media_I16" + [(set (match_operand:DI 0 "ext_dest_operand" "=r") + (match_operand:DI 1 "const_int_operand" "I16"))] + "TARGET_SHMEDIA && reload_completed" + "movi %1, %0" + [(set_attr "type" "arith_media") + (set_attr "length" "4")]) + +(define_split + [(set (match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && MOVI_SHORI_BASE_OPERAND_P (operands[1])" + [(set (match_dup 0) (match_dup 1))] + " +{ + rtx insn; + + if (TARGET_SHMEDIA64) + insn = emit_insn (gen_movdi_const (operands[0], operands[1])); + else + insn = emit_insn (gen_movdi_const_32bit 
(operands[0], operands[1])); + + set_unique_reg_note (insn, REG_EQUAL, copy_rtx (operands[1])); + + DONE; +}") + +(define_expand "movdi_const" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s") + (const_int 48)] UNSPEC_EXTRACT_S16))) + (set (match_dup 0) + (ior:DI (ashift:DI (match_dup 0) (const_int 16)) + (const:DI (unspec:DI [(match_dup 1) + (const_int 32)] UNSPEC_EXTRACT_U16)))) + (set (match_dup 0) + (ior:DI (ashift:DI (match_dup 0) (const_int 16)) + (const:DI (unspec:DI [(match_dup 1) + (const_int 16)] UNSPEC_EXTRACT_U16)))) + (set (match_dup 0) + (ior:DI (ashift:DI (match_dup 0) (const_int 16)) + (const:DI (unspec:DI [(match_dup 1) + (const_int 0)] UNSPEC_EXTRACT_U16))))] + "TARGET_SHMEDIA64 && reload_completed + && MOVI_SHORI_BASE_OPERAND_P (operands[1])" + " +{ + sh_mark_label (operands[1], 4); +}") + +(define_expand "movdi_const_32bit" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s") + (const_int 16)] UNSPEC_EXTRACT_S16))) + (set (match_dup 0) + (ior:DI (ashift:DI (match_dup 0) (const_int 16)) + (const:DI (unspec:DI [(match_dup 1) + (const_int 0)] UNSPEC_EXTRACT_U16))))] + "TARGET_SHMEDIA32 && reload_completed + && MOVI_SHORI_BASE_OPERAND_P (operands[1])" + " +{ + sh_mark_label (operands[1], 2); +}") + +(define_expand "movdi_const_16bit" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s") + (const_int 0)] UNSPEC_EXTRACT_S16)))] + "TARGET_SHMEDIA && flag_pic && reload_completed + && GET_CODE (operands[1]) == SYMBOL_REF" + "") + +(define_split + [(set (match_operand:DI 0 "ext_dest_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && CONST_INT_P (operands[1]) + && ! 
satisfies_constraint_I16 (operands[1])" + [(set (match_dup 0) (match_dup 2)) + (match_dup 1)] + " +{ + unsigned HOST_WIDE_INT val = INTVAL (operands[1]); + unsigned HOST_WIDE_INT low = val; + unsigned HOST_WIDE_INT high = val; + unsigned HOST_WIDE_INT sign; + unsigned HOST_WIDE_INT val2 = val ^ (val-1); + + /* Zero-extend the 16 least-significant bits. */ + low &= 0xffff; + + /* Arithmetic shift right the word by 16 bits. */ + high >>= 16; + if (GET_CODE (operands[0]) == SUBREG + && GET_MODE (SUBREG_REG (operands[0])) == SImode) + { + high &= 0xffff; + high ^= 0x8000; + high -= 0x8000; + } + else + { + sign = 1; + sign <<= (HOST_BITS_PER_WIDE_INT - 16 - 1); + high ^= sign; + high -= sign; + } + do + { + /* If we can't generate the constant with a two-insn movi / shori + sequence, try some other strategies. */ + if (! CONST_OK_FOR_I16 (high)) + { + /* Try constant load / left shift. We know VAL != 0. */ + val2 = val ^ (val-1); + if (val2 > 0x1ffff) + { + int trailing_zeroes = exact_log2 ((val2 >> 16) + 1) + 15; + + if (CONST_OK_FOR_I16 (val >> trailing_zeroes) + || (! CONST_OK_FOR_I16 (high >> 16) + && CONST_OK_FOR_I16 (val >> (trailing_zeroes + 16)))) + { + val2 = (HOST_WIDE_INT) val >> trailing_zeroes; + operands[1] = gen_ashldi3_media (operands[0], operands[0], + GEN_INT (trailing_zeroes)); + break; + } + } + /* Try constant load / right shift. */ + val2 = (val >> 15) + 1; + if (val2 == (val2 & -val2)) + { + int shift = 49 - exact_log2 (val2); + + val2 = trunc_int_for_mode (val << shift, DImode); + if (CONST_OK_FOR_I16 (val2)) + { + operands[1] = gen_lshrdi3_media (operands[0], operands[0], + GEN_INT (shift)); + break; + } + } + /* Try mperm.w . 
*/ + val2 = val & 0xffff; + if ((val >> 16 & 0xffff) == val2 + && (val >> 32 & 0xffff) == val2 + && (val >> 48 & 0xffff) == val2) + { + val2 = (HOST_WIDE_INT) val >> 48; + operands[1] = gen_rtx_REG (V4HImode, true_regnum (operands[0])); + operands[1] = gen_mperm_w0 (operands[1], operands[1]); + break; + } + /* Try movi / mshflo.l */ + val2 = (HOST_WIDE_INT) val >> 32; + if (val2 == ((unsigned HOST_WIDE_INT) + trunc_int_for_mode (val, SImode))) + { + operands[1] = gen_mshflo_l_di (operands[0], operands[0], + operands[0]); + break; + } + /* Try movi / mshflo.l w/ r63. */ + /* Build the addend from an unsigned all-ones value: left-shifting a negative signed value is undefined behavior in C, while the unsigned shift yields the same bit pattern. */ + val2 = val + ((unsigned HOST_WIDE_INT) -1 << 32); + if ((HOST_WIDE_INT) val2 < 0 && CONST_OK_FOR_I16 (val2)) + { + operands[1] = gen_mshflo_l_di (operands[0], operands[0], + const0_rtx); + break; + } + } + val2 = high; + operands[1] = gen_shori_media (operands[0], operands[0], GEN_INT (low)); + } + while (0); + operands[2] = GEN_INT (val2); +}") + +/* Split a DImode CONST_DOUBLE load after reload into two sets: first load the constant arithmetically shifted right by 16 (operand 2), then shift the register left by 16 and OR in the low 16 bits (operand 1). */ +(define_split + [(set (match_operand:DI 0 "ext_dest_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && GET_CODE (operands[1]) == CONST_DOUBLE" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) + (ior:DI (ashift:DI (match_dup 0) (const_int 16)) (match_dup 1)))] + " +{ + unsigned HOST_WIDE_INT low = CONST_DOUBLE_LOW (operands[1]); + unsigned HOST_WIDE_INT high = CONST_DOUBLE_HIGH (operands[1]); + unsigned HOST_WIDE_INT val = low; + unsigned HOST_WIDE_INT sign; + + /* Zero-extend the 16 least-significant bits. */ + val &= 0xffff; + operands[1] = GEN_INT (val); + + /* Arithmetic shift right the double-word by 16 bits. */ + low >>= 16; + low |= (high & 0xffff) << (HOST_BITS_PER_WIDE_INT - 16); + high >>= 16; + sign = 1; + sign <<= (HOST_BITS_PER_WIDE_INT - 16 - 1); + high ^= sign; + high -= sign; + + /* This will only be true if high is a sign-extension of low, i.e., + it must be either 0 or (unsigned)-1, and be zero iff the + most-significant bit of low is clear. 
*/ + if (high + (low >> (HOST_BITS_PER_WIDE_INT - 1)) == 0) + operands[2] = GEN_INT (low); + else + operands[2] = immed_double_const (low, high, DImode); +}") + +(define_insn "shori_media" + [(set (match_operand:DI 0 "ext_dest_operand" "=r,r") + (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0,0") + (const_int 16)) + (match_operand:DI 2 "immediate_operand" "K16Csu,nF")))] + "TARGET_SHMEDIA && (reload_completed || arith_reg_dest (operands[0], DImode))" + "@ + shori %u2, %0 + #" + [(set_attr "type" "arith_media,*")]) + +(define_insn "*shori_media_si" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 16)) + (match_operand:SI 2 "immediate_operand" "K16Csu")))] + "TARGET_SHMEDIA" + "shori %u2, %0") + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_movdst_operand" "") + (match_operand:DI 1 "general_movsrc_operand" ""))] + "" + "{ if (prepare_move_operands (operands, DImode)) DONE; }") + +(define_insn "movdf_media" + [(set (match_operand:DF 0 "general_movdst_operand" "=f,f,r,r,r,f,m,r,m") + (match_operand:DF 1 "general_movsrc_operand" "f,rZ,f,r,F,m,f,m,rZ"))] + "TARGET_SHMEDIA_FPU + && (register_operand (operands[0], DFmode) + || sh_register_operand (operands[1], DFmode))" + "@ + fmov.d %1, %0 + fmov.qd %N1, %0 + fmov.dq %1, %0 + add %1, r63, %0 + # + fld%M1.d %m1, %0 + fst%M0.d %m0, %1 + ld%M1.q %m1, %0 + st%M0.q %m0, %N1" + [(set_attr "type" "fmove_media,fload_media,dfpconv_media,arith_media,*,fload_media,fstore_media,load_media,store_media")]) + +(define_insn "movdf_media_nofpu" + [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m") + (match_operand:DF 1 "general_movsrc_operand" "r,F,m,rZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], DFmode) + || sh_register_operand (operands[1], DFmode))" + "@ + add %1, r63, %0 + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1" + [(set_attr "type" "arith_media,*,load_media,store_media")]) + +(define_split + [(set 
(match_operand:DF 0 "arith_reg_dest" "") + (match_operand:DF 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 3) (match_dup 2))] + " +{ + int endian = WORDS_BIG_ENDIAN ? 1 : 0; + long values[2]; + REAL_VALUE_TYPE value; + + REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]); + REAL_VALUE_TO_TARGET_DOUBLE (value, values); + + if (HOST_BITS_PER_WIDE_INT >= 64) + operands[2] = immed_double_const ((unsigned long) values[endian] + | ((HOST_WIDE_INT) values[1 - endian] + << 32), 0, DImode); + else + { + gcc_assert (HOST_BITS_PER_WIDE_INT == 32); + operands[2] = immed_double_const (values[endian], values[1 - endian], + DImode); + } + + operands[3] = gen_rtx_REG (DImode, true_regnum (operands[0])); +}") + +;; ??? This should be a define expand. + +(define_insn "movdf_k" + [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m") + (match_operand:DF 1 "general_movsrc_operand" "r,FQ,m,r"))] + "TARGET_SH1 + && (! (TARGET_SH4 || TARGET_SH2A_DOUBLE) || reload_completed + /* ??? We provide some insn so that direct_{load,store}[DFmode] get set */ + || (REG_P (operands[0]) && REGNO (operands[0]) == 3) + || (REG_P (operands[1]) && REGNO (operands[1]) == 3)) + && (arith_reg_operand (operands[0], DFmode) + || arith_reg_operand (operands[1], DFmode))" + "* return output_movedouble (insn, operands, DFmode);" + [(set_attr "length" "4") + (set_attr "type" "move,pcload,load,store")]) + +;; All alternatives of movdf_i4 are split for ! TARGET_FMOVD. +;; However, the d/F/c/z alternative cannot be split directly; it is converted +;; with special code in machine_dependent_reorg into a load of the R0_REG and +;; the d/m/c/X alternative, which is split later into single-precision +;; instructions. And when not optimizing, no splits are done before fixing +;; up pcloads, so we need usable length information for that. 
+(define_insn "movdf_i4" + [(set (match_operand:DF 0 "general_movdst_operand" "=d,r,d,d,m,r,r,m,!??r,!???d") + (match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r")) + (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c")) + (clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) + && (arith_reg_operand (operands[0], DFmode) + || arith_reg_operand (operands[1], DFmode))" + { + switch (which_alternative) + { + case 0: + if (TARGET_FMOVD) + return "fmov %1,%0"; + else if (REGNO (operands[0]) != REGNO (operands[1]) + 1) + return "fmov %R1,%R0\n\tfmov %S1,%S0"; + else + return "fmov %S1,%S0\n\tfmov %R1,%R0"; + case 3: + case 4: + return "fmov.d %1,%0"; + default: + return "#"; + } + } + [(set_attr_alternative "length" + [(if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8)) + (const_int 4) + (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6)) + (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6)) + (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6)) + (const_int 4) + (const_int 8) (const_int 8) ;; these need only 8 bytes for @(r0,rn) + ;; We can't use 4-byte push/pop on SHcompact, so we have to + ;; increment or decrement r15 explicitly. + (if_then_else + (ne (symbol_ref "TARGET_SHCOMPACT") (const_int 0)) + (const_int 10) (const_int 8)) + (if_then_else + (ne (symbol_ref "TARGET_SHCOMPACT") (const_int 0)) + (const_int 10) (const_int 8))]) + (set_attr "type" "fmove,move,pcfload,fload,fstore,pcload,load,store,load,fload") + (set_attr "late_fp_use" "*,*,*,*,yes,*,*,*,*,*") + (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes") + (const_string "double") + (const_string "none")))]) + +;; Moving DFmode between fp/general registers through memory +;; (the top of the stack) is faster than moving through fpul even for +;; little endian. 
Because the type of an instruction is important for its +;; scheduling, it is beneficial to split these operations, rather than +;; emitting them in one single chunk, even if this will expose a stack +;; use that will prevent scheduling of other stack accesses beyond this +;; instruction. +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 "=X"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && reload_completed + && (true_regnum (operands[0]) < 16) != (true_regnum (operands[1]) < 16)" + [(const_int 0)] + " +{ + rtx insn, tos; + + if (TARGET_SH5 && true_regnum (operands[1]) < 16) + { + emit_move_insn (stack_pointer_rtx, + plus_constant (stack_pointer_rtx, -8)); + tos = gen_tmp_stack_mem (DFmode, stack_pointer_rtx); + } + else + tos = gen_tmp_stack_mem (DFmode, + gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx)); + insn = emit_insn (gen_movdf_i4 (tos, operands[1], operands[2])); + if (! (TARGET_SH5 && true_regnum (operands[1]) < 16)) + add_reg_note (insn, REG_INC, stack_pointer_rtx); + if (TARGET_SH5 && true_regnum (operands[0]) < 16) + tos = gen_tmp_stack_mem (DFmode, stack_pointer_rtx); + else + tos = gen_tmp_stack_mem (DFmode, + gen_rtx_POST_INC (Pmode, stack_pointer_rtx)); + insn = emit_insn (gen_movdf_i4 (operands[0], tos, operands[2])); + if (TARGET_SH5 && true_regnum (operands[0]) < 16) + emit_move_insn (stack_pointer_rtx, plus_constant (stack_pointer_rtx, 8)); + else + add_reg_note (insn, REG_INC, stack_pointer_rtx); + DONE; +}") + +;; local-alloc sometimes allocates scratch registers even when not required, +;; so we must be prepared to handle these. + +;; Remove the use and clobber from a movdf_i4 so that we can use movdf_k. 
+(define_split + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) + && reload_completed + && true_regnum (operands[0]) < 16 + && true_regnum (operands[1]) < 16" + [(set (match_dup 0) (match_dup 1))] + " +{ + /* If this was a reg <-> mem operation with base + index reg addressing, + we have to handle this in a special way. */ + rtx mem = operands[0]; + int store_p = 1; /* Assume a store; cleared below if operands[0] is not a memory_operand. */ + if (! memory_operand (mem, DFmode)) + { + mem = operands[1]; + store_p = 0; + } + if (GET_CODE (mem) == SUBREG && SUBREG_BYTE (mem) == 0) + mem = SUBREG_REG (mem); + if (MEM_P (mem)) + { + rtx addr = XEXP (mem, 0); + if (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) + && REG_P (XEXP (addr, 1))) + { + int offset; + rtx reg0 = gen_rtx_REG (Pmode, 0); + rtx regop = operands[store_p], word0 ,word1; + + if (GET_CODE (regop) == SUBREG) + alter_subreg (&regop); + if (REGNO (XEXP (addr, 0)) == REGNO (XEXP (addr, 1))) + offset = 2; /* Same reg twice: presumably r0 + r0, so r0 += 2 moves the address by 4. */ + else + offset = 4; + mem = copy_rtx (mem); + PUT_MODE (mem, SImode); + word0 = gen_rtx_SUBREG (SImode, regop, 0); + alter_subreg (&word0); + word1 = gen_rtx_SUBREG (SImode, regop, 4); + alter_subreg (&word1); + if (store_p || ! refers_to_regno_p (REGNO (word0), + REGNO (word0) + 1, addr, 0)) + { + emit_insn (store_p + ? gen_movsi_ie (mem, word0) + : gen_movsi_ie (word0, mem)); + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset))); + mem = copy_rtx (mem); + emit_insn (store_p + ?
gen_movsi_ie (mem, word1) + : gen_movsi_ie (word1, mem)); + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset))); /* Undo the adjustment of r0. */ + } + else /* Load whose word0 register is used by ADDR: fetch word1 first. */ + { + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset))); + emit_insn (gen_movsi_ie (word1, mem)); + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset))); + mem = copy_rtx (mem); + emit_insn (gen_movsi_ie (word0, mem)); + } + DONE; + } + } +}") + +;; Split away the clobber of r0 after machine_dependent_reorg has fixed pcloads. +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (reg:SI R0_REG))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (use (match_dup 2)) + (clobber (scratch:SI))])] + "") + +(define_expand "reload_indf__frn" + [(parallel [(set (match_operand:DF 0 "register_operand" "=a") + (match_operand:DF 1 "immediate_operand" "FQ")) + (use (reg:PSI FPSCR_REG)) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "TARGET_SH1" + "") + +(define_expand "reload_outdf__RnFRm" + [(parallel [(set (match_operand:DF 0 "register_operand" "=r,f") + (match_operand:DF 1 "register_operand" "af,r")) + (clobber (match_operand:SI 2 "register_operand" "=&y,y"))])] + "TARGET_SH1" + "") + +;; Simplify no-op moves. +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "TARGET_SH2E && reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(set (match_dup 0) (match_dup 0))] + "") + +;; fmovd substitute post-reload splits +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "TARGET_SH4 && !
TARGET_FMOVD && reload_completed + && FP_OR_XD_REGISTER_P (true_regnum (operands[0])) + && FP_OR_XD_REGISTER_P (true_regnum (operands[1]))" + [(const_int 0)] + " +{ + int dst = true_regnum (operands[0]), src = true_regnum (operands[1]); + emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, dst), + gen_rtx_REG (SFmode, src), operands[2])); + emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, dst + 1), + gen_rtx_REG (SFmode, src + 1), operands[2])); + DONE; +}") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (mem:DF (match_operand:SI 1 "register_operand" ""))) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed + && FP_OR_XD_REGISTER_P (true_regnum (operands[0])) + && find_regno_note (insn, REG_DEAD, true_regnum (operands[1]))" + [(const_int 0)] + " +{ + int regno = true_regnum (operands[0]); + rtx insn; + rtx mem = SET_SRC (XVECEXP (PATTERN (curr_insn), 0, 0)); + rtx mem2 + = change_address (mem, SFmode, gen_rtx_POST_INC (Pmode, operands[1])); + insn = emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, + regno + !! TARGET_LITTLE_ENDIAN), + mem2, operands[2])); + add_reg_note (insn, REG_INC, operands[1]); + insn = emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, + regno + ! TARGET_LITTLE_ENDIAN), + change_address (mem, SFmode, NULL_RTX), + operands[2])); + DONE; +}") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed + && FP_OR_XD_REGISTER_P (true_regnum (operands[0]))" + [(const_int 0)] +{ + int regno = true_regnum (operands[0]); + rtx addr, insn; + rtx mem2 = change_address (operands[1], SFmode, NULL_RTX); + rtx reg0 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 
1 : 0)); + rtx reg1 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 0 : 1)); + + operands[1] = copy_rtx (mem2); + addr = XEXP (mem2, 0); + + switch (GET_CODE (addr)) + { + case REG: + /* This is complicated. If the register is an arithmetic register + we can just fall through to the REG+DISP case below. Otherwise + we have to use a combination of POST_INC and REG addressing... */ + if (! arith_reg_operand (operands[1], SFmode)) + { + XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr); + insn = emit_insn (gen_movsf_ie (reg0, mem2, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + + emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + + /* If we have modified the stack pointer, the value that we have + read with post-increment might be modified by an interrupt, + so write it back. */ + if (REGNO (XEXP (addr, 0)) == STACK_POINTER_REGNUM) + emit_insn (gen_push_e (reg0)); + else + emit_insn (gen_addsi3 (XEXP (operands[1], 0), XEXP (operands[1], 0), GEN_INT (-4))); + break; + } + /* Fall through. */ + + case PLUS: + emit_insn (gen_movsf_ie (reg0, operands[1], operands[2])); + operands[1] = copy_rtx (operands[1]); + XEXP (operands[1], 0) = plus_constant (addr, 4); + emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + break; + + case POST_INC: + insn = emit_insn (gen_movsf_ie (reg0, operands[1], operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + + insn = emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + break; + + default: + debug_rtx (addr); + gcc_unreachable (); + } + + DONE; +}) + +(define_split + [(set (match_operand:DF 0 "memory_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! 
TARGET_FMOVD && reload_completed + && FP_OR_XD_REGISTER_P (true_regnum (operands[1]))" + [(const_int 0)] +{ + int regno = true_regnum (operands[1]); + rtx insn, addr; + rtx reg0 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + rtx reg1 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 0 : 1)); + + operands[0] = copy_rtx (operands[0]); + PUT_MODE (operands[0], SFmode); + addr = XEXP (operands[0], 0); + + switch (GET_CODE (addr)) + { + case REG: + /* This is complicated. If the register is an arithmetic register + we can just fall through to the REG+DISP case below. Otherwise + we have to use a combination of REG and PRE_DEC addressing... */ + if (! arith_reg_operand (operands[0], SFmode)) + { + emit_insn (gen_addsi3 (addr, addr, GEN_INT (4))); + emit_insn (gen_movsf_ie (operands[0], reg1, operands[2])); + + operands[0] = copy_rtx (operands[0]); + XEXP (operands[0], 0) = addr = gen_rtx_PRE_DEC (SImode, addr); + + insn = emit_insn (gen_movsf_ie (operands[0], reg0, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + break; + } + /* Fall through. */ + + case PLUS: + /* Since REG+DISP addressing has already been decided upon by gcc + we can rely upon it having chosen an arithmetic register as the + register component of the address. Just emit the lower numbered + register first, to the lower address, then the higher numbered + register to the higher address. */ + emit_insn (gen_movsf_ie (operands[0], reg0, operands[2])); + + operands[0] = copy_rtx (operands[0]); + XEXP (operands[0], 0) = plus_constant (addr, 4); + + emit_insn (gen_movsf_ie (operands[0], reg1, operands[2])); + break; + + case PRE_DEC: + /* This is easy. Output the word to go to the higher address + first (ie the word in the higher numbered register) then the + word to go to the lower address. 
*/ + + insn = emit_insn (gen_movsf_ie (operands[0], reg1, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + + insn = emit_insn (gen_movsf_ie (operands[0], reg0, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + break; + + default: + /* FAIL; */ + debug_rtx (addr); + gcc_unreachable (); + } + + DONE; +}) + +;; If the output is a register and the input is memory or a register, we have +;; to be careful and see which word needs to be loaded first. + +(define_split + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" ""))] + "TARGET_SH1 && reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + " +{ + int regno; + + if ((MEM_P (operands[0]) + && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC) + || (MEM_P (operands[1]) + && GET_CODE (XEXP (operands[1], 0)) == POST_INC)) + FAIL; + + switch (GET_CODE (operands[0])) + { + case REG: + regno = REGNO (operands[0]); + break; + case SUBREG: + regno = subreg_regno (operands[0]); + break; + case MEM: + regno = -1; + break; + default: + gcc_unreachable (); + } + + if (regno == -1 + || ! refers_to_regno_p (regno, regno + 1, operands[1], 0)) + { + operands[2] = operand_subword (operands[0], 0, 0, DFmode); + operands[3] = operand_subword (operands[1], 0, 0, DFmode); + operands[4] = operand_subword (operands[0], 1, 0, DFmode); + operands[5] = operand_subword (operands[1], 1, 0, DFmode); + } + else + { + operands[2] = operand_subword (operands[0], 1, 0, DFmode); + operands[3] = operand_subword (operands[1], 1, 0, DFmode); + operands[4] = operand_subword (operands[0], 0, 0, DFmode); + operands[5] = operand_subword (operands[1], 0, 0, DFmode); + } + + if (operands[2] == 0 || operands[3] == 0 + || operands[4] == 0 || operands[5] == 0) + FAIL; +}") + +;; If a base address generated by LEGITIMIZE_ADDRESS for SImode is +;; used only once, let combine add in the index again. 
+ +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "" "")) + (clobber (match_operand 2 "register_operand" ""))] + "TARGET_SH1 && ! reload_in_progress && ! reload_completed + && ALLOW_INDEXED_ADDRESS" + [(use (reg:SI R0_REG))] + " +{ + rtx addr, reg, const_int; + + if (!MEM_P (operands[1])) + FAIL; + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) != PLUS) + FAIL; + reg = XEXP (addr, 0); + const_int = XEXP (addr, 1); + if (! (BASE_REGISTER_RTX_P (reg) && INDEX_REGISTER_RTX_P (operands[2]) + && CONST_INT_P (const_int))) + FAIL; + emit_move_insn (operands[2], const_int); + emit_move_insn (operands[0], + change_address (operands[1], VOIDmode, + gen_rtx_PLUS (SImode, reg, operands[2]))); + DONE; +}") + +(define_split + [(set (match_operand:SI 1 "" "") + (match_operand:SI 0 "register_operand" "")) + (clobber (match_operand 2 "register_operand" ""))] + "TARGET_SH1 && ! reload_in_progress && ! reload_completed + && ALLOW_INDEXED_ADDRESS" + [(use (reg:SI R0_REG))] + " +{ + rtx addr, reg, const_int; + + if (!MEM_P (operands[1])) + FAIL; + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) != PLUS) + FAIL; + reg = XEXP (addr, 0); + const_int = XEXP (addr, 1); + if (! 
(BASE_REGISTER_RTX_P (reg) && INDEX_REGISTER_RTX_P (operands[2]) + && CONST_INT_P (const_int))) + FAIL; + emit_move_insn (operands[2], const_int); + emit_move_insn (change_address (operands[1], VOIDmode, + gen_rtx_PLUS (SImode, reg, operands[2])), + operands[0]); + DONE; +}") + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" ""))] + "" + " +{ + if (prepare_move_operands (operands, DFmode)) DONE; + if (TARGET_SHMEDIA) + { + if (TARGET_SHMEDIA_FPU) + emit_insn (gen_movdf_media (operands[0], operands[1])); + else + emit_insn (gen_movdf_media_nofpu (operands[0], operands[1])); + DONE; + } + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + emit_df_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ())); + DONE; + } +}") + +;;This is incompatible with the way gcc uses subregs. +;;(define_insn "movv2sf_i" +;; [(set (match_operand:V2SF 0 "nonimmediate_operand" "=f,f,m") +;; (match_operand:V2SF 1 "nonimmediate_operand" "f,m,f"))] +;; "TARGET_SHMEDIA_FPU +;; && (fp_arith_reg_operand (operands[0], V2SFmode) +;; || fp_arith_reg_operand (operands[1], V2SFmode))" +;; "@ +;; # +;; fld%M1.p %m1, %0 +;; fst%M0.p %m0, %1" +;; [(set_attr "type" "*,fload_media,fstore_media")]) + +(define_insn_and_split "movv2sf_i" + [(set (match_operand:V2SF 0 "general_movdst_operand" "=f,rf,r,m,mf") + (match_operand:V2SF 1 "general_operand" "fm,rfm?,F?,f,rfZ?"))] + "TARGET_SHMEDIA_FPU" + "#" + "TARGET_SHMEDIA_FPU && reload_completed" + [(set (match_dup 0) (match_dup 1))] + " +{ + operands[0] = simplify_gen_subreg (DFmode, operands[0], V2SFmode, 0); + operands[1] = simplify_gen_subreg (DFmode, operands[1], V2SFmode, 0); +}") + +(define_expand "movv2sf" + [(set (match_operand:V2SF 0 "general_movdst_operand" "") + (match_operand:V2SF 1 "nonimmediate_operand" ""))] + "TARGET_SHMEDIA_FPU" + " +{ + if (prepare_move_operands (operands, V2SFmode)) + DONE; +}") + +(define_expand "addv2sf3" + [(match_operand:V2SF 0 
"fp_arith_reg_operand" "") + (match_operand:V2SF 1 "fp_arith_reg_operand" "") + (match_operand:V2SF 2 "fp_arith_reg_operand" "")] + "TARGET_SHMEDIA_FPU" + " +{ + sh_expand_binop_v2sf (PLUS, operands[0], operands[1], operands[2]); + DONE; +}") + +(define_expand "subv2sf3" + [(match_operand:V2SF 0 "fp_arith_reg_operand" "") + (match_operand:V2SF 1 "fp_arith_reg_operand" "") + (match_operand:V2SF 2 "fp_arith_reg_operand" "")] + "TARGET_SHMEDIA_FPU" + " +{ + sh_expand_binop_v2sf (MINUS, operands[0], operands[1], operands[2]); + DONE; +}") + +(define_expand "mulv2sf3" + [(match_operand:V2SF 0 "fp_arith_reg_operand" "") + (match_operand:V2SF 1 "fp_arith_reg_operand" "") + (match_operand:V2SF 2 "fp_arith_reg_operand" "")] + "TARGET_SHMEDIA_FPU" + " +{ + sh_expand_binop_v2sf (MULT, operands[0], operands[1], operands[2]); + DONE; +}") + +(define_expand "divv2sf3" + [(match_operand:V2SF 0 "fp_arith_reg_operand" "") + (match_operand:V2SF 1 "fp_arith_reg_operand" "") + (match_operand:V2SF 2 "fp_arith_reg_operand" "")] + "TARGET_SHMEDIA_FPU" + " +{ + sh_expand_binop_v2sf (DIV, operands[0], operands[1], operands[2]); + DONE; +}") + +(define_insn_and_split "*movv4sf_i" + [(set (match_operand:V4SF 0 "general_movdst_operand" "=f,rf,r,m,mf") + (match_operand:V4SF 1 "general_operand" "fm,rfm?,F?,f,rfZ?"))] + "TARGET_SHMEDIA_FPU" + "#" + "&& reload_completed" + [(const_int 0)] + " +{ + int i; + + for (i = 0; i < 4/2; i++) + { + rtx x, y; + + if (MEM_P (operands[0])) + x = adjust_address (operands[0], V2SFmode, + i * GET_MODE_SIZE (V2SFmode)); + else + x = simplify_gen_subreg (V2SFmode, operands[0], V4SFmode, i * 8); + + if (MEM_P (operands[1])) + y = adjust_address (operands[1], V2SFmode, + i * GET_MODE_SIZE (V2SFmode)); + else + y = simplify_gen_subreg (V2SFmode, operands[1], V4SFmode, i * 8); + + emit_insn (gen_movv2sf_i (x, y)); + } + + DONE; +}" + [(set_attr "length" "8")]) + +(define_expand "movv4sf" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (match_operand:V4SF 1 
"general_operand" ""))] + "TARGET_SHMEDIA_FPU" + " +{ + if (prepare_move_operands (operands, V4SFmode)) + DONE; +}") + +(define_insn_and_split "*movv16sf_i" + [(set (match_operand:V16SF 0 "nonimmediate_operand" "=f,f,m") + (match_operand:V16SF 1 "nonimmediate_operand" "f,m,f"))] + "TARGET_SHMEDIA_FPU" + "#" + "&& reload_completed" + [(const_int 0)] + " +{ + int i; + + for (i = 0; i < 16/2; i++) + { + rtx x,y; + + if (MEM_P (operands[0])) + x = adjust_address (operands[0], V2SFmode, + i * GET_MODE_SIZE (V2SFmode)); + else + { + x = gen_rtx_SUBREG (V2SFmode, operands[0], i * 8); + alter_subreg (&x); + } + + if (MEM_P (operands[1])) + y = adjust_address (operands[1], V2SFmode, + i * GET_MODE_SIZE (V2SFmode)); + else + { + y = gen_rtx_SUBREG (V2SFmode, operands[1], i * 8); + alter_subreg (&y); + } + + emit_insn (gen_movv2sf_i (x, y)); + } + + DONE; +}" + [(set_attr "length" "32")]) + +(define_expand "movv16sf" + [(set (match_operand:V16SF 0 "nonimmediate_operand" "=f,f,m") + (match_operand:V16SF 1 "nonimmediate_operand" "f,m,f"))] + "TARGET_SHMEDIA_FPU" + " +{ + if (prepare_move_operands (operands, V16SFmode)) + DONE; +}") + +(define_insn "movsf_media" + [(set (match_operand:SF 0 "general_movdst_operand" "=f,f,r,r,r,f,m,r,m") + (match_operand:SF 1 "general_movsrc_operand" "f,rZ,f,r,F,m,f,m,rZ"))] + "TARGET_SHMEDIA_FPU + && (register_operand (operands[0], SFmode) + || sh_register_operand (operands[1], SFmode))" + "@ + fmov.s %1, %0 + fmov.ls %N1, %0 + fmov.sl %1, %0 + add.l %1, r63, %0 + # + fld%M1.s %m1, %0 + fst%M0.s %m0, %1 + ld%M1.l %m1, %0 + st%M0.l %m0, %N1" + [(set_attr "type" "fmove_media,fload_media,fpconv_media,arith_media,*,fload_media,fstore_media,load_media,store_media") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "ignore")))]) + +(define_insn "movsf_media_nofpu" + [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,m") + (match_operand:SF 1 
"general_movsrc_operand" "r,F,m,rZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], SFmode) + || sh_register_operand (operands[1], SFmode))" + "@ + add.l %1, r63, %0 + # + ld%M1.l %m1, %0 + st%M0.l %m0, %N1" + [(set_attr "type" "arith_media,*,load_media,store_media") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:SF 0 "arith_reg_dest" "") + (match_operand:SF 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && ! FP_REGISTER_P (true_regnum (operands[0]))" + [(set (match_dup 3) (match_dup 2))] + " +{ + long values; + REAL_VALUE_TYPE value; + + REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (value, values); + operands[2] = GEN_INT (values); + + operands[3] = gen_rtx_REG (DImode, true_regnum (operands[0])); +}") + +(define_insn "movsf_i" + [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,r,m,l,r") + (match_operand:SF 1 "general_movsrc_operand" "r,G,FQ,mr,r,r,l"))] + "TARGET_SH1 + && (! TARGET_SH2E + /* ??? We provide some insn so that direct_{load,store}[SFmode] get set */ + || (REG_P (operands[0]) && REGNO (operands[0]) == 3) + || (REG_P (operands[1]) && REGNO (operands[1]) == 3)) + && (arith_reg_operand (operands[0], SFmode) + || arith_reg_operand (operands[1], SFmode))" + "@ + mov %1,%0 + mov #0,%0 + mov.l %1,%0 + mov.l %1,%0 + mov.l %1,%0 + lds %1,%0 + sts %1,%0" + [(set_attr "type" "move,move,pcload,load,store,move,move")]) + +;; We may not split the ry/yr/XX alternatives to movsi_ie, since +;; update_flow_info would not know where to put REG_EQUAL notes +;; when the destination changes mode. 
+(define_insn "movsf_ie" + [(set (match_operand:SF 0 "general_movdst_operand" + "=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,r,y,<,y,y") + (match_operand:SF 1 "general_movsrc_operand" + "f,r,G,H,FQ,mf,f,FQ,mr,r,y,f,>,fr,y,r,y,>,y")) + (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c")) + (clobber (match_scratch:SI 3 "=X,X,Bsc,Bsc,&z,X,X,X,X,X,X,X,X,y,X,X,X,X,X"))] + + "TARGET_SH2E + && (arith_reg_operand (operands[0], SFmode) + || arith_reg_operand (operands[1], SFmode) + || arith_reg_operand (operands[3], SImode) + || (fpul_operand (operands[0], SFmode) + && memory_operand (operands[1], SFmode) + && GET_CODE (XEXP (operands[1], 0)) == POST_INC) + || (fpul_operand (operands[1], SFmode) + && memory_operand (operands[0], SFmode) + && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC))" + "@ + fmov %1,%0 + mov %1,%0 + fldi0 %0 + fldi1 %0 + # + fmov.s %1,%0 + fmov.s %1,%0 + mov.l %1,%0 + mov.l %1,%0 + mov.l %1,%0 + fsts fpul,%0 + flds %1,fpul + lds.l %1,%0 + # + sts %1,%0 + lds %1,%0 + sts.l %1,%0 + lds.l %1,%0 + ! 
move optimized away" + [(set_attr "type" "fmove,move,fmove,fmove,pcfload,fload,fstore,pcload,load,store,fmove,fmove,load,*,fpul_gp,gp_fpul,fstore,load,nil") + (set_attr "late_fp_use" "*,*,*,*,*,*,yes,*,*,*,*,*,*,*,yes,*,yes,*,*") + (set_attr_alternative "length" + [(const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 4) + (if_then_else + (ne (symbol_ref "TARGET_SH2A") (const_int 0)) + (const_int 4) (const_int 2)) + (if_then_else + (ne (symbol_ref "TARGET_SH2A") (const_int 0)) + (const_int 4) (const_int 2)) + (const_int 2) + (if_then_else + (ne (symbol_ref "TARGET_SH2A") (const_int 0)) + (const_int 4) (const_int 2)) + (if_then_else + (ne (symbol_ref "TARGET_SH2A") (const_int 0)) + (const_int 4) (const_int 2)) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 4) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 0)]) + (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes") + (const_string "single") + (const_string "single")))]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (reg:SI FPUL_REG))] + "TARGET_SH1" + [(parallel [(set (reg:SF FPUL_REG) (match_dup 1)) + (use (match_dup 2)) + (clobber (scratch:SI))]) + (parallel [(set (match_dup 0) (reg:SF FPUL_REG)) + (use (match_dup 2)) + (clobber (scratch:SI))])] + "") + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_movdst_operand" "") + (match_operand:SF 1 "general_movsrc_operand" ""))] + "" + " +{ + if (prepare_move_operands (operands, SFmode)) + DONE; + if (TARGET_SHMEDIA) + { + if (TARGET_SHMEDIA_FPU) + emit_insn (gen_movsf_media (operands[0], operands[1])); + else + emit_insn (gen_movsf_media_nofpu (operands[0], operands[1])); + DONE; + } + if (TARGET_SH2E) + { + emit_sf_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ())); + DONE; + } +}") + +(define_insn "mov_nop" + [(set (match_operand 0 
"any_register_operand" "") (match_dup 0))] + "TARGET_SH2E" + "" + [(set_attr "length" "0") + (set_attr "type" "nil")]) + +(define_expand "reload_insf__frn" + [(parallel [(set (match_operand:SF 0 "register_operand" "=a") + (match_operand:SF 1 "immediate_operand" "FQ")) + (use (reg:PSI FPSCR_REG)) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "TARGET_SH1" + "") + +(define_expand "reload_insi__i_fpul" + [(parallel [(set (match_operand:SI 0 "fpul_operand" "=y") + (match_operand:SI 1 "immediate_operand" "i")) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "TARGET_SH1" + "") + +(define_expand "ptabs" + [(set (match_operand 0 "" "=b") (match_operand 1 "" "r"))] + "TARGET_SHMEDIA" + " +{ + if (!TARGET_PT_FIXED) + { + rtx eq = operands[1]; + + /* ??? For canonical RTL we really should remove any CONST from EQ + before wrapping it in the AND, and finally wrap the EQ into a + const if is constant. However, for reload we must expose the + input register or symbolic constant, and we can't have + different insn structures outside of the operands for different + alternatives of the same pattern. */ + eq = gen_rtx_EQ (SImode, gen_rtx_AND (Pmode, eq, GEN_INT (3)), + GEN_INT (3)); + operands[1] + = (gen_rtx_IF_THEN_ELSE + (PDImode, + eq, + gen_rtx_MEM (PDImode, operands[1]), + gen_rtx_fmt_e (TARGET_SHMEDIA32 ? SIGN_EXTEND : TRUNCATE, + PDImode, operands[1]))); + } +}") + +;; expanded by ptabs expander. 
+(define_insn "*extendsipdi_media" + [(set (match_operand:PDI 0 "target_reg_operand" "=b,b"); + (if_then_else:PDI (eq (and:SI (match_operand:SI 1 "target_operand" + "r,Csy") + (const_int 3)) + (const_int 3)) + (mem:PDI (match_dup 1)) + (sign_extend:PDI (match_dup 1))))] + "TARGET_SHMEDIA && !TARGET_PT_FIXED" + "@ + ptabs %1, %0 + pt %1, %0" + [(set_attr "type" "ptabs_media,pt_media") + (set_attr "length" "4,*")]) + +(define_insn "*truncdipdi_media" + [(set (match_operand:PDI 0 "target_reg_operand" "=b,b"); + (if_then_else:PDI (eq (and:DI (match_operand:DI 1 "target_operand" + "r,Csy") + (const_int 3)) + (const_int 3)) + (mem:PDI (match_dup 1)) + (truncate:PDI (match_dup 1))))] + "TARGET_SHMEDIA && !TARGET_PT_FIXED" + "@ + ptabs %1, %0 + pt %1, %0" + [(set_attr "type" "ptabs_media,pt_media") + (set_attr "length" "4,*")]) + +(define_insn "*movsi_y" + [(set (match_operand:SI 0 "register_operand" "=y,y") + (match_operand:SI 1 "immediate_operand" "Qi,I08")) + (clobber (match_scratch:SI 2 "=&z,r"))] + "TARGET_SH2E + && (reload_in_progress || reload_completed)" + "#" + [(set_attr "length" "4") + (set_attr "type" "pcload,move")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "immediate_operand" "")) + (clobber (match_operand:SI 2 "register_operand" ""))] + "TARGET_SH1" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (clobber (reg:SI R0_REG))] + "TARGET_SH1" + [(set (match_dup 0) (match_dup 1))] + "") + +;; ------------------------------------------------------------------------ +;; Define the real conditional branch instructions. 
+;; ------------------------------------------------------------------------ + +(define_insn "branch_true" + [(set (pc) (if_then_else (ne (reg:SI T_REG) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_SH1" + "* return output_branch (1, insn, operands);" + [(set_attr "type" "cbranch")]) + +(define_insn "branch_false" + [(set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_SH1" + "* return output_branch (0, insn, operands);" + [(set_attr "type" "cbranch")]) + +;; Patterns to prevent reorg from re-combining a condbranch with a branch +;; whose destination is too far away. +;; The const_int_operand is distinct for each branch target; it avoids +;; unwanted matches with redundant_insn. +(define_insn "block_branch_redirect" + [(set (pc) (unspec [(match_operand 0 "const_int_operand" "")] UNSPEC_BBR))] + "TARGET_SH1" + "" + [(set_attr "length" "0")]) + +;; This one has the additional purpose of recording a possible scratch register +;; for the following branch. +;; ??? Unfortunately, just setting the scratch register is not good enough, +;; because the insn then might be deemed dead and deleted. And we can't +;; make the use in the jump insn explicit because that would disable +;; delay slot scheduling from the target. +(define_insn "indirect_jump_scratch" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 "const_int_operand" "")] UNSPEC_BBR)) + (set (pc) (unspec [(const_int 0)] UNSPEC_BBR))] + "TARGET_SH1" + "" + [(set_attr "length" "0")]) + +;; This one is used to preempt an insn from beyond the bra / braf / jmp +;; being pulled into the delay slot of a condbranch that has been made to +;; jump around the unconditional jump because it was out of range.
+;; Emits no code (empty template, length 0) and sets the cond_delay_slot +;; attribute to "yes". +(define_insn "stuff_delay_slot" + [(set (pc) + (unspec [(match_operand:SI 0 "const_int_operand" "") (pc) + (match_operand:SI 1 "const_int_operand" "")] UNSPEC_BBR))] + "TARGET_SH1" + "" + [(set_attr "length" "0") + (set_attr "cond_delay_slot" "yes")]) + +;; Conditional branch insns + +;; Integer compare-and-branch expander. For EQ/NE, operand 1 is forced into +;; a register and a constant operand 2 is forced too unless it satisfies I06; +;; for the other codes each operand other than const0_rtx is forced into a +;; register. LE, LEU, LT and LTU are then rewritten with swap_condition and +;; exchanged operands. Operand 3 is wrapped in a LABEL_REF. +(define_expand "cbranchint4_media" + [(set (pc) + (if_then_else (match_operator 0 "shmedia_cbranch_comparison_operator" + [(match_operand 1 "" "") + (match_operand 2 "" "")]) + (match_operand 3 "" "") + (pc)))] + "TARGET_SHMEDIA" + " +{ + enum machine_mode mode = GET_MODE (operands[1]); + if (mode == VOIDmode) + mode = GET_MODE (operands[2]); + if (GET_CODE (operands[0]) == EQ || GET_CODE (operands[0]) == NE) + { + operands[1] = force_reg (mode, operands[1]); + if (CONSTANT_P (operands[2]) + && (! satisfies_constraint_I06 (operands[2]))) + operands[2] = force_reg (mode, operands[2]); + } + else + { + if (operands[1] != const0_rtx) + operands[1] = force_reg (mode, operands[1]); + if (operands[2] != const0_rtx) + operands[2] = force_reg (mode, operands[2]); + } + switch (GET_CODE (operands[0])) + { + case LEU: + case LE: + case LTU: + case LT: + operands[0] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[0])), + VOIDmode, operands[2], operands[1]); + operands[1] = XEXP (operands[0], 0); + operands[2] = XEXP (operands[0], 1); + break; + default: + operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), + VOIDmode, operands[1], operands[2]); + break; + } + operands[3] = gen_rtx_LABEL_REF (Pmode, operands[3]); +}") + +;; Floating-point compare-and-branch expander. The comparison (EQ in place +;; of NE) is computed into a fresh SImode pseudo with cstore4_media; the +;; branch then tests that pseudo for nonzero, or for zero when NE was +;; replaced by EQ. +(define_expand "cbranchfp4_media" + [(set (pc) + (if_then_else (match_operator 0 "sh_float_comparison_operator" + [(match_operand 1 "" "") + (match_operand 2 "" "")]) + (match_operand 3 "" "") + (pc)))] + "TARGET_SHMEDIA" + " +{ + rtx tmp = gen_reg_rtx (SImode); + rtx cmp; + if (GET_CODE (operands[0]) == NE) + cmp = gen_rtx_EQ (SImode, operands[1], operands[2]); + else + cmp = gen_rtx_fmt_ee (GET_CODE (operands[0]), SImode, + operands[1], operands[2]); + + emit_insn
(gen_cstore4_media (tmp, cmp, operands[1], operands[2])); + + if (GET_CODE (cmp) == GET_CODE (operands[0])) + operands[0] = gen_rtx_NE (VOIDmode, tmp, const0_rtx); + else + operands[0] = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + operands[1] = tmp; + operands[2] = const0_rtx; + operands[3] = gen_rtx_LABEL_REF (Pmode, operands[3]); +}") + +;; Equality-operator compare-and-branch on DImode operands. The second +;; alternative takes an I06 constant and uses the "i" form of the mnemonic. +(define_insn "*beq_media_i" + [(set (pc) + (if_then_else (match_operator 3 "equality_comparison_operator" + [(match_operand:DI 1 "arith_reg_operand" "r,r") + (match_operand:DI 2 "arith_operand" "r,I06")]) + (match_operand 0 "target_operand" "b,b") + (pc)))] + "TARGET_SHMEDIA" + "@ + b%o3%' %1, %2, %0%> + b%o3i%' %1, %2, %0%>" + [(set_attr "type" "cbranch_media")]) + +;; SImode variant of *beq_media_i. +(define_insn "*beq_media_i32" + [(set (pc) + (if_then_else (match_operator 3 "equality_comparison_operator" + [(match_operand:SI 1 "arith_reg_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,I06")]) + (match_operand 0 "target_operand" "b,b") + (pc)))] + "TARGET_SHMEDIA" + "@ + b%o3%' %1, %2, %0%> + b%o3i%' %1, %2, %0%>" + [(set_attr "type" "cbranch_media")]) + +;; Compare-and-branch for greater_comparison_operator codes on DImode +;; register-or-zero operands; operands are printed in pattern order. +(define_insn "*bgt_media_i" + [(set (pc) + (if_then_else (match_operator 3 "greater_comparison_operator" + [(match_operand:DI 1 "arith_reg_or_0_operand" "rN") + (match_operand:DI 2 "arith_reg_or_0_operand" "rN")]) + (match_operand 0 "target_operand" "b") + (pc)))] + "TARGET_SHMEDIA" + "b%o3%' %N1, %N2, %0%>" + [(set_attr "type" "cbranch_media")]) + +;; SImode variant of *bgt_media_i. +(define_insn "*bgt_media_i32" + [(set (pc) + (if_then_else (match_operator 3 "greater_comparison_operator" + [(match_operand:SI 1 "arith_reg_or_0_operand" "rN") + (match_operand:SI 2 "arith_reg_or_0_operand" "rN")]) + (match_operand 0 "target_operand" "b") + (pc)))] + "TARGET_SHMEDIA" + "b%o3%' %N1, %N2, %0%>" + [(set_attr "type" "cbranch_media")]) + +;; These are only needed to make invert_jump() happy - otherwise, jump +;; optimization will be silently disabled.
+;; Compare-and-branch for less_comparison_operator codes on DImode +;; register-or-zero operands; the operands are printed in swapped order +;; (%N2, %N1). +(define_insn "*blt_media_i" + [(set (pc) + (if_then_else (match_operator 3 "less_comparison_operator" + [(match_operand:DI 1 "arith_reg_or_0_operand" "rN") + (match_operand:DI 2 "arith_reg_or_0_operand" "rN")]) + (match_operand 0 "target_operand" "b") + (pc)))] + "TARGET_SHMEDIA" + "b%o3%' %N2, %N1, %0%>" + [(set_attr "type" "cbranch_media")]) + +;; SImode variant of *blt_media_i. +(define_insn "*blt_media_i32" + [(set (pc) + (if_then_else (match_operator 3 "less_comparison_operator" + [(match_operand:SI 1 "arith_reg_or_0_operand" "rN") + (match_operand:SI 2 "arith_reg_or_0_operand" "rN")]) + (match_operand 0 "target_operand" "b") + (pc)))] + "TARGET_SHMEDIA" + "b%o3%' %N2, %N1, %0%>" + [(set_attr "type" "cbranch_media")]) + +;; Combiner splitter for test-and-branch on a single bit in a register. This +;; is endian dependent because the non-paradoxical subreg looks different +;; on big endian. +;; The replacement shifts operand 1 left by (31 - bit position) into operand 4 +;; and then branches on operand 4 >= 0 for EQ, or on 0 > operand 4 otherwise. +(define_split + [(set (pc) + (if_then_else + (match_operator 3 "equality_comparison_operator" + [(subreg:SI (zero_extract:DI (subreg:DI (match_operand:SI 1 + "extend_reg_operand" "") + 0) + (const_int 1) + (match_operand 2 + "const_int_operand" "")) 0) + (const_int 0)]) + (match_operand 0 "target_operand" "") + (pc))) + (clobber (match_operand:SI 4 "arith_reg_dest" ""))] + "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN" + [(set (match_dup 4) (ashift:SI (match_dup 1) (match_dup 5))) + (set (pc) (if_then_else (match_dup 6) (match_dup 0) (pc)))] + + " +{ + operands[5] = GEN_INT (31 - INTVAL (operands[2])); + operands[6] = (GET_CODE (operands[3]) == EQ + ?
gen_rtx_GE (VOIDmode, operands[4], const0_rtx) + : gen_rtx_GT (VOIDmode, const0_rtx, operands[4])); +}") + +; operand 0 is the loop count pseudo register +; operand 1 is the number of loop iterations or 0 if it is unknown +; operand 2 is the maximum number of loop iterations +; operand 3 is the number of levels of enclosed loops +; operand 4 is the label to jump to at the top of the loop + +;; Expander for the decrement-and-branch loop end; FAILs unless the loop +;; counter (operand 0) is in SImode. +(define_expand "doloop_end" + [(parallel [(set (pc) (if_then_else + (ne:SI (match_operand:SI 0 "" "") + (const_int 1)) + (label_ref (match_operand 4 "" "")) + (pc))) + (set (match_dup 0) + (plus:SI (match_dup 0) (const_int -1))) + (clobber (reg:SI T_REG))])] + "TARGET_SH2" + " +{ + if (GET_MODE (operands[0]) != SImode) + FAIL; +} +") + +;; Always split ("#"): the replacement is a parallel that sets T_REG to +;; (operand 2 == 1) while storing operand 2 - 1 into operand 0, followed by +;; a branch to operand 1 taken when T_REG is zero. +(define_insn_and_split "doloop_end_split" + [(set (pc) + (if_then_else (ne:SI (match_operand:SI 2 "arith_reg_dest" "0") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "arith_reg_dest" "=r") + (plus (match_dup 2) (const_int -1))) + (clobber (reg:SI T_REG))] + "TARGET_SH2" + "#" + "" + [(parallel [(set (reg:SI T_REG) + (eq:SI (match_dup 2) (const_int 1))) + (set (match_dup 0) (plus:SI (match_dup 2) (const_int -1)))]) + (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] +"" + [(set_attr "type" "cbranch")]) + + +;; ------------------------------------------------------------------------ +;; Jump and linkage insns +;; ------------------------------------------------------------------------ + +;; Unconditional jump to a label: emits bra, or the output_far_jump sequence +;; when the insn's length attribute exceeds 4. Not matched for insns that +;; carry a REG_CROSSING_JUMP note. +(define_insn "jump_compact" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "TARGET_SH1 && !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" + "* +{ + /* The length is 16 if the delay slot is unfilled. */ + if (get_attr_length(insn) > 4) + return output_far_jump(insn, operands[0]); + else + return \"bra %l0%#\"; +}" + [(set_attr "type" "jump") + (set_attr "needs_delay_slot" "yes")]) + +;; ???
It would be much saner to explicitly use the scratch register +;; in the jump insn, and have indirect_jump_scratch only set it, +;; but fill_simple_delay_slots would refuse to do delay slot filling +;; from the target then, as it uses simplejump_p. +;;(define_insn "jump_compact_far" +;; [(set (pc) +;; (label_ref (match_operand 0 "" ""))) +;; (use (match_operand 1 "register_operand" "r")] +;; "TARGET_SH1" +;; "* return output_far_jump(insn, operands[0], operands[1]);" +;; [(set_attr "type" "jump") +;; (set_attr "needs_delay_slot" "yes")]) + +;; SHmedia unconditional jump through the "b"-constrained operand 0 +;; (blink %0, r63). +(define_insn "jump_media" + [(set (pc) + (match_operand 0 "target_operand" "b"))] + "TARGET_SHMEDIA" + "blink %0, r63%>" + [(set_attr "type" "jump_media")]) + +;; Jump expander: emits jump_compact on TARGET_SH1, or jump_media (with the +;; label wrapped in a LABEL_REF) on TARGET_SHMEDIA. The SHmedia path FAILs +;; while reload is in progress or completed. +(define_expand "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + " +{ + if (TARGET_SH1) + emit_jump_insn (gen_jump_compact (operands[0])); + else if (TARGET_SHMEDIA) + { + if (reload_in_progress || reload_completed) + FAIL; + emit_jump_insn (gen_jump_media (gen_rtx_LABEL_REF (Pmode, + operands[0]))); + } + DONE; +}") + +;; Zero-length insn that only uses FPSCR_REG; its fp_mode attribute is +;; "single" when fpu_single is "yes" and "double" otherwise. +(define_insn "force_mode_for_call" + [(use (reg:PSI FPSCR_REG))] + "TARGET_SHCOMPACT" + "" + [(set_attr "length" "0") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double")))]) + +;; Indirect call through register operand 0. Emits jsr/n on TARGET_SH2A when +;; dbr_sequence_length () is 0, and jsr otherwise. +(define_insn "calli" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH1" + "* + { + if (TARGET_SH2A && (dbr_sequence_length () == 0)) + return \"jsr/n\\t@%0\"; + else + return \"jsr\\t@%0%#\"; + }" + + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +;; This is the TBR-relative jump instruction for the SH2A architecture.
+;; Its use is enabled by assigning the attribute "function_vector" +;; and the vector number to a function during its declaration. +;; The displacement printed in the instruction is the vector number times 4. + +(define_insn "calli_tbr_rel" + [(call (mem (match_operand:SI 0 "symbol_ref_operand" "")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH2A && sh2a_is_function_vector_call (operands[0])" + "* +{ + unsigned HOST_WIDE_INT vect_num; + vect_num = sh2a_get_function_vector_number (operands[0]); + operands[2] = GEN_INT (vect_num * 4); + + return \"jsr/n\\t@@(%O2,tbr)\"; +}" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "no") + (set_attr "fp_set" "unknown")]) + +;; This is a pc-rel call, using bsrf, for use with PIC. + +(define_insn "calli_pcrel" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (use (match_operand 2 "" "")) + (clobber (reg:SI PR_REG))] + "TARGET_SH2" + "bsrf %0\\n%O2:%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +;; Call to a symbol_ref, always split ("#") once reload has completed: the +;; scratch is loaded with sym_label2reg for local symbols or +;; symPLT_label2reg otherwise, relative to a fresh call_site label, and the +;; call itself is emitted as calli_pcrel. +(define_insn_and_split "call_pcrel" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (clobber (reg:SI PR_REG)) + (clobber (match_scratch:SI 2 "=r"))] + "TARGET_SH2" + "#" + "reload_completed" + [(const_int 0)] + " +{ + rtx lab = PATTERN (gen_call_site ()); + + if (SYMBOL_REF_LOCAL_P (operands[0])) + emit_insn (gen_sym_label2reg (operands[2], operands[0], lab)); + else + emit_insn (gen_symPLT_label2reg (operands[2], operands[0], lab)); + emit_call_insn (gen_calli_pcrel (operands[2], operands[1], copy_rtx (lab))); + DONE; +}" + [(set_attr "type" "call") + (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +;; SHcompact call matched when the cookie in operand 2 does not have the +;; CALL_COOKIE_RET_TRAMP (1) bit set. +(define_insn "call_compact" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && ! (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%0%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +;; As call_compact, but matched when the cookie has CALL_COOKIE_RET_TRAMP (1) +;; set; additionally clobbers R10_REG. +(define_insn "call_compact_rettramp" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI R10_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%0%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +;; SHmedia call through target register operand 0 (blink %0, r18); clobbers +;; PR_MEDIA_REG. +(define_insn "call_media" + [(call (mem:DI (match_operand 0 "target_reg_operand" "b")) + (match_operand 1 "" "")) + (clobber (reg:DI PR_MEDIA_REG))] + "TARGET_SHMEDIA" + "blink %0, r18" + [(set_attr "type" "jump_media")]) + +;; Value-returning counterpart of calli. +(define_insn "call_valuei" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH1" + "* + { + if (TARGET_SH2A && (dbr_sequence_length () == 0)) + return \"jsr/n\\t@%1\"; + else + return \"jsr\\t@%1%#\"; + }" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string
"single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +;; This is the TBR-relative jump instruction for the SH2A architecture. +;; Its use is enabled by assigning the attribute "function_vector" +;; and the vector number to a function during its declaration. + +(define_insn "call_valuei_tbr_rel" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH2A && sh2a_is_function_vector_call (operands[1])" + "* +{ + unsigned HOST_WIDE_INT vect_num; + vect_num = sh2a_get_function_vector_number (operands[1]); + operands[3] = GEN_INT (vect_num * 4); + + return \"jsr/n\\t@@(%O3,tbr)\"; +}" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "no") + (set_attr "fp_set" "unknown")]) + +;; Value-returning counterpart of calli_pcrel. +(define_insn "call_valuei_pcrel" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (use (match_operand 3 "" "")) + (clobber (reg:SI PR_REG))] + "TARGET_SH2" + "bsrf %1\\n%O3:%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +;; Value-returning counterpart of call_pcrel; splits into call_valuei_pcrel. +(define_insn_and_split "call_value_pcrel" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (clobber (reg:SI PR_REG)) + (clobber (match_scratch:SI 3 "=r"))] + "TARGET_SH2" + "#" + "reload_completed" + [(const_int 0)] + " +{ + rtx lab = PATTERN (gen_call_site ()); + + if (SYMBOL_REF_LOCAL_P (operands[1])) + emit_insn (gen_sym_label2reg
(operands[3], operands[1], lab)); + else + emit_insn (gen_symPLT_label2reg (operands[3], operands[1], lab)); + emit_call_insn (gen_call_valuei_pcrel (operands[0], operands[3], + operands[2], copy_rtx (lab))); + DONE; +}" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +;; Value-returning counterpart of call_compact (cookie is operand 3). +(define_insn "call_value_compact" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (match_operand 3 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && ! (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%1%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +;; Value-returning counterpart of call_compact_rettramp. +(define_insn "call_value_compact_rettramp" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (match_operand 3 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI R10_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%1%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +;; Value-returning counterpart of call_media. +(define_insn "call_value_media" + [(set (match_operand 0 "" "=rf") + (call (mem:DI (match_operand 1 "target_reg_operand" "b")) + (match_operand 2 "" ""))) + (clobber (reg:DI PR_MEDIA_REG))] + "TARGET_SHMEDIA" + "blink %1, r18" + [(set_attr "type" "jump_media")]) + +;; Call expander. In order, it emits: call_media on TARGET_SHMEDIA; for +;; TARGET_SHCOMPACT with a nonzero cookie (operand 2), a call to +;; __GCC_shcompact_call_trampoline with the function address in r0 and the +;; cookie in r1; calli_tbr_rel for SH2A function-vector symbols when not PIC; +;; call_pcrel for symbol_ref callees under PIC on TARGET_SH2; and calli +;; through a register otherwise. +(define_expand "call" + [(parallel [(call (mem:SI (match_operand 0
"arith_reg_operand" "")) + (match_operand 1 "" "")) + (match_operand 2 "" "") + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))])] + "" + " +{ + if (TARGET_SHMEDIA) + { + operands[0] = shmedia_prepare_call_address (operands[0], 0); + emit_call_insn (gen_call_media (operands[0], operands[1])); + DONE; + } + else if (TARGET_SHCOMPACT && operands[2] && INTVAL (operands[2])) + { + rtx cookie_rtx = operands[2]; + long cookie = INTVAL (cookie_rtx); + rtx func = XEXP (operands[0], 0); + rtx r0, r1; + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOTPLT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + r0 = gen_rtx_REG (SImode, R0_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call. */ + emit_insn (gen_force_mode_for_call ()); + + operands[0] + = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\", + SFUNC_GOT); + operands[0] = force_reg (SImode, operands[0]); + + emit_move_insn (r0, func); + emit_move_insn (r1, cookie_rtx); + + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + emit_call_insn (gen_call_compact_rettramp (operands[0], operands[1], + operands[2])); + else + emit_call_insn (gen_call_compact (operands[0], operands[1], + operands[2])); + + DONE; + } + else if (TARGET_SHCOMPACT && flag_pic + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF + && !
SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0))) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[0], 0))); + XEXP (operands[0], 0) = reg; + } + if (!flag_pic && TARGET_SH2A + && MEM_P (operands[0]) + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF) + { + if (sh2a_is_function_vector_call (XEXP (operands[0], 0))) + { + emit_call_insn (gen_calli_tbr_rel (XEXP (operands[0], 0), + operands[1])); + DONE; + } + } + if (flag_pic && TARGET_SH2 + && MEM_P (operands[0]) + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF) + { + emit_call_insn (gen_call_pcrel (XEXP (operands[0], 0), operands[1])); + DONE; + } + else + { + operands[0] = force_reg (SImode, XEXP (operands[0], 0)); + operands[1] = operands[2]; + } + + emit_call_insn (gen_calli (operands[0], operands[1])); + DONE; +}") + +;; SHcompact call that also adds operand 3 to SP_REG; matched when the cookie +;; in operand 2 does not have CALL_COOKIE_RET_TRAMP (1) set. +(define_insn "call_pop_compact" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "n") + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand 3 "immediate_operand" "n"))) + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && !
(INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%0%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +;; As call_pop_compact, but matched when the cookie has +;; CALL_COOKIE_RET_TRAMP (1) set; additionally clobbers R10_REG. +(define_insn "call_pop_compact_rettramp" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "n") + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand 3 "immediate_operand" "n"))) + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI R10_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%0%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +;; SHcompact-only expander for calls that also adjust SP_REG. It asserts a +;; nonzero cookie (operand 2) and always calls through +;; __GCC_shcompact_call_trampoline, with the function address in r0 and the +;; cookie in r1. +(define_expand "call_pop" + [(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" "")) + (match_operand 1 "" "")) + (match_operand 2 "" "") + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand 3 "" "")))])] + "TARGET_SHCOMPACT" + " +{ + rtx cookie_rtx; + long cookie; + rtx func; + rtx r0, r1; + + gcc_assert (operands[2] && INTVAL (operands[2])); + cookie_rtx = operands[2]; + cookie = INTVAL (cookie_rtx); + func = XEXP (operands[0], 0); + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + emit_insn (gen_symGOTPLT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + r0 = gen_rtx_REG (SImode, R0_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call.
*/ + emit_insn (gen_force_mode_for_call ()); + + operands[0] = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\", + SFUNC_GOT); + operands[0] = force_reg (SImode, operands[0]); + + emit_move_insn (r0, func); + emit_move_insn (r1, cookie_rtx); + + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + emit_call_insn (gen_call_pop_compact_rettramp + (operands[0], operands[1], operands[2], operands[3])); + else + emit_call_insn (gen_call_pop_compact + (operands[0], operands[1], operands[2], operands[3])); + + DONE; +}") + +;; Value-returning call expander. It follows the same sequence of cases as +;; the "call" expander, emitting the call_value* patterns; the cookie is +;; operand 3. +(define_expand "call_value" + [(parallel [(set (match_operand 0 "arith_reg_operand" "") + (call (mem:SI (match_operand 1 "arith_reg_operand" "")) + (match_operand 2 "" ""))) + (match_operand 3 "" "") + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))])] + "" + " +{ + if (TARGET_SHMEDIA) + { + operands[1] = shmedia_prepare_call_address (operands[1], 0); + emit_call_insn (gen_call_value_media (operands[0], operands[1], + operands[2])); + DONE; + } + else if (TARGET_SHCOMPACT && operands[3] && INTVAL (operands[3])) + { + rtx cookie_rtx = operands[3]; + long cookie = INTVAL (cookie_rtx); + rtx func = XEXP (operands[1], 0); + rtx r0, r1; + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOTPLT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + r0 = gen_rtx_REG (SImode, R0_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call.
*/ + emit_insn (gen_force_mode_for_call ()); + + operands[1] + = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\", + SFUNC_GOT); + operands[1] = force_reg (SImode, operands[1]); + + emit_move_insn (r0, func); + emit_move_insn (r1, cookie_rtx); + + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + emit_call_insn (gen_call_value_compact_rettramp (operands[0], + operands[1], + operands[2], + operands[3])); + else + emit_call_insn (gen_call_value_compact (operands[0], operands[1], + operands[2], operands[3])); + + DONE; + } + else if (TARGET_SHCOMPACT && flag_pic + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && ! SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0))) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[1], 0))); + XEXP (operands[1], 0) = reg; + } + if (!flag_pic && TARGET_SH2A + && MEM_P (operands[1]) + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF) + { + if (sh2a_is_function_vector_call (XEXP (operands[1], 0))) + { + emit_call_insn (gen_call_valuei_tbr_rel (operands[0], + XEXP (operands[1], 0), operands[2])); + DONE; + } + } + if (flag_pic && TARGET_SH2 + && MEM_P (operands[1]) + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF) + { + emit_call_insn (gen_call_value_pcrel (operands[0], XEXP (operands[1], 0), + operands[2])); + DONE; + } + else + operands[1] = force_reg (SImode, XEXP (operands[1], 0)); + + emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2])); + DONE; +}") + +;; Indirect sibling call: jmp through the "k"-constrained register operand 0. +(define_insn "sibcalli" + [(call (mem:SI (match_operand:SI 0 "register_operand" "k")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (return)] + "TARGET_SH1" + "jmp @%0%#" + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +;; Sibling call using braf; operand 2 is printed as a label definition right +;; after the instruction. +(define_insn "sibcalli_pcrel" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "k")) + (match_operand 1 "" "")) + (use (match_operand 2 ""
"")) + (use (reg:PSI FPSCR_REG)) + (return)] + "TARGET_SH2" + "braf %0\\n%O2:%#" + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +;; This uses an unspec to describe that the symbol_ref is very close. +(define_insn "sibcalli_thunk" + [(call (mem:SI (unspec:SI [(match_operand:SI 0 "symbol_ref_operand" "")] + UNSPEC_THUNK)) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (return)] + "TARGET_SH1" + "bra %O0" + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump") + (set_attr "length" "2")]) + +;; Sibling call to a symbol_ref, always split ("#") once reload has +;; completed: the scratch is loaded with sym_label2reg relative to a fresh +;; call_site label, the call is emitted as sibcalli_pcrel, and the emitted +;; insn is flagged with SIBLING_CALL_P. +(define_insn_and_split "sibcall_pcrel" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (clobber (match_scratch:SI 2 "=k")) + (return)] + "TARGET_SH2" + "#" + "reload_completed" + [(const_int 0)] + " +{ + rtx lab = PATTERN (gen_call_site ()); + rtx call_insn; + + emit_insn (gen_sym_label2reg (operands[2], operands[0], lab)); + call_insn = emit_call_insn (gen_sibcalli_pcrel (operands[2], operands[1], + copy_rtx (lab))); + SIBLING_CALL_P (call_insn) = 1; + DONE; +}" + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +;; SHcompact sibling call. The first alternative ("z" for operand 2) is a +;; plain jmp; the second ("x") emits sts %2, r0 following the jmp. +(define_insn "sibcall_compact" + [(call (mem:SI (match_operand:SI 0 "register_operand" "k,k")) + (match_operand 1 "" "")) + (return) + (use (match_operand:SI 2 "register_operand" "z,x")) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + ;; We want to make sure the `x' above will only match MACH_REG + ;; because sibcall_epilogue may clobber MACL_REG.
+ (clobber (reg:SI MACL_REG))] + "TARGET_SHCOMPACT" + "@ + jmp @%0%# + jmp @%0\\n sts %2, r0" + [(set_attr "needs_delay_slot" "yes,no") + (set_attr "length" "2,4") + (set (attr "fp_mode") (const_string "single")) + (set_attr "type" "jump_ind")]) + +;; SHmedia sibling call through operand 0 (blink %0, r63); uses PR_MEDIA_REG. +(define_insn "sibcall_media" + [(call (mem:DI (match_operand 0 "target_reg_operand" "k")) + (match_operand 1 "" "")) + (use (reg:SI PR_MEDIA_REG)) + (return)] + "TARGET_SHMEDIA" + "blink %0, r63" + [(set_attr "type" "jump_media")]) + +;; Sibling-call expander. In order, it emits: sibcall_media on +;; TARGET_SHMEDIA; for TARGET_SHCOMPACT when the cookie (operand 2) has bits +;; other than CALL_COOKIE_RET_TRAMP (1) set, sibcall_compact through +;; __GCC_shcompact_call_trampoline with the function address in MACH_REG and +;; the cookie (return-trampoline bit cleared) in r1; sibcall_pcrel for local +;; symbol_ref callees under PIC on TARGET_SH2; and sibcalli through a +;; register otherwise. +(define_expand "sibcall" + [(parallel + [(call (mem:SI (match_operand 0 "arith_reg_operand" "")) + (match_operand 1 "" "")) + (match_operand 2 "" "") + (use (reg:PSI FPSCR_REG)) + (return)])] + "" + " +{ + if (TARGET_SHMEDIA) + { + operands[0] = shmedia_prepare_call_address (operands[0], 1); + emit_call_insn (gen_sibcall_media (operands[0], operands[1])); + DONE; + } + else if (TARGET_SHCOMPACT && operands[2] + && (INTVAL (operands[2]) & ~ CALL_COOKIE_RET_TRAMP (1))) + { + rtx cookie_rtx = operands[2]; + long cookie = INTVAL (cookie_rtx); + rtx func = XEXP (operands[0], 0); + rtx mach, r1; + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + /* FIXME: if we could tell whether all argument registers are + already taken, we could decide whether to force the use of + MACH_REG or to stick to R0_REG. Unfortunately, there's no + simple way to tell. We could use the CALL_COOKIE, but we + can't currently tell a register used for regular argument + passing from one that is unused. If we leave it up to reload + to decide which register to use, it seems to always choose + R0_REG, which leaves no available registers in SIBCALL_REGS + to hold the address of the trampoline.
*/ + mach = gen_rtx_REG (SImode, MACH_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call. */ + emit_insn (gen_force_mode_for_call ()); + + operands[0] + = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\", + SFUNC_GOT); + operands[0] = force_reg (SImode, operands[0]); + + /* We don't need a return trampoline, since the callee will + return directly to the upper caller. */ + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + { + cookie &= ~ CALL_COOKIE_RET_TRAMP (1); + cookie_rtx = GEN_INT (cookie); + } + + emit_move_insn (mach, func); + emit_move_insn (r1, cookie_rtx); + + emit_call_insn (gen_sibcall_compact (operands[0], operands[1], mach)); + DONE; + } + else if (TARGET_SHCOMPACT && flag_pic + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF + && ! SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0))) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOT2reg (reg, XEXP (operands[0], 0))); + XEXP (operands[0], 0) = reg; + } + if (flag_pic && TARGET_SH2 + && MEM_P (operands[0]) + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF + /* The PLT needs the PIC register, but the epilogue would have + to restore it, so we can only use PC-relative PIC calls for + static functions.
*/ + && SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0))) + { + emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0), operands[1])); + DONE; + } + else + operands[0] = force_reg (SImode, XEXP (operands[0], 0)); + + emit_call_insn (gen_sibcalli (operands[0], operands[1])); + DONE; +}") + +;; Value-returning counterpart of sibcalli. +(define_insn "sibcall_valuei" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "register_operand" "k")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (return)] + "TARGET_SH1" + "jmp @%1%#" + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +;; Value-returning counterpart of sibcalli_pcrel. +(define_insn "sibcall_valuei_pcrel" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "k")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (use (reg:PSI FPSCR_REG)) + (return)] + "TARGET_SH2" + "braf %1\\n%O3:%#" + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +;; Value-returning counterpart of sibcall_pcrel; splits into +;; sibcall_valuei_pcrel. +(define_insn_and_split "sibcall_value_pcrel" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (clobber (match_scratch:SI 3 "=k")) + (return)] + "TARGET_SH2" + "#" + "reload_completed" + [(const_int 0)] + " +{ + rtx lab = PATTERN (gen_call_site ()); + rtx call_insn; + + emit_insn (gen_sym_label2reg (operands[3], operands[1], lab)); + call_insn = emit_call_insn (gen_sibcall_valuei_pcrel (operands[0], + operands[3], + operands[2], + copy_rtx (lab))); + SIBLING_CALL_P (call_insn) = 1; + DONE; +}" + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +;; Value-returning SHcompact sibling call. The first alternative ("z" for +;; operand 3) is a plain jmp; the second ("x") emits sts %3, r0 following +;; the jmp. +(define_insn
"sibcall_value_compact" + [(set (match_operand 0 "" "=rf,rf") + (call (mem:SI (match_operand:SI 1 "register_operand" "k,k")) + (match_operand 2 "" ""))) + (return) + (use (match_operand:SI 3 "register_operand" "z,x")) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + ;; We want to make sure the `x' above will only match MACH_REG + ;; because sibcall_epilogue may clobber MACL_REG. + (clobber (reg:SI MACL_REG))] + "TARGET_SHCOMPACT" + "@ + jmp @%1%# + jmp @%1\\n sts %3, r0" + [(set_attr "needs_delay_slot" "yes,no") + (set_attr "length" "2,4") + (set (attr "fp_mode") (const_string "single")) + (set_attr "type" "jump_ind")]) + +;; Value-returning SHmedia sibling call through operand 1 (blink %1, r63). +(define_insn "sibcall_value_media" + [(set (match_operand 0 "" "=rf") + (call (mem:DI (match_operand 1 "target_reg_operand" "k")) + (match_operand 2 "" ""))) + (use (reg:SI PR_MEDIA_REG)) + (return)] + "TARGET_SHMEDIA" + "blink %1, r63" + [(set_attr "type" "jump_media")]) + +(define_expand "sibcall_value" + [(parallel + [(set (match_operand 0 "arith_reg_operand" "") + (call (mem:SI (match_operand 1 "arith_reg_operand" "")) + (match_operand 2 "" ""))) + (match_operand 3 "" "") + (use (reg:PSI FPSCR_REG)) + (return)])] + "" + " +{ + if (TARGET_SHMEDIA) + { + operands[1] = shmedia_prepare_call_address (operands[1], 1); + emit_call_insn (gen_sibcall_value_media (operands[0], operands[1], + operands[2])); + DONE; + } + else if (TARGET_SHCOMPACT && operands[3] + && (INTVAL (operands[3]) & ~ CALL_COOKIE_RET_TRAMP (1))) + { + rtx cookie_rtx = operands[3]; + long cookie = INTVAL (cookie_rtx); + rtx func = XEXP (operands[1], 0); + rtx mach, r1; + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + /* FIXME: if we could tell whether all argument registers are + already taken, we could decide whether to force the use of + MACH_REG or to stick to R0_REG.
Unfortunately, there's no + simple way to tell. We could use the CALL_COOKIE, but we + can't currently tell a register used for regular argument + passing from one that is unused. If we leave it up to reload + to decide which register to use, it seems to always choose + R0_REG, which leaves no available registers in SIBCALL_REGS + to hold the address of the trampoline. */ + mach = gen_rtx_REG (SImode, MACH_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call. */ + emit_insn (gen_force_mode_for_call ()); + + operands[1] + = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\", + SFUNC_GOT); + operands[1] = force_reg (SImode, operands[1]); + + /* We don't need a return trampoline, since the callee will + return directly to the upper caller. */ + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + { + cookie &= ~ CALL_COOKIE_RET_TRAMP (1); + cookie_rtx = GEN_INT (cookie); + } + + emit_move_insn (mach, func); + emit_move_insn (r1, cookie_rtx); + + emit_call_insn (gen_sibcall_value_compact (operands[0], operands[1], + operands[2], mach)); + DONE; + } + else if (TARGET_SHCOMPACT && flag_pic + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && ! SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0))) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOT2reg (reg, XEXP (operands[1], 0))); + XEXP (operands[1], 0) = reg; + } + if (flag_pic && TARGET_SH2 + && MEM_P (operands[1]) + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + /* The PLT needs the PIC register, but the epilogue would have + to restore it, so we can only use PC-relative PIC calls for + static functions. 
*/ + && SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0))) + { + emit_call_insn (gen_sibcall_value_pcrel (operands[0], + XEXP (operands[1], 0), + operands[2])); + DONE; + } + else + operands[1] = force_reg (SImode, XEXP (operands[1], 0)); + + emit_call_insn (gen_sibcall_valuei (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_insn "call_value_pop_compact" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand 4 "immediate_operand" "n"))) + (match_operand 3 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && ! (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%1%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "call_value_pop_compact_rettramp" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand 4 "immediate_operand" "n"))) + (match_operand 3 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI R10_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%1%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "call_value_pop" + [(parallel [(set (match_operand 0 "arith_reg_operand" "") + (call (mem:SI (match_operand 1 "arith_reg_operand" "")) + (match_operand 2 "" ""))) + (match_operand 3 "" "") + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + 
(match_operand 4 "" "")))])] + "TARGET_SHCOMPACT" + " +{ + rtx cookie_rtx; + long cookie; + rtx func; + rtx r0, r1; + + gcc_assert (TARGET_SHCOMPACT && operands[3] && INTVAL (operands[3])); + cookie_rtx = operands[3]; + cookie = INTVAL (cookie_rtx); + func = XEXP (operands[1], 0); + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOTPLT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + r0 = gen_rtx_REG (SImode, R0_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call. */ + emit_insn (gen_force_mode_for_call ()); + + operands[1] = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\", + SFUNC_GOT); + operands[1] = force_reg (SImode, operands[1]); + + emit_move_insn (r0, func); + emit_move_insn (r1, cookie_rtx); + + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + emit_call_insn (gen_call_value_pop_compact_rettramp + (operands[0], operands[1], operands[2], + operands[3], operands[4])); + else + emit_call_insn (gen_call_value_pop_compact + (operands[0], operands[1], operands[2], + operands[3], operands[4])); + + DONE; +}") + +(define_expand "sibcall_epilogue" + [(return)] + "" + " +{ + sh_expand_epilogue (1); + if (TARGET_SHCOMPACT) + { + rtx insn, set; + + /* If epilogue clobbers r0, preserve it in macl. 
*/ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if ((set = single_set (insn)) + && REG_P (SET_DEST (set)) + && REGNO (SET_DEST (set)) == R0_REG) + { + rtx r0 = gen_rtx_REG (SImode, R0_REG); + rtx tmp = gen_rtx_REG (SImode, MACL_REG); + + /* We can't tell at this point whether the sibcall is a + sibcall_compact and, if it is, whether it uses r0 or + mach as operand 2, so let the instructions that + preserve r0 be optimized away if r0 turns out to be + dead. */ + emit_insn_before (gen_rtx_SET (SImode, tmp, r0), insn); + emit_move_insn (r0, tmp); + break; + } + } + DONE; +}") + +(define_insn "indirect_jump_compact" + [(set (pc) + (match_operand:SI 0 "arith_reg_operand" "r"))] + "TARGET_SH1" + "jmp @%0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +(define_expand "indirect_jump" + [(set (pc) + (match_operand 0 "register_operand" ""))] + "" + " +{ + if (GET_MODE (operands[0]) != Pmode) + operands[0] = gen_rtx_SUBREG (Pmode, operands[0], 0); +}") + +;; The use of operand 1 / 2 helps us distinguish case table jumps +;; which can be present in structured code from indirect jumps which can not +;; be present in structured code. This allows -fprofile-arcs to work. + +;; For SH1 processors. +(define_insn "casesi_jump_1" + [(set (pc) + (match_operand:SI 0 "register_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_SH1" + "jmp @%0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +;; For all later processors. +(define_insn "casesi_jump_2" + [(set (pc) (plus:SI (match_operand:SI 0 "register_operand" "r") + (label_ref (match_operand 1 "" "")))) + (use (label_ref (match_operand 2 "" "")))] + "TARGET_SH2 + && (! 
INSN_UID (operands[1]) || prev_real_insn (operands[1]) == insn)" + "braf %0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +(define_insn "casesi_jump_media" + [(set (pc) (match_operand 0 "target_reg_operand" "b")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_SHMEDIA" + "blink %0, r63" + [(set_attr "type" "jump_media")]) + +;; Call subroutine returning any type. +;; ??? This probably doesn't work. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "(TARGET_SH2E || TARGET_SH2A) || TARGET_SHMEDIA" + " +{ + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}") + +;; ------------------------------------------------------------------------ +;; Misc insns +;; ------------------------------------------------------------------------ + +(define_insn "dect" + [(set (reg:SI T_REG) + (eq:SI (match_operand:SI 1 "arith_reg_dest" "0") (const_int 1))) + (set (match_operand:SI 0 "arith_reg_dest" "=r") + (plus:SI (match_dup 1) (const_int -1)))] + "TARGET_SH2" + "dt %0" + [(set_attr "type" "arith")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop") + +;; Load address of a label. This is only generated by the casesi expand, +;; and by machine_dependent_reorg (fixing up fp moves). 
+;; This must use unspec, because this only works for labels that are +;; within range. + +(define_insn "mova" + [(set (reg:SI R0_REG) + (unspec:SI [(label_ref (match_operand 0 "" ""))] UNSPEC_MOVA))] + "TARGET_SH1" + "mova %O0,r0" + [(set_attr "in_delay_slot" "no") + (set_attr "type" "arith")]) + +;; machine_dependent_reorg will make this a `mova'. +(define_insn "mova_const" + [(set (reg:SI R0_REG) + (unspec:SI [(match_operand 0 "immediate_operand" "i")] UNSPEC_MOVA))] + "TARGET_SH1" + "#" + [(set_attr "in_delay_slot" "no") + (set_attr "type" "arith")]) + +(define_expand "GOTaddr2picreg" ;; Set up PIC_REG from GOT_SYMBOL_NAME, using r0 as a temporary. + [(set (reg:SI R0_REG) + (unspec:SI [(const:SI (unspec:SI [(match_dup 1)] UNSPEC_PIC))] + UNSPEC_MOVA)) + (set (match_dup 0) (const:SI (unspec:SI [(match_dup 1)] UNSPEC_PIC))) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI R0_REG)))] + "" " +{ + if (TARGET_VXWORKS_RTP) /* VxWorks RTP: emit vxworks_picreg on the GOTT base and index symbols instead of the template above. */ + { + rtx gott_base = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); + rtx gott_index = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); + emit_insn (gen_vxworks_picreg (gott_base, gott_index)); + DONE; + } + + operands[0] = gen_rtx_REG (Pmode, PIC_REG); + operands[1] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME); + + if (TARGET_SHMEDIA) /* SHmedia: form a PC-relative reference to the GOT symbol against a new call_site marker, materialize it in TR0 with ptrel, then copy TR0 to the PIC register. */ + { + rtx tr = gen_rtx_REG (Pmode, TR0_REG); + rtx pic = operands[0]; + rtx lab = PATTERN (gen_call_site ()); + rtx insn, equiv; + + equiv = operands[1]; + operands[1] = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[1], lab), + UNSPEC_PCREL_SYMOFF); + operands[1] = gen_rtx_CONST (Pmode, operands[1]); + + if (Pmode == SImode) + { + emit_insn (gen_movsi_const (pic, operands[1])); + emit_insn (gen_ptrel_si (tr, pic, copy_rtx (lab))); + } + else + { + emit_insn (gen_movdi_const (pic, operands[1])); + emit_insn (gen_ptrel_di (tr, pic, copy_rtx (lab))); + } + + insn = emit_move_insn (operands[0], tr); + + set_unique_reg_note (insn, REG_EQUAL, equiv); /* equiv is the plain GOT symbol saved before operands[1] was wrapped. */ + + DONE; + } +} +") + +;; A helper for GOTaddr2picreg to finish up the initialization of the +;; PIC register.
+ +(define_expand "vxworks_picreg" ;; Net effect: PIC_REG = mem (mem (operand 0) plus operand 1), with r0 as a temporary. + [(set (reg:SI PIC_REG) + (const:SI (unspec:SI [(match_operand:SI 0 "" "")] UNSPEC_PIC))) + (set (reg:SI R0_REG) + (const:SI (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_PIC))) + (set (reg:SI PIC_REG) + (mem:SI (reg:SI PIC_REG))) + (set (reg:SI PIC_REG) + (mem:SI (plus:SI (reg:SI PIC_REG) + (reg:SI R0_REG))))] + "TARGET_VXWORKS_RTP") + +(define_insn "*ptb" ;; Only matches PIC code on SHmedia, and only when operand 1 satisfies constraint Csy. + [(set (match_operand 0 "target_reg_operand" "=b") + (const (unspec [(match_operand 1 "" "Csy")] + UNSPEC_DATALABEL)))] + "TARGET_SHMEDIA && flag_pic + && satisfies_constraint_Csy (operands[1])" + "ptb/u datalabel %1, %0" + [(set_attr "type" "ptabs_media") + (set_attr "length" "*")]) + +(define_insn "ptrel_si" ;; Operand 0 = operand 1 plus pc; operand 2 is printed with %O2 and a colon in front of the instruction. + [(set (match_operand:SI 0 "target_reg_operand" "=b") + (plus:SI (match_operand:SI 1 "register_operand" "r") + (pc))) + (match_operand:SI 2 "" "")] + "TARGET_SHMEDIA" + "%O2: ptrel/u %1, %0" + [(set_attr "type" "ptabs_media")]) + +(define_insn "ptrel_di" ;; DImode counterpart of ptrel_si; same output template. + [(set (match_operand:DI 0 "target_reg_operand" "=b") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (pc))) + (match_operand:DI 2 "" "")] + "TARGET_SHMEDIA" + "%O2: ptrel/u %1, %0" + [(set_attr "type" "ptabs_media")]) + +(define_expand "builtin_setjmp_receiver" + [(match_operand 0 "" "")] + "flag_pic" + " +{ + emit_insn (gen_GOTaddr2picreg ()); /* Re-run the PIC register setup; this expander is only enabled when flag_pic is set. */ + DONE; +}") + +(define_expand "call_site" + [(unspec [(match_dup 0)] UNSPEC_CALLER)] + "TARGET_SH1" + " +{ + static HOST_WIDE_INT i = 0; /* Persists across expansions, so each call_site gets a distinct number. */ + operands[0] = GEN_INT (i); + i++; +}") + +;; op0 = op1 + r12, but hidden in an unspec until reload has completed. See the comment +;; in the symGOT_load expander.
+ +(define_insn_and_split "chk_guard_add" + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (reg:SI PIC_REG)] + UNSPEC_CHKADD))] + "TARGET_SH1" + "#" + "TARGET_SH1 && reload_completed" ;; Split only after reload, into op0 = PIC_REG followed by op0 = op0 plus op1. + [(set (match_dup 0) (reg:SI PIC_REG)) + (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))] + "" + [(set_attr "type" "arith")]) + +(define_expand "sym_label2reg" + [(set (match_operand:SI 0 "" "") + (const:SI (unspec:SI [(match_operand:SI 1 "" "") + (const (plus:SI (match_operand:SI 2 "" "") + (const_int 2)))] + UNSPEC_SYMOFF)))] + "TARGET_SH1" "") + +(define_expand "symGOT_load" ;; Load operand 0 from the memory word addressed by operand 1 plus PIC_REG. + [(set (match_dup 2) (match_operand 1 "" "")) + (set (match_dup 3) (plus (match_dup 2) (reg PIC_REG))) + (set (match_operand 0 "" "") (mem (match_dup 3)))] + "" + " +{ + rtx mem; + + operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); + operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); /* When no new pseudos may be created, operand 0 doubles as both temporaries. */ + + if (TARGET_SHMEDIA) /* SHmedia: pick the constant-load pattern by Pmode and by whether flag_pic is greater than 1. */ + { + rtx reg = operands[2]; + + if (Pmode == DImode) + { + if (flag_pic > 1) + emit_insn (gen_movdi_const_32bit (reg, operands[1])); + else + emit_insn (gen_movdi_const_16bit (reg, operands[1])); + } + else + { + if (flag_pic > 1) + emit_insn (gen_movsi_const (reg, operands[1])); + else + emit_insn (gen_movsi_const_16bit (reg, operands[1])); + } + } + else + emit_move_insn (operands[2], operands[1]); + + /* When the stack protector inserts code after the result is set to + R0, @(rX, r12) will cause a spill failure for R0. Use an unspec + insn to avoid combining (set A (plus rX r12)) and (set op0 (mem A)) + when rX is a GOT address for the guard symbol. Ugly, but it doesn't + matter because this is a rare situation.
*/ + if (!TARGET_SHMEDIA + && flag_stack_protect + && GET_CODE (operands[1]) == CONST + && GET_CODE (XEXP (operands[1], 0)) == UNSPEC + && GET_CODE (XVECEXP (XEXP (operands[1], 0), 0, 0)) == SYMBOL_REF + && strcmp (XSTR (XVECEXP (XEXP (operands[1], 0), 0, 0), 0), + \"__stack_chk_guard\") == 0) + emit_insn (gen_chk_guard_add (operands[3], operands[2])); + else + emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2], + gen_rtx_REG (Pmode, PIC_REG))); + + /* N.B. This is not constant for a GOTPLT relocation. */ + mem = gen_rtx_MEM (Pmode, operands[3]); + MEM_NOTRAP_P (mem) = 1; + /* ??? Should we have a special alias set for the GOT? */ + emit_move_insn (operands[0], mem); + + DONE; +}") + +(define_expand "sym2GOT" + [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOT))] + "" + "") + +(define_expand "symGOT2reg" + [(match_operand 0 "" "") (match_operand 1 "" "")] + "" + " +{ + rtx gotsym, insn; + + gotsym = gen_sym2GOT (operands[1]); + PUT_MODE (gotsym, Pmode); + insn = emit_insn (gen_symGOT_load (operands[0], gotsym)); + + MEM_READONLY_P (SET_SRC (PATTERN (insn))) = 1; /* insn is expected to be the final load emitted by symGOT_load, whose source is the GOT MEM. */ + + DONE; +}") + +(define_expand "symGOTPLT2reg" + [(match_operand 0 "" "") (match_operand 1 "" "")] + "" + " +{ + rtx pltsym = gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, operands[1]), + UNSPEC_GOTPLT)); + emit_insn (gen_symGOT_load (operands[0], pltsym)); /* Same load as symGOT2reg, but through an UNSPEC_GOTPLT constant and without marking the MEM read-only. */ + DONE; +}") + +(define_expand "sym2GOTOFF" + [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOTOFF))] + "" + "") + +(define_expand "symGOTOFF2reg" + [(match_operand 0 "" "") (match_operand 1 "" "")] + "" + " +{ + rtx gotoffsym, insn; + rtx t = (!can_create_pseudo_p () + ?
operands[0] + : gen_reg_rtx (GET_MODE (operands[0]))); + + gotoffsym = gen_sym2GOTOFF (operands[1]); + PUT_MODE (gotoffsym, Pmode); + emit_move_insn (t, gotoffsym); + insn = emit_move_insn (operands[0], + gen_rtx_PLUS (Pmode, t, + gen_rtx_REG (Pmode, PIC_REG))); + + set_unique_reg_note (insn, REG_EQUAL, operands[1]); /* Record that the GOTOFF constant plus PIC_REG equals the symbol in operand 1. */ + + DONE; +}") + +(define_expand "symPLT_label2reg" + [(set (match_operand:SI 0 "" "") + (const:SI + (unspec:SI + [(const:SI (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_PLT)) + (const:SI (plus:SI (match_operand:SI 2 "" "") + (const_int 2)))] UNSPEC_PCREL_SYMOFF))) + ;; Even though the PIC register is not really used by the call + ;; sequence in which this is expanded, the PLT code assumes the PIC + ;; register is set, so we must not skip its initialization. Since + ;; we only use this expand as part of calling sequences, and never + ;; to take the address of a function, this is the best point to + ;; insert the (use). Using the PLT to take the address of a + ;; function would be wrong, not only because the PLT entry could + ;; then be called from a function that doesn't initialize the PIC + ;; register to the proper GOT, but also because pointers to the + ;; same function might not compare equal, should they be set by + ;; different shared libraries. + (use (reg:SI PIC_REG))] + "TARGET_SH1" + "") + +(define_expand "sym2PIC" + [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_PIC))] + "" + "") + +;; TLS code generation. +;; ??? this should be a define_insn_and_split +;; See the thread [PATCH/RFA] SH TLS support on gcc-patches +;; +;; for details.
+ +(define_insn "tls_global_dynamic" ;; Calls __tls_get_addr through its PLT entry with r4 = r12 plus the TLSGD literal for operand 1. + [(set (match_operand:SI 0 "register_operand" "=&z") + (call:SI (mem:SI (unspec:SI [(match_operand:SI 1 "" "")] + UNSPEC_TLSGD)) + (const_int 0))) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (clobber (reg:SI PR_REG)) + (clobber (scratch:SI))] + "TARGET_SH1" + "* +{ + return \"\\ +mov.l\\t1f,r4\\n\\ +\\tmova\\t2f,r0\\n\\ +\\tmov.l\\t2f,r1\\n\\ +\\tadd\\tr0,r1\\n\\ +\\tjsr\\t@r1\\n\\ +\\tadd\\tr12,r4\\n\\ +\\tbra\\t3f\\n\\ +\\tnop\\n\\ +\\t.align\\t2\\n\\ +1:\\t.long\\t%a1@TLSGD\\n\\ +2:\\t.long\\t__tls_get_addr@PLT\\n\\ +3:\"; +}" + [(set_attr "type" "tls_load") + (set_attr "length" "26")]) + +(define_insn "tls_local_dynamic" ;; Same instruction sequence as tls_global_dynamic, but with a TLSLDM literal. + [(set (match_operand:SI 0 "register_operand" "=&z") + (call:SI (mem:SI (unspec:SI [(match_operand:SI 1 "" "")] + UNSPEC_TLSLDM)) + (const_int 0))) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (clobber (reg:SI PR_REG)) + (clobber (scratch:SI))] + "TARGET_SH1" + "* +{ + return \"\\ +mov.l\\t1f,r4\\n\\ +\\tmova\\t2f,r0\\n\\ +\\tmov.l\\t2f,r1\\n\\ +\\tadd\\tr0,r1\\n\\ +\\tjsr\\t@r1\\n\\ +\\tadd\\tr12,r4\\n\\ +\\tbra\\t3f\\n\\ +\\tnop\\n\\ +\\t.align\\t2\\n\\ +1:\\t.long\\t%a1@TLSLDM\\n\\ +2:\\t.long\\t__tls_get_addr@PLT\\n\\ +3:\"; +}" + [(set_attr "type" "tls_load") + (set_attr "length" "26")]) + +(define_expand "sym2DTPOFF" + [(const (unspec [(match_operand 0 "" "")] UNSPEC_DTPOFF))] + "" + "") + +(define_expand "symDTPOFF2reg" + [(match_operand 0 "" "") (match_operand 1 "" "") (match_operand 2 "" "")] + "" + " +{ + rtx dtpoffsym; + rtx t = (!can_create_pseudo_p () + ?
operands[0] + : gen_reg_rtx (GET_MODE (operands[0]))); + + dtpoffsym = gen_sym2DTPOFF (operands[1]); + PUT_MODE (dtpoffsym, Pmode); + emit_move_insn (t, dtpoffsym); + emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, operands[2])); /* operand 0 = DTPOFF (operand 1) plus operand 2. */ + DONE; +}") + +(define_expand "sym2GOTTPOFF" + [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOTTPOFF))] + "" + "") + +(define_insn "tls_initial_exec" ;; operand 0 = gbr plus the word loaded from @(r0,r12), where r0 first receives the literal for operand 1. + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec:SI [(match_operand:SI 1 "" "")] + UNSPEC_TLSIE)) + (use (reg:SI GBR_REG)) + (use (reg:SI PIC_REG)) + (clobber (reg:SI R0_REG))] + "" + "* +{ + return \"\\ +mov.l\\t1f,r0\\n\\ +\\tstc\\tgbr,%0\\n\\ +\\tmov.l\\t@(r0,r12),r0\\n\\ +\\tbra\\t2f\\n\\ +\\tadd\\tr0,%0\\n\\ +\\t.align\\t2\\n\\ +1:\\t.long\\t%a1\\n\\ +2:\"; +}" + [(set_attr "type" "tls_load") + (set_attr "length" "16")]) + +(define_expand "sym2TPOFF" + [(const (unspec [(match_operand 0 "" "")] UNSPEC_TPOFF))] + "" + "") + +(define_expand "symTPOFF2reg" + [(match_operand 0 "" "") (match_operand 1 "" "")] + "" + " +{ + rtx tpoffsym; + + tpoffsym = gen_sym2TPOFF (operands[1]); + PUT_MODE (tpoffsym, Pmode); + emit_move_insn (operands[0], tpoffsym); /* operand 0 = TPOFF (operand 1). */ + DONE; +}") + +(define_insn "load_gbr" ;; Copy GBR into operand 0. + [(set (match_operand:SI 0 "register_operand" "=r") (reg:SI GBR_REG)) + (use (reg:SI GBR_REG))] + "" + "stc gbr,%0" + [(set_attr "type" "tls_load")]) + +;; case instruction for switch statements. + +;; Operand 0 is the index; +;; operand 1 is the minimum bound; +;; operand 2 is the range, i.e. maximum bound - minimum bound; +;; operand 3 is the CODE_LABEL for the table; +;; operand 4 is the CODE_LABEL to go to if the index is out of range.
+ +(define_expand "casesi" + [(match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" "") + (match_operand 3 "" "") (match_operand 4 "" "")] + "" + " +{ + rtx reg = gen_reg_rtx (SImode); + rtx reg2 = gen_reg_rtx (SImode); + if (TARGET_SHMEDIA) /* SHmedia: widen the operands to DImode, range-check against operands[4], then load the table entry and jump. */ + { + rtx reg = gen_reg_rtx (DImode); /* These DImode temporaries shadow the SImode reg and reg2 above. */ + rtx reg2 = gen_reg_rtx (DImode); + rtx reg3 = gen_reg_rtx (Pmode); + rtx reg4 = gen_reg_rtx (Pmode); + rtx reg5 = gen_reg_rtx (Pmode); + rtx load, test; + + operands[0] = convert_modes (DImode, SImode, operands[0], 0); + operands[1] = convert_modes (DImode, SImode, operands[1], 0); + operands[2] = convert_modes (DImode, SImode, operands[2], 1); + + test = gen_rtx_GT (VOIDmode, operands[1], operands[0]); /* Branch to operands[4] if the minimum bound is greater than the index. */ + emit_jump_insn (gen_cbranchdi4 (test, operands[1], operands[0], operands[4])); + emit_move_insn (reg, gen_rtx_MINUS (DImode, operands[0], operands[1])); + test = gen_rtx_GTU (VOIDmode, reg, operands[2]); /* Branch to operands[4] if (index - minimum) is above operands[2], unsigned. */ + emit_jump_insn (gen_cbranchdi4 (test, reg, operands[2], operands[4])); + emit_insn (gen_casesi_shift_media (reg2, reg, operands[3])); + emit_move_insn (reg3, gen_datalabel_ref (gen_rtx_LABEL_REF + (Pmode, operands[3]))); + /* Messy: can we subreg to clean this up? */ + if (Pmode == DImode) + load = gen_casesi_load_media (reg4, reg3, reg2, operands[3]); + else + load = gen_casesi_load_media (reg4, + gen_rtx_SUBREG (DImode, reg3, 0), + reg2, operands[3]); + PUT_MODE (SET_SRC (load), Pmode); /* The MEM in the casesi_load_media template carries no mode; give it Pmode. */ + emit_insn (load); + /* ??? The following add could be eliminated if we used ptrel. */ + emit_move_insn (reg5, gen_rtx_PLUS (Pmode, reg3, reg4)); + emit_jump_insn (gen_casesi_jump_media (reg5, operands[3])); + emit_barrier (); + DONE; + } + operands[1] = copy_to_mode_reg (SImode, operands[1]); + operands[2] = copy_to_mode_reg (SImode, operands[2]); + /* If optimizing, casesi_worker depends on the mode of the instruction + before the label it 'uses' - operands[3].
*/ + emit_insn (gen_casesi_0 (operands[0], operands[1], operands[2], operands[4], + reg)); + emit_insn (gen_casesi_worker_0 (reg2, reg, operands[3])); + if (TARGET_SH2) + emit_jump_insn (gen_casesi_jump_2 (reg2, gen_label_rtx (), operands[3])); + else + emit_jump_insn (gen_casesi_jump_1 (reg2, operands[3])); + /* For SH2 and newer, the ADDR_DIFF_VEC is not actually relative to + operands[3], but to the new label passed to casesi_jump_2 above. We will fix this up in + machine_dependent_reorg. */ + emit_barrier (); + DONE; +}") + +(define_expand "casesi_0" ;; operand 4 = operand 0 - operand 1; jump to operand 3 if that is greater (unsigned) than operand 2. + [(set (match_operand:SI 4 "" "") (match_operand:SI 0 "arith_reg_operand" "")) + (set (match_dup 4) (minus:SI (match_dup 4) + (match_operand:SI 1 "arith_operand" ""))) + (set (reg:SI T_REG) + (gtu:SI (match_dup 4) + (match_operand:SI 2 "arith_reg_operand" ""))) + (set (pc) + (if_then_else (ne (reg:SI T_REG) + (const_int 0)) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_SH1" + "") + +;; ??? reload might clobber r0 if we use it explicitly in the RTL before +;; reload; using a R0_REGS pseudo reg is likely to give poor code. +;; So we keep the use of r0 hidden in a R0_REGS clobber until after reload. + +(define_insn "casesi_worker_0" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(match_operand:SI 1 "register_operand" "0,r") + (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI)) + (clobber (match_scratch:SI 3 "=X,1")) + (clobber (match_scratch:SI 4 "=&z,z"))] + "TARGET_SH1" + "#") + +(define_split ;; SH1 without SH2, after reload: load the table address into r0 with mova, fetch the entry, then add r0 to it. + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_operand:SI 1 "register_operand" "") + (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 ""))] + "TARGET_SH1 && !
TARGET_SH2 && reload_completed" + [(set (reg:SI R0_REG) (unspec:SI [(label_ref (match_dup 2))] UNSPEC_MOVA)) + (parallel [(set (match_dup 0) + (unspec:SI [(reg:SI R0_REG) (match_dup 1) + (label_ref (match_dup 2))] UNSPEC_CASESI)) + (clobber (match_dup 3))]) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI R0_REG)))] + "if (GET_CODE (operands[2]) == CODE_LABEL) LABEL_NUSES (operands[2])++;") ;; The added mova insn is one more reference to the table label. + +(define_split ;; SH2 and up, after reload: same as the split above, but without the final add of r0. + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_operand:SI 1 "register_operand" "") + (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 ""))] + "TARGET_SH2 && reload_completed" + [(set (reg:SI R0_REG) (unspec:SI [(label_ref (match_dup 2))] UNSPEC_MOVA)) + (parallel [(set (match_dup 0) + (unspec:SI [(reg:SI R0_REG) (match_dup 1) + (label_ref (match_dup 2))] UNSPEC_CASESI)) + (clobber (match_dup 3))])] + "if (GET_CODE (operands[2]) == CODE_LABEL) LABEL_NUSES (operands[2])++;") + +(define_insn "casesi_worker_1" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(reg:SI R0_REG) + (match_operand:SI 1 "register_operand" "0,r") + (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI)) + (clobber (match_scratch:SI 3 "=X,1"))] + "TARGET_SH1" + "* +{ + rtx diff_vec = PATTERN (next_real_insn (operands[2])); /* The table is the ADDR_DIFF_VEC that follows label operands[2]; its mode selects the index scaling and the load width. */ + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE (diff_vec)) + { + case SImode: + return \"shll2 %1\;mov.l @(r0,%1),%0\"; + case HImode: + return \"add %1,%1\;mov.w @(r0,%1),%0\"; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return \"mov.b @(r0,%1),%0\;extu.b %0,%0\"; + return \"mov.b @(r0,%1),%0\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "length" "4")]) + +(define_insn "casesi_worker_2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(reg:SI R0_REG) + (match_operand:SI 1 "register_operand" "0,r") + (label_ref (match_operand 2 "" "")) + (label_ref (match_operand 3 "" ""))]
UNSPEC_CASESI)) + (clobber (match_operand:SI 4 "" "=X,1"))] + "TARGET_SH2 && reload_completed && flag_pic" + "* +{ + rtx diff_vec = PATTERN (next_real_insn (operands[2])); + const char *load; + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE (diff_vec)) /* As in casesi_worker_1, except the scaling insn is emitted here and the load is kept for the return value. */ + { + case SImode: + output_asm_insn (\"shll2 %1\", operands); + load = \"mov.l @(r0,%1),%0\"; break; + case HImode: + output_asm_insn (\"add %1,%1\", operands); + load = \"mov.w @(r0,%1),%0\"; break; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + load = \"mov.b @(r0,%1),%0\;extu.b %0,%0\"; + else + load = \"mov.b @(r0,%1),%0\"; + break; + default: + gcc_unreachable (); + } + output_asm_insn (\"add\tr0,%1\;mova\t%O3,r0\\n\", operands); /* Emitted between the scaling insn and the load that is returned as the template. */ + return load; +}" + [(set_attr "length" "8")]) + +(define_insn "casesi_shift_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r") + (unspec:DI [(label_ref:DI (match_operand 2 "" ""))] + UNSPEC_CASESI)))] + "TARGET_SHMEDIA" + "* +{ + rtx diff_vec = PATTERN (next_real_insn (operands[2])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE (diff_vec)) /* Shift count follows the table element mode: 2 for SImode, 1 for HImode; QImode only needs a copy, or nothing when source and destination are equal. */ + { + case SImode: + return \"shlli %1, 2, %0\"; + case HImode: + return \"shlli %1, 1, %0\"; + case QImode: + if (rtx_equal_p (operands[0], operands[1])) + return \"\"; + return \"add %1, r63, %0\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "type" "arith_media")]) + +(define_insn "casesi_load_media" + [(set (match_operand 0 "any_arith_reg_dest" "=r") + (mem (unspec [(match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "arith_reg_operand" "r") + (label_ref:DI (match_operand 3 "" ""))] UNSPEC_CASESI)))] + "TARGET_SHMEDIA" + "* +{ + rtx diff_vec = PATTERN (next_real_insn (operands[3])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE (diff_vec)) /* Pick the indexed load matching the table element mode; QImode uses ldx.ub instead of ldx.b when offset_unsigned is set. */ + { + case SImode: + return \"ldx.l %1, %2, %0\"; + case HImode: +#if 0 + if (ADDR_DIFF_VEC_FLAGS
(diff_vec).offset_unsigned) + return \"ldx.uw %1, %2, %0\"; +#endif + return \"ldx.w %1, %2, %0\"; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return \"ldx.ub %1, %2, %0\"; + return \"ldx.b %1, %2, %0\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "type" "load_media")]) + +(define_expand "return" + [(return)] + "reload_completed && ! sh_need_epilogue ()" + " +{ + if (TARGET_SHMEDIA) + { + emit_jump_insn (gen_return_media ()); + DONE; + } + + if (TARGET_SHCOMPACT + && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))) + { + emit_jump_insn (gen_shcompact_return_tramp ()); + DONE; + } /* Otherwise the preparation code ends without DONE, so the (return) template itself is emitted. */ +}") + +(define_insn "*return_i" + [(return)] + "TARGET_SH1 && ! (TARGET_SHCOMPACT + && (crtl->args.info.call_cookie + & CALL_COOKIE_RET_TRAMP (1))) + && reload_completed + && lookup_attribute (\"trap_exit\", + DECL_ATTRIBUTES (current_function_decl)) == NULL_TREE" + "* + { + if (TARGET_SH2A && (dbr_sequence_length () == 0) + && !current_function_interrupt) + return \"rts/n\"; /* Only on SH2A, when dbr_sequence_length () is 0 and this is not an interrupt function. */ + else + return \"%@ %#\"; + }" + [(set_attr "type" "return") + (set_attr "needs_delay_slot" "yes")]) + +;; trapa has no delay slot.
+(define_insn "*return_trapa" + [(return)] + "TARGET_SH1 && !TARGET_SHCOMPACT + && reload_completed" + "%@" + [(set_attr "type" "return")]) + +;; SHcompact return through a trampoline: passes r0 and the symbol name +;; __GCC_shcompact_return_trampoline to function_symbol, then emits +;; shcompact_return_tramp_i (jmp @r0). +(define_expand "shcompact_return_tramp" + [(return)] + "TARGET_SHCOMPACT + && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))" + " +{ + rtx reg = gen_rtx_REG (Pmode, R0_REG); + + function_symbol (reg, \"__GCC_shcompact_return_trampoline\", SFUNC_STATIC); + emit_jump_insn (gen_shcompact_return_tramp_i ()); + DONE; +}") + +(define_insn "shcompact_return_tramp_i" + [(parallel [(return) (use (reg:SI R0_REG))])] + "TARGET_SHCOMPACT + && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))" + "jmp @r0%#" + [(set_attr "type" "jump_ind") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "return_media_i" + [(parallel [(return) (use (match_operand 0 "target_reg_operand" "k"))])] + "TARGET_SHMEDIA && reload_completed" + "blink %0, r63" + [(set_attr "type" "jump_media")]) + +(define_insn "return_media_rte" + [(return)] + "TARGET_SHMEDIA && reload_completed && current_function_interrupt" + "rte" + [(set_attr "type" "jump_media")]) + +;; SHmedia return. Interrupt functions use return_media_rte. Otherwise +;; the return goes through the target register number returned by +;; sh_media_register_for_return (); when that is negative, PR_MEDIA_REG +;; is first copied into TR0. +(define_expand "return_media" + [(return)] + "TARGET_SHMEDIA && reload_completed" + " +{ + int tr_regno = sh_media_register_for_return (); + rtx tr; + + if (current_function_interrupt) + { + emit_jump_insn (gen_return_media_rte ()); + DONE; + } + if (tr_regno < 0) + { + rtx r18 = gen_rtx_REG (Pmode, PR_MEDIA_REG); + + gcc_assert (call_really_used_regs[TR0_REG] && !fixed_regs[TR0_REG]); + tr_regno = TR0_REG; + tr = gen_rtx_REG (Pmode, tr_regno); + emit_move_insn (tr, r18); + } + else + tr = gen_rtx_REG (Pmode, tr_regno); + + emit_jump_insn (gen_return_media_i (tr)); + DONE; +}") + +(define_insn "shcompact_preserve_incoming_args" + [(set (match_operand:SI 0 "register_operand" "+r") + (unspec:SI [(match_dup 0)] UNSPEC_COMPACT_ARGS))] + "TARGET_SHCOMPACT" + "" + [(set_attr "length" "0")]) + +(define_insn "shcompact_incoming_args" + [(set (reg:SI R2_REG) (unspec:SI [(reg:SI R2_REG)] 
UNSPEC_COMPACT_ARGS)) + (set (reg:SI R3_REG) (unspec:SI [(reg:SI R3_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R4_REG) (unspec:SI [(reg:SI R4_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R5_REG) (unspec:SI [(reg:SI R5_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R6_REG) (unspec:SI [(reg:SI R6_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R7_REG) (unspec:SI [(reg:SI R7_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R8_REG) (unspec:SI [(reg:SI R8_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R9_REG) (unspec:SI [(reg:SI R9_REG)] UNSPEC_COMPACT_ARGS)) + (set (mem:BLK (reg:SI MACL_REG)) + (unspec:BLK [(reg:SI MACH_REG)] UNSPEC_COMPACT_ARGS)) + (use (reg:SI R0_REG)) + (clobber (reg:SI R0_REG)) + (clobber (reg:SI MACL_REG)) + (clobber (reg:SI MACH_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT" + "jsr @r0%#" + [(set_attr "needs_delay_slot" "yes")]) + +;; Stack pointer adjustment by plus or minus SHMEDIA_REGS_STACK_ADJUST (), +;; output as a call through r0 (jsr @r0); PR is clobbered. +(define_insn "shmedia_save_restore_regs_compact" + [(set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 0 "immediate_operand" "i"))) + (use (reg:SI R0_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT + && (INTVAL (operands[0]) == SHMEDIA_REGS_STACK_ADJUST () + || INTVAL (operands[0]) == - SHMEDIA_REGS_STACK_ADJUST ())" + "jsr @r0%#" + [(set_attr "needs_delay_slot" "yes")]) + +(define_expand "prologue" + [(const_int 0)] + "" + "sh_expand_prologue (); DONE;") + +(define_expand "epilogue" + [(return)] + "" + " +{ + sh_expand_epilogue (0); + emit_jump_insn (gen_return ()); + DONE; +}") + +;; Dispatches to eh_set_ra_di for TARGET_SHMEDIA64 and to eh_set_ra_si +;; otherwise. +(define_expand "eh_return" + [(use (match_operand 0 "register_operand" ""))] + "" +{ + rtx ra = operands[0]; + + if (TARGET_SHMEDIA64) + emit_insn (gen_eh_set_ra_di (ra)); + else + emit_insn (gen_eh_set_ra_si (ra)); + + DONE; +}) + +;; Clobber the return address on the stack. We can't expand this +;; until we know where it will be put in the stack frame. + +(define_insn "eh_set_ra_si" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] + UNSPECV_EH_RETURN) + (clobber (match_scratch:SI 1 "=&r"))] + "! 
TARGET_SHMEDIA64" + "#") + +(define_insn "eh_set_ra_di" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")] + UNSPECV_EH_RETURN) + (clobber (match_scratch:DI 1 "=&r"))] + "TARGET_SHMEDIA64" + "#") + +;; After reload, the eh_set_ra_* patterns are replaced by a call to +;; sh_set_return_address with the new address and the scratch register. +(define_split + [(unspec_volatile [(match_operand 0 "register_operand" "")] + UNSPECV_EH_RETURN) + (clobber (match_scratch 1 ""))] + "reload_completed" + [(const_int 0)] + " +{ + sh_set_return_address (operands[0], operands[1]); + DONE; +}") + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0")]) + +;; Define movml instructions for SH2A target. Currently they are +;; used to push and pop all banked registers only. + +(define_insn "movml_push_banked" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus (match_dup 0) (const_int -32))) + (set (mem:SI (plus:SI (match_dup 0) (const_int 28))) (reg:SI R7_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 24))) (reg:SI R6_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 20))) (reg:SI R5_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 16))) (reg:SI R4_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 12))) (reg:SI R3_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 8))) (reg:SI R2_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 4))) (reg:SI R1_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 0))) (reg:SI R0_REG))] + "TARGET_SH2A && REGNO (operands[0]) == 15" + "movml.l\tr7,@-r15" + [(set_attr "in_delay_slot" "no")]) + +(define_insn "movml_pop_banked" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus (match_dup 0) (const_int 32))) + (set (reg:SI R0_REG) (mem:SI (plus:SI (match_dup 0) (const_int -32)))) + (set (reg:SI R1_REG) (mem:SI (plus:SI (match_dup 0) (const_int -28)))) + (set (reg:SI R2_REG) (mem:SI (plus:SI (match_dup 0) (const_int -24)))) + (set (reg:SI R3_REG) (mem:SI (plus:SI (match_dup 0) (const_int -20)))) + (set (reg:SI R4_REG) (mem:SI (plus:SI (match_dup 0) (const_int -16)))) + 
(set (reg:SI R5_REG) (mem:SI (plus:SI (match_dup 0) (const_int -12)))) + (set (reg:SI R6_REG) (mem:SI (plus:SI (match_dup 0) (const_int -8)))) + (set (reg:SI R7_REG) (mem:SI (plus:SI (match_dup 0) (const_int -4))))] + "TARGET_SH2A && REGNO (operands[0]) == 15" + "movml.l\t@r15+,r7" + [(set_attr "in_delay_slot" "no")]) + +;; ------------------------------------------------------------------------ +;; Scc instructions +;; ------------------------------------------------------------------------ + +(define_insn "movt" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (eq:SI (reg:SI T_REG) (const_int 1)))] + "TARGET_SH1" + "movt %0" + [(set_attr "type" "arith")]) + +;; SHmedia store-flag expander. Depending on the comparison code and on +;; FLOAT_MODE_P of the operand mode, the operands are swapped and/or the +;; reversed comparison is first computed into a temporary, which is then +;; compared against zero with EQ. +(define_expand "cstore4_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "sh_float_comparison_operator" + [(match_operand 2 "logical_operand" "") + (match_operand 3 "cmp_operand" "")]))] + "TARGET_SHMEDIA" + " +{ + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code code = GET_CODE (operands[1]); + bool invert, swap; + if (mode == VOIDmode) + mode = GET_MODE (operands[3]); + if (operands[2] == const0_rtx) + { + if (code == EQ || code == NE) + operands[2] = operands[3], operands[3] = const0_rtx; + } + else + operands[2] = force_reg (mode, operands[2]); + if (operands[3] != const0_rtx) + operands[3] = force_reg (mode, operands[3]); + + switch (code) + { + case GEU: + case GE: + swap = invert = !FLOAT_MODE_P (mode); + break; + + case LEU: + case LE: + swap = FLOAT_MODE_P (mode), invert = !swap; + break; + + case LTU: + case LT: + swap = true, invert = false; + break; + + case GTU: + case GT: + case EQ: + case UNORDERED: + swap = invert = false; + break; + + case NE: + swap = invert = true; + break; + + default: + gcc_unreachable (); + } + + if (swap) + { + rtx tem = operands[2]; + operands[2] = operands[3]; + operands[3] = tem; + code = swap_condition (code); + } + + if (invert) + { + rtx tem = can_create_pseudo_p () ? 
gen_reg_rtx (SImode) : operands[0]; + code = reverse_condition (code); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, operands[2], operands[3]); + emit_insn (gen_cstore4_media (tem, operands[1], + operands[2], operands[3])); + code = EQ; + operands[2] = tem; + operands[3] = const0_rtx; + } + + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, operands[2], operands[3]); +}") + +;; SImode store-flag. SHmedia defers to cstore4_media; otherwise EQ/NE +;; first try sh_expand_t_scc, and the remaining cases go through +;; sh_emit_compare_and_set, which is only done while expanding to RTL. +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "comparison_operator" + [(match_operand:SI 2 "cmpsi_operand" "") + (match_operand:SI 3 "arith_operand" "")]))] + "TARGET_SH1 || TARGET_SHMEDIA" + "if (TARGET_SHMEDIA) + { + emit_insn (gen_cstore4_media (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && sh_expand_t_scc (operands)) + DONE; + + if (! currently_expanding_to_rtl) + FAIL; + + sh_emit_compare_and_set (operands, SImode); + DONE; +") + +;; DImode counterpart of cstoresi4, following the same steps. +(define_expand "cstoredi4" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "comparison_operator" + [(match_operand:DI 2 "arith_operand" "") + (match_operand:DI 3 "arith_operand" "")]))] + "TARGET_SH2 || TARGET_SHMEDIA" + "if (TARGET_SHMEDIA) + { + emit_insn (gen_cstore4_media (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && sh_expand_t_scc (operands)) + DONE; + + if (! currently_expanding_to_rtl) + FAIL; + + sh_emit_compare_and_set (operands, DImode); + DONE; +") + + + +;; sne moves the complement of the T reg to DEST like this: +;; cmp/eq ... +;; mov #-1,temp +;; negc temp,dest +;; This is better than xoring compare result with 1 because it does +;; not require r0 and further, the -1 may be CSE-ed or lifted out of a +;; loop. 
+ +(define_expand "movnegt" + [(set (match_dup 1) (const_int -1)) + (parallel [(set (match_operand:SI 0 "" "") + (neg:SI (plus:SI (reg:SI T_REG) + (match_dup 1)))) + (set (reg:SI T_REG) + (ne:SI (ior:SI (reg:SI T_REG) (match_dup 1)) + (const_int 0)))])] + "" + " +{ + operands[1] = gen_reg_rtx (SImode); +}") + + +;; Recognize mov #-1/negc/neg sequence, and change it to movt/add #-1. +;; This prevents a regression that occurred when we switched from xor to +;; mov/neg for sne. + +(define_split + [(set (match_operand:SI 0 "arith_reg_dest" "") + (plus:SI (reg:SI T_REG) + (const_int -1)))] + "TARGET_SH1" + [(set (match_dup 0) (eq:SI (reg:SI T_REG) (const_int 1))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))] + "") + +;; SFmode store-flag: SHmedia goes through cstore4_media; otherwise +;; sh_emit_compare_and_set is used, and only while expanding to RTL. +(define_expand "cstoresf4" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "sh_float_comparison_operator" + [(match_operand:SF 2 "arith_operand" "") + (match_operand:SF 3 "arith_operand" "")]))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" + "if (TARGET_SHMEDIA) + { + emit_insn (gen_cstore4_media (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + if (! currently_expanding_to_rtl) + FAIL; + + sh_emit_compare_and_set (operands, SFmode); + DONE; +") + +;; DFmode counterpart of cstoresf4. +(define_expand "cstoredf4" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "sh_float_comparison_operator" + [(match_operand:DF 2 "arith_operand" "") + (match_operand:DF 3 "arith_operand" "")]))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + "if (TARGET_SHMEDIA) + { + emit_insn (gen_cstore4_media (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + if (! 
currently_expanding_to_rtl) + FAIL; + + sh_emit_compare_and_set (operands, DFmode); + DONE; +") + + +;; ------------------------------------------------------------------------- +;; Instructions to cope with inline literal tables +;; ------------------------------------------------------------------------- + +;; In each consttable_* pattern below, operand 0 is the value to emit and +;; nothing is output when operand 1 is const0_rtx. + +; 2 byte integer in line + +(define_insn "consttable_2" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g") + (match_operand 1 "" "")] + UNSPECV_CONST2)] + "" + "* +{ + if (operands[1] != const0_rtx) + assemble_integer (operands[0], 2, BITS_PER_UNIT * 2, 1); + return \"\"; +}" + [(set_attr "length" "2") + (set_attr "in_delay_slot" "no")]) + +; 4 byte integer in line + +(define_insn "consttable_4" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g") + (match_operand 1 "" "")] + UNSPECV_CONST4)] + "" + "* +{ + if (operands[1] != const0_rtx) + { + assemble_integer (operands[0], 4, BITS_PER_UNIT * 4, 1); + mark_symbol_refs_as_used (operands[0]); + } + return \"\"; +}" + [(set_attr "length" "4") + (set_attr "in_delay_slot" "no")]) + +; 8 byte integer in line + +(define_insn "consttable_8" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g") + (match_operand 1 "" "")] + UNSPECV_CONST8)] + "" + "* +{ + if (operands[1] != const0_rtx) + assemble_integer (operands[0], 8, BITS_PER_UNIT * 8, 1); + return \"\"; +}" + [(set_attr "length" "8") + (set_attr "in_delay_slot" "no")]) + +; 4 byte floating point + +(define_insn "consttable_sf" + [(unspec_volatile [(match_operand:SF 0 "general_operand" "=g") + (match_operand 1 "" "")] + UNSPECV_CONST4)] + "" + "* +{ + if (operands[1] != const0_rtx) + { + REAL_VALUE_TYPE d; + REAL_VALUE_FROM_CONST_DOUBLE (d, operands[0]); + assemble_real (d, SFmode, GET_MODE_ALIGNMENT (SFmode)); + } + return \"\"; +}" + [(set_attr "length" "4") + (set_attr "in_delay_slot" "no")]) + +; 8 byte floating point + +(define_insn "consttable_df" + [(unspec_volatile [(match_operand:DF 0 "general_operand" "=g") + 
(match_operand 1 "" "")] + UNSPECV_CONST8)] + "" + "* +{ + if (operands[1] != const0_rtx) + { + REAL_VALUE_TYPE d; + REAL_VALUE_FROM_CONST_DOUBLE (d, operands[0]); + assemble_real (d, DFmode, GET_MODE_ALIGNMENT (DFmode)); + } + return \"\"; +}" + [(set_attr "length" "8") + (set_attr "in_delay_slot" "no")]) + +;; Alignment is needed for some constant tables; it may also be added for +;; instructions at the start of loops, or after unconditional branches. +;; ??? We would get more accurate lengths if we did instruction +;; alignment based on the value of INSN_CURRENT_ADDRESS; the approach used +;; here is too conservative. + +; align to a two byte boundary + +(define_expand "align_2" + [(unspec_volatile [(const_int 1)] UNSPECV_ALIGN)] + "" + "") + +; align to a four byte boundary +;; align_4 and align_log are instructions for the starts of loops, or +;; after unconditional branches, which may take up extra room. + +(define_expand "align_4" + [(unspec_volatile [(const_int 2)] UNSPECV_ALIGN)] + "" + "") + +; align to a cache line boundary + +(define_insn "align_log" + [(unspec_volatile [(match_operand 0 "const_int_operand" "")] UNSPECV_ALIGN)] + "" + "" + [(set_attr "length" "0") + (set_attr "in_delay_slot" "no")]) + +; emitted at the end of the literal table, used to emit the +; 32bit branch labels if needed. + +(define_insn "consttable_end" + [(unspec_volatile [(const_int 0)] UNSPECV_CONST_END)] + "" + "* return output_jump_label_table ();" + [(set_attr "in_delay_slot" "no")]) + +; emitted at the end of the window in the literal table. + +(define_insn "consttable_window_end" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_WINDOW_END)] + "" + "" + [(set_attr "length" "0") + (set_attr "in_delay_slot" "no")]) + +;; ------------------------------------------------------------------------- +;; Misc +;; ------------------------------------------------------------------------- + +;; String/block move insn. 
+ +(define_expand "movmemsi" + [(parallel [(set (mem:BLK (match_operand:BLK 0 "" "")) + (mem:BLK (match_operand:BLK 1 "" ""))) + (use (match_operand:SI 2 "nonmemory_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R4_REG)) + (clobber (reg:SI R5_REG)) + (clobber (reg:SI R0_REG))])] + "TARGET_SH1 && ! TARGET_SH5" + " +{ + if(expand_block_move (operands)) + DONE; + else FAIL; +}") + +;; The block_*_real* patterns below all copy from the address in r5 to the +;; address in r4 by calling the function whose address is operand 0 +;; (jsr @%0). They differ in the target condition and in the registers +;; they use and clobber. +(define_insn "block_move_real" + [(parallel [(set (mem:BLK (reg:SI R4_REG)) + (mem:BLK (reg:SI R5_REG))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R0_REG))])] + "TARGET_SH1 && ! TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_lump_real" + [(parallel [(set (mem:BLK (reg:SI R4_REG)) + (mem:BLK (reg:SI R5_REG))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (use (reg:SI R6_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI T_REG)) + (clobber (reg:SI R4_REG)) + (clobber (reg:SI R5_REG)) + (clobber (reg:SI R6_REG)) + (clobber (reg:SI R0_REG))])] + "TARGET_SH1 && ! 
TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_move_real_i4" + [(parallel [(set (mem:BLK (reg:SI R4_REG)) + (mem:BLK (reg:SI R5_REG))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R0_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R2_REG))])] + "TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_lump_real_i4" + [(parallel [(set (mem:BLK (reg:SI R4_REG)) + (mem:BLK (reg:SI R5_REG))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (use (reg:SI R6_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI T_REG)) + (clobber (reg:SI R4_REG)) + (clobber (reg:SI R5_REG)) + (clobber (reg:SI R6_REG)) + (clobber (reg:SI R0_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R2_REG)) + (clobber (reg:SI R3_REG))])] + "TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +;; ------------------------------------------------------------------------- +;; Floating point instructions. +;; ------------------------------------------------------------------------- + +;; ??? All patterns should have a type attribute. + +(define_expand "movpsi" + [(set (match_operand:PSI 0 "register_operand" "") + (match_operand:PSI 1 "general_movsrc_operand" ""))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "") + +;; The c / m alternative is a fake to guide reload to load directly into +;; fpscr, since reload doesn't know how to use post-increment. +;; TARGET_LEGITIMATE_ADDRESS_P guards against bogus addresses before reload, +;; SECONDARY_INPUT_RELOAD_CLASS does this during reload, and the insn's +;; predicate after reload. +;; The mac_gp type for r/!c might look a bit odd, but it actually schedules +;; like a mac -> gpr move. 
+(define_insn "fpu_switch" + [(set (match_operand:PSI 0 "general_movdst_operand" "=c,c,r,c,c,r,m,r,<") + (match_operand:PSI 1 "general_movsrc_operand" "c,>,m,m,r,r,r,!c,c"))] + "TARGET_SH2E + && (! reload_completed + || true_regnum (operands[0]) != FPSCR_REG + || !MEM_P (operands[1]) + || GET_CODE (XEXP (operands[1], 0)) != PLUS)" + "@ + ! precision stays the same + lds.l %1,fpscr + mov.l %1,%0 + # + lds %1,fpscr + mov %1,%0 + mov.l %1,%0 + sts fpscr,%0 + sts.l fpscr,%0" + [(set_attr "length" "0,2,2,4,2,2,2,2,2") + (set_attr "type" "nil,mem_fpscr,load,mem_fpscr,gp_fpscr,move,store,mac_gp,fstore")]) + +;; When the address register is dead after this insn, rewrite a plain +;; register-indirect load of fpscr into a post-increment load emitted +;; through fpu_switch, and record the increment with a REG_INC note. +(define_peephole2 + [(set (reg:PSI FPSCR_REG) + (mem:PSI (match_operand:SI 0 "register_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && peep2_reg_dead_p (1, operands[0])" + [(const_int 0)] +{ + rtx fpscr, mem, new_insn; + + fpscr = SET_DEST (PATTERN (curr_insn)); + mem = SET_SRC (PATTERN (curr_insn)); + mem = replace_equiv_address (mem, gen_rtx_POST_INC (Pmode, operands[0])); + + new_insn = emit_insn (gen_fpu_switch (fpscr, mem)); + add_reg_note (new_insn, REG_INC, operands[0]); + DONE; +}) + +;; Split version of the same rewrite. When curr_insn has no REG_DEAD note +;; for the address register, the post-increment is compensated by adding +;; -4 to that register afterwards. +(define_split + [(set (reg:PSI FPSCR_REG) + (mem:PSI (match_operand:SI 0 "register_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) + && (flag_peephole2 ? epilogue_completed : reload_completed)" + [(const_int 0)] +{ + rtx fpscr, mem, new_insn; + + fpscr = SET_DEST (PATTERN (curr_insn)); + mem = SET_SRC (PATTERN (curr_insn)); + mem = replace_equiv_address (mem, gen_rtx_POST_INC (Pmode, operands[0])); + + new_insn = emit_insn (gen_fpu_switch (fpscr, mem)); + add_reg_note (new_insn, REG_INC, operands[0]); + + if (!find_regno_note (curr_insn, REG_DEAD, true_regnum (operands[0]))) + emit_insn (gen_addsi3 (operands[0], operands[0], GEN_INT (-4))); + DONE; +}) + +;; ??? This uses the fp unit, but has no type indicating that. +;; If we did that, this would either give a bogus latency or introduce +;; a bogus FIFO constraint. 
+;; Since this insn is currently only used for prologues/epilogues, +;; it is probably best to claim no function unit, which matches the +;; current setting. +(define_insn "toggle_sz" + [(set (reg:PSI FPSCR_REG) + (xor:PSI (reg:PSI FPSCR_REG) (const_int 1048576)))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fschg" + [(set_attr "type" "fpscr_toggle") (set_attr "fp_set" "unknown")]) + +;; There's no way we can use it today, since optimize mode switching +;; doesn't enable us to know from which mode we're switching to the +;; mode it requests, to tell whether we can use a relative mode switch +;; (like toggle_pr) or an absolute switch (like loading fpscr from +;; memory). +(define_insn "toggle_pr" + [(set (reg:PSI FPSCR_REG) + (xor:PSI (reg:PSI FPSCR_REG) (const_int 524288)))] + "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE" + "fpchg" + [(set_attr "type" "fpscr_toggle")]) + +;; SFmode addition. SH2E expands through expand_sf_binop with addsf3_i; +;; otherwise the template is emitted as is. +(define_expand "addsf3" + [(set (match_operand:SF 0 "arith_reg_operand" "") + (plus:SF (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH2E) + { + expand_sf_binop (&gen_addsf3_i, operands); + DONE; + } +}") + +(define_insn "*addsf3_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fadd.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +;; Apply a unary SFmode operation (operator 2) to the element of the V2SF +;; operand 1 selected by operand 4, and store it into the element of +;; operand 0 selected by operand 3; the other element of operand 0 is +;; kept. After reload this is split into one SFmode set whose +;; destination is operands[5] and whose source is operands[6]; both are +;; created by the preparation code below. +(define_insn_and_split "unary_sf_op" + [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f") + (vec_select:V2SF + (vec_concat:V2SF + (vec_select:SF + (match_dup 0) + (parallel [(not:BI (match_operand 3 "const_int_operand" "n"))])) + (match_operator:SF 2 "unary_float_operator" + [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f") + (parallel [(match_operand 4 + "const_int_operand" "n")]))])) + (parallel [(not:BI (match_dup 3)) (match_dup 3)])))] + "TARGET_SHMEDIA_FPU" + "#" + "TARGET_SHMEDIA_FPU && 
reload_completed" + [(set (match_dup 5) (match_dup 6))] + " +{ + int endian = TARGET_LITTLE_ENDIAN ? 0 : 1; + rtx op1 = gen_rtx_REG (SFmode, + (true_regnum (operands[1]) + + (INTVAL (operands[4]) ^ endian))); + + operands[5] = gen_rtx_REG (SFmode, + (true_regnum (operands[0]) + + (INTVAL (operands[3]) ^ endian))); + operands[6] = gen_rtx_fmt_e (GET_CODE (operands[2]), SFmode, op1); +}" + [(set_attr "type" "fparith_media")]) + +;; Binary SFmode operation on element 0 of two V2SF operands; element 1 +;; of operand 0 is kept. Split after reload into one SFmode set. +(define_insn_and_split "binary_sf_op0" + [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f") + (vec_concat:V2SF + (match_operator:SF 3 "binary_float_operator" + [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF (match_operand:V2SF 2 "fp_arith_reg_operand" "f") + (parallel [(const_int 0)]))]) + (vec_select:SF + (match_dup 0) + (parallel [(const_int 1)]))))] + "TARGET_SHMEDIA_FPU" + "#" + "&& reload_completed" + [(set (match_dup 4) (match_dup 5))] + " +{ + int endian = TARGET_LITTLE_ENDIAN ? 0 : 1; + rtx op1 = gen_rtx_REG (SFmode, + true_regnum (operands[1]) + endian); + rtx op2 = gen_rtx_REG (SFmode, + true_regnum (operands[2]) + endian); + + operands[4] = gen_rtx_REG (SFmode, + true_regnum (operands[0]) + endian); + operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SFmode, op1, op2); +}" + [(set_attr "type" "fparith_media")]) + +(define_insn_and_split "binary_sf_op1" + [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f") + (vec_concat:V2SF + (vec_select:SF + (match_dup 0) + (parallel [(const_int 0)])) + (match_operator:SF 3 "binary_float_operator" + [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f") + (parallel [(const_int 1)])) + (vec_select:SF (match_operand:V2SF 2 "fp_arith_reg_operand" "f") + (parallel [(const_int 1)]))])))] + "TARGET_SHMEDIA_FPU" + "#" + "&& reload_completed" + [(set (match_dup 4) (match_dup 5))] + " +{ + int endian = TARGET_LITTLE_ENDIAN ? 
0 : 1; + rtx op1 = gen_rtx_REG (SFmode, + true_regnum (operands[1]) + (1 ^ endian)); + rtx op2 = gen_rtx_REG (SFmode, + true_regnum (operands[2]) + (1 ^ endian)); + + operands[4] = gen_rtx_REG (SFmode, + true_regnum (operands[0]) + (1 ^ endian)); + operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SFmode, op1, op2); +}" + [(set_attr "type" "fparith_media")]) + +;; SH2E single-precision add; operand 3 is the fpscr use. +(define_insn "addsf3_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0") + (match_operand:SF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fadd %2,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +;; SFmode subtraction. SH2E expands through expand_sf_binop with +;; subsf3_i; otherwise the template is emitted as is. +(define_expand "subsf3" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "") + (match_operand:SF 2 "fp_arith_reg_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH2E) + { + expand_sf_binop (&gen_subsf3_i, operands); + DONE; + } +}") + +(define_insn "*subsf3_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fsub.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +(define_insn "subsf3_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "0") + (match_operand:SF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fsub %2,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +(define_expand "mulsf3" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "") + (match_operand:SF 2 "fp_arith_reg_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" + "") + +(define_insn "*mulsf3_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (mult:SF 
(match_operand:SF 1 "fp_arith_reg_operand" "%f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fmul.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR +;; register in feeding fp instructions. Thus, in order to generate fmac, +;; we start out with a mulsf pattern that does not depend on fpscr. +;; This is split after combine to introduce the dependency, in order to +;; get mode switching and scheduling right. +(define_insn_and_split "mulsf3_ie" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SH2E" + "fmul %2,%0" + "TARGET_SH4 || TARGET_SH2A_SINGLE" + [(const_int 0)] + " +{ + emit_insn (gen_mulsf3_i4 (operands[0], operands[1], operands[2], + get_fpscr_rtx ())); + DONE; +}" + [(set_attr "type" "fp")]) + +(define_insn "mulsf3_i4" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0") + (match_operand:SF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fmul %2,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +(define_insn "mac_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")) + (match_operand:SF 3 "fp_arith_reg_operand" "0")))] + "TARGET_SHMEDIA_FPU && TARGET_FMAC" + "fmac.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +;; Multiply-accumulate. The output template names fr0 literally for the +;; first multiplicand, which uses the "w" constraint. +(define_insn "*macsf3" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w") + (match_operand:SF 2 "fp_arith_reg_operand" "f")) + (match_operand:SF 3 "arith_reg_operand" "0"))) + (use (match_operand:PSI 4 "fpscr_operand" "c"))] + "TARGET_SH2E && TARGET_FMAC" + "fmac 
fr0,%2,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +;; SFmode division. SH2E expands through expand_sf_binop with divsf3_i; +;; otherwise the template is emitted as is. +(define_expand "divsf3" + [(set (match_operand:SF 0 "arith_reg_operand" "") + (div:SF (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH2E) + { + expand_sf_binop (&gen_divsf3_i, operands); + DONE; + } +}") + +(define_insn "*divsf3_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (div:SF (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fdiv.s %1, %2, %0" + [(set_attr "type" "fdiv_media")]) + +(define_insn "divsf3_i" + [(set (match_operand:SF 0 "arith_reg_dest" "=f") + (div:SF (match_operand:SF 1 "arith_reg_operand" "0") + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fdiv %2,%0" + [(set_attr "type" "fdiv") + (set_attr "fp_mode" "single")]) + +(define_insn "floatdisf2" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (float:SF (match_operand:DI 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "float.qs %1, %0" + [(set_attr "type" "fpconv_media")]) + +;; SImode to SFmode conversion. On SH4 / SH2A-single the insn carrying an +;; fpscr use (floatsisf2_i4) is emitted; otherwise the template is +;; emitted as is. +(define_expand "floatsisf2" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (float:SF (match_operand:SI 1 "fpul_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_SINGLE) + { + emit_sf_insn (gen_floatsisf2_i4 (operands[0], operands[1], get_fpscr_rtx ())); + DONE; + } +}") + +(define_insn "*floatsisf2_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (float:SF (match_operand:SI 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "float.ls %1, %0" + [(set_attr "type" "fpconv_media")]) + +(define_insn "floatsisf2_i4" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (float:SF (match_operand:SI 1 "fpul_operand" "y"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || 
TARGET_SH2A_SINGLE)" + "float %1,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +(define_insn "*floatsisf2_ie" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (float:SF (match_operand:SI 1 "fpul_operand" "y")))] + "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)" + "float %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "fix_truncsfdi2" + [(set (match_operand:DI 0 "fp_arith_reg_dest" "=f") + (fix:DI (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "ftrc.sq %1, %0" + [(set_attr "type" "fpconv_media")]) + +;; SFmode to SImode truncation. On SH4 / SH2A-single the insn carrying an +;; fpscr use (fix_truncsfsi2_i4) is emitted; otherwise the template is +;; emitted as is. +(define_expand "fix_truncsfsi2" + [(set (match_operand:SI 0 "fpul_operand" "=y") + (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_SINGLE) + { + emit_sf_insn (gen_fix_truncsfsi2_i4 (operands[0], operands[1], get_fpscr_rtx ())); + DONE; + } +}") + +(define_insn "*fix_truncsfsi2_media" + [(set (match_operand:SI 0 "fp_arith_reg_operand" "=f") + (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "ftrc.sl %1, %0" + [(set_attr "type" "fpconv_media")]) + +(define_insn "fix_truncsfsi2_i4" + [(set (match_operand:SI 0 "fpul_operand" "=y") + (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_SINGLE)" + "ftrc %1,%0" + [(set_attr "type" "ftrc_s") + (set_attr "fp_mode" "single")]) + +;; ??? This pattern is used nowhere. fix_truncsfsi2 always expands to +;; fix_truncsfsi2_i4. 
+;; (define_insn "fix_truncsfsi2_i4_2" +;; [(set (match_operand:SI 0 "arith_reg_operand" "=r") +;; (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) +;; (use (reg:PSI FPSCR_REG)) +;; (clobber (reg:SI FPUL_REG))] +;; "TARGET_SH4" +;; "#" +;; [(set_attr "length" "4") +;; (set_attr "fp_mode" "single")]) + +;;(define_split +;; [(set (match_operand:SI 0 "arith_reg_operand" "=r") +;; (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) +;; (use (match_operand:PSI 2 "fpscr_operand" "c")) +;; (clobber (reg:SI FPUL_REG))] +;; "TARGET_SH4" +;; [(parallel [(set (reg:SI FPUL_REG) (fix:SI (match_dup 1))) +;; (use (match_dup 2))]) +;; (set (match_dup 0) (reg:SI FPUL_REG))]) + +(define_insn "*fixsfsi" + [(set (match_operand:SI 0 "fpul_operand" "=y") + (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)" + "ftrc %1,%0" + [(set_attr "type" "fp")]) + +;; SFmode comparisons setting the T register. The patterns without an +;; fpscr use are enabled for SH2E when neither SH4 nor SH2A-single is +;; selected; the *_i4 / *_4 variants further below carry an fpscr use and +;; are enabled for SH4 or SH2A-single. +(define_insn "cmpgtsf_t" + [(set (reg:SI T_REG) + (gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)" + "fcmp/gt %1,%0" + [(set_attr "type" "fp_cmp") + (set_attr "fp_mode" "single")]) + +(define_insn "cmpeqsf_t" + [(set (reg:SI T_REG) + (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)" + "fcmp/eq %1,%0" + [(set_attr "type" "fp_cmp") + (set_attr "fp_mode" "single")]) + +(define_insn "ieee_ccmpeqsf_t" + [(set (reg:SI T_REG) + (ior:SI (reg:SI T_REG) + (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f"))))] + "TARGET_SH2E && TARGET_IEEE && ! 
(TARGET_SH4 || TARGET_SH2A_SINGLE)" + "* return output_ieee_ccmpeq (insn, operands);" + [(set_attr "length" "4")]) + + +(define_insn "cmpgtsf_t_i4" + [(set (reg:SI T_REG) + (gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_SINGLE)" + "fcmp/gt %1,%0" + [(set_attr "type" "fp_cmp") + (set_attr "fp_mode" "single")]) + +(define_insn "cmpeqsf_t_i4" + [(set (reg:SI T_REG) + (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_SINGLE)" + "fcmp/eq %1,%0" + [(set_attr "type" "fp_cmp") + (set_attr "fp_mode" "single")]) + +(define_insn "*ieee_ccmpeqsf_t_4" + [(set (reg:SI T_REG) + (ior:SI (reg:SI T_REG) + (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f")))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_IEEE && (TARGET_SH4 || TARGET_SH2A_SINGLE)" + "* return output_ieee_ccmpeq (insn, operands);" + [(set_attr "length" "4") + (set_attr "fp_mode" "single")]) + +;; SHmedia SFmode comparisons; the result is written to a general +;; register (operand 0). +(define_insn "cmpeqsf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpeq.s %1, %2, %0" + [(set_attr "type" "fcmp_media")]) + +(define_insn "cmpgtsf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gt:SI (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpgt.s %1, %2, %0" + [(set_attr "type" "fcmp_media")]) + +(define_insn "cmpgesf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (ge:SI (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpge.s %1, %2, %0" + 
[(set_attr "type" "fcmp_media")]) + +(define_insn "cmpunsf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (unordered:SI (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpun.s %1, %2, %0" + [(set_attr "type" "fcmp_media")]) + +;; SFmode conditional branch: SHmedia emits cbranchfp4_media, other +;; targets call sh_emit_compare_and_branch. +(define_expand "cbranchsf4" + [(set (pc) + (if_then_else (match_operator 0 "sh_float_comparison_operator" + [(match_operand:SF 1 "arith_operand" "") + (match_operand:SF 2 "arith_operand" "")]) + (match_operand 3 "" "") + (pc)))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SHMEDIA) + emit_jump_insn (gen_cbranchfp4_media (operands[0], operands[1], operands[2], + operands[3])); + else + sh_emit_compare_and_branch (operands, SFmode); + DONE; +}") + +;; SFmode negation. SH2E expands through expand_sf_unop with negsf2_i; +;; otherwise the template is emitted as is. +(define_expand "negsf2" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH2E) + { + expand_sf_unop (&gen_negsf2_i, operands); + DONE; + } +}") + +(define_insn "*negsf2_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fneg.s %1, %0" + [(set_attr "type" "fmove_media")]) + +(define_insn "negsf2_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fneg %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" "single")]) + +;; SFmode square root. SH3E expands through expand_sf_unop with +;; sqrtsf2_i; otherwise the template is emitted as is. +(define_expand "sqrtsf2" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))] + "TARGET_SH3E || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH3E) + { + expand_sf_unop (&gen_sqrtsf2_i, operands); + DONE; + } +}") + +(define_insn "*sqrtsf2_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + 
"TARGET_SHMEDIA_FPU" + "fsqrt.s %1, %0" + [(set_attr "type" "fdiv_media")]) + +(define_insn "sqrtsf2_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fsqrt %0" + [(set_attr "type" "fdiv") + (set_attr "fp_mode" "single")]) + +;; Reciprocal square root (fsrra). Only matched when operand 1 is +;; CONST1_RTX (SFmode) and flag_unsafe_math_optimizations is set. +(define_insn "rsqrtsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (div:SF (match_operand:SF 1 "immediate_operand" "i") + (sqrt:SF (match_operand:SF 2 "register_operand" "0")))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4A_FP && flag_unsafe_math_optimizations + && operands[1] == CONST1_RTX (SFmode)" + "fsrra %0" + [(set_attr "type" "fsrra") + (set_attr "fp_mode" "single")]) + +;; Combined sine/cosine approximation (fsca) of the integer in fpul, +;; producing a V2SF result. Only matched when operand 2 is the constant +;; returned by sh_fsca_int2sf () and flag_unsafe_math_optimizations is +;; set. +(define_insn "fsca" + [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f") + (vec_concat:V2SF + (unspec:SF [(mult:SF + (float:SF (match_operand:SI 1 "fpul_operand" "y")) + (match_operand:SF 2 "immediate_operand" "i")) + ] UNSPEC_FSINA) + (unspec:SF [(mult:SF (float:SF (match_dup 1)) (match_dup 2)) + ] UNSPEC_FCOSA))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4A_FP && flag_unsafe_math_optimizations + && operands[2] == sh_fsca_int2sf ()" + "fsca fpul,%d0" + [(set_attr "type" "fsca") + (set_attr "fp_mode" "single")]) + +(define_expand "sinsf2" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")] + UNSPEC_FSINA))] + "TARGET_SH4A_FP && flag_unsafe_math_optimizations" + " +{ + rtx scaled = gen_reg_rtx (SFmode); + rtx truncated = gen_reg_rtx (SImode); + rtx fsca = gen_reg_rtx (V2SFmode); + rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ()); + + emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg)); + emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled)); + emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (), + get_fpscr_rtx ())); + emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 0)); + DONE; 
+}") + +(define_expand "cossf2" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")] + UNSPEC_FCOSA))] + "TARGET_SH4A_FP && flag_unsafe_math_optimizations" + " +{ + rtx scaled = gen_reg_rtx (SFmode); + rtx truncated = gen_reg_rtx (SImode); + rtx fsca = gen_reg_rtx (V2SFmode); + rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ()); + + emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg)); + emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled)); + emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (), + get_fpscr_rtx ())); + emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 4)); + DONE; +}") + +(define_expand "sindf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")] + UNSPEC_FSINA))] + "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE && flag_unsafe_math_optimizations" + " +{ + rtx scaled = gen_reg_rtx (DFmode); + rtx truncated = gen_reg_rtx (SImode); + rtx fsca = gen_reg_rtx (V2SFmode); + rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ()); + rtx sfresult = gen_reg_rtx (SFmode); + + emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg)); + emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled)); + emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (), + get_fpscr_rtx ())); + emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 0)); + emit_df_insn (gen_extendsfdf2 (operands[0], sfresult)); + DONE; +}") + +(define_expand "cosdf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")] + UNSPEC_FCOSA))] + "TARGET_SH4A_FP && ! 
TARGET_FPU_SINGLE && flag_unsafe_math_optimizations" + " +{ + rtx scaled = gen_reg_rtx (DFmode); + rtx truncated = gen_reg_rtx (SImode); + rtx fsca = gen_reg_rtx (V2SFmode); + rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ()); + rtx sfresult = gen_reg_rtx (SFmode); + + emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg)); + emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled)); + emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (), + get_fpscr_rtx ())); + emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 4)); + emit_df_insn (gen_extendsfdf2 (operands[0], sfresult)); + DONE; +}") + +(define_expand "abssf2" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH2E) + { + expand_sf_unop (&gen_abssf2_i, operands); + DONE; + } +}") + +(define_insn "*abssf2_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fabs.s %1, %0" + [(set_attr "type" "fmove_media")]) + +(define_insn "abssf2_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fabs %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" "single")]) + +(define_expand "adddf3" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "") + (match_operand:DF 2 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_binop (&gen_adddf3_i, operands); + DONE; + } +}") + +(define_insn "*adddf3_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "%f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fadd.d %1, 
%2, %0" + [(set_attr "type" "dfparith_media")]) + +(define_insn "adddf3_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "%0") + (match_operand:DF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fadd %2,%0" + [(set_attr "type" "dfp_arith") + (set_attr "fp_mode" "double")]) + +(define_expand "subdf3" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "") + (match_operand:DF 2 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_binop (&gen_subdf3_i, operands); + DONE; + } +}") + +(define_insn "*subdf3_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fsub.d %1, %2, %0" + [(set_attr "type" "dfparith_media")]) + +(define_insn "subdf3_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "0") + (match_operand:DF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fsub %2,%0" + [(set_attr "type" "dfp_arith") + (set_attr "fp_mode" "double")]) + +(define_expand "muldf3" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "") + (match_operand:DF 2 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_binop (&gen_muldf3_i, operands); + DONE; + } +}") + +(define_insn "*muldf3_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "%f") + (match_operand:DF 2 "fp_arith_reg_operand" 
"f")))] + "TARGET_SHMEDIA_FPU" + "fmul.d %1, %2, %0" + [(set_attr "type" "dfmul_media")]) + +(define_insn "muldf3_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "%0") + (match_operand:DF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fmul %2,%0" + [(set_attr "type" "dfp_mul") + (set_attr "fp_mode" "double")]) + +(define_expand "divdf3" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "") + (match_operand:DF 2 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_binop (&gen_divdf3_i, operands); + DONE; + } +}") + +(define_insn "*divdf3_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fdiv.d %1, %2, %0" + [(set_attr "type" "dfdiv_media")]) + +(define_insn "divdf3_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "0") + (match_operand:DF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fdiv %2,%0" + [(set_attr "type" "dfdiv") + (set_attr "fp_mode" "double")]) + +(define_insn "floatdidf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (float:DF (match_operand:DI 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "float.qd %1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_expand "floatsidf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (float:DF (match_operand:SI 1 "fpul_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + emit_df_insn (gen_floatsidf2_i (operands[0], 
operands[1], + get_fpscr_rtx ())); + DONE; + } +}") + +(define_insn "*floatsidf2_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (float:DF (match_operand:SI 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "float.ld %1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_insn "floatsidf2_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (float:DF (match_operand:SI 1 "fpul_operand" "y"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "float %1,%0" + [(set_attr "type" "dfp_conv") + (set_attr "fp_mode" "double")]) + +(define_insn "fix_truncdfdi2" + [(set (match_operand:DI 0 "fp_arith_reg_dest" "=f") + (fix:DI (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "ftrc.dq %1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_expand "fix_truncdfsi2" + [(set (match_operand:SI 0 "fpul_operand" "") + (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + emit_df_insn (gen_fix_truncdfsi2_i (operands[0], operands[1], + get_fpscr_rtx ())); + DONE; + } +}") + +(define_insn "*fix_truncdfsi2_media" + [(set (match_operand:SI 0 "fp_arith_reg_operand" "=f") + (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "ftrc.dl %1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_insn "fix_truncdfsi2_i" + [(set (match_operand:SI 0 "fpul_operand" "=y") + (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "ftrc %1,%0" + [(set_attr "type" "dfp_conv") + (set_attr "dfp_comp" "no") + (set_attr "fp_mode" "double")]) + +;; ??? This pattern is used nowhere. fix_truncdfsi2 always expands to +;; fix_truncdfsi2_i. 
+;; (define_insn "fix_truncdfsi2_i4" +;; [(set (match_operand:SI 0 "arith_reg_operand" "=r") +;; (fix:SI (match_operand:DF 1 "arith_reg_operand" "f"))) +;; (use (match_operand:PSI 2 "fpscr_operand" "c")) +;; (clobber (reg:SI FPUL_REG))] +;; "TARGET_SH4" +;; "#" +;; [(set_attr "length" "4") +;; (set_attr "fp_mode" "double")]) +;; +;; (define_split +;; [(set (match_operand:SI 0 "arith_reg_operand" "=r") +;; (fix:SI (match_operand:DF 1 "arith_reg_operand" "f"))) +;; (use (match_operand:PSI 2 "fpscr_operand" "c")) +;; (clobber (reg:SI FPUL_REG))] +;; "TARGET_SH4" +;; [(parallel [(set (reg:SI FPUL_REG) (fix:SI (match_dup 1))) +;; (use (match_dup 2))]) +;; (set (match_dup 0) (reg:SI FPUL_REG))]) + +(define_insn "cmpgtdf_t" + [(set (reg:SI T_REG) + (gt:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fcmp/gt %1,%0" + [(set_attr "type" "dfp_cmp") + (set_attr "fp_mode" "double")]) + +(define_insn "cmpeqdf_t" + [(set (reg:SI T_REG) + (eq:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fcmp/eq %1,%0" + [(set_attr "type" "dfp_cmp") + (set_attr "fp_mode" "double")]) + +(define_insn "*ieee_ccmpeqdf_t" + [(set (reg:SI T_REG) + (ior:SI (reg:SI T_REG) + (eq:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f")))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_IEEE && (TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "* return output_ieee_ccmpeq (insn, operands);" + [(set_attr "length" "4") + (set_attr "fp_mode" "double")]) + +(define_insn "cmpeqdf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpeq.d %1,%2,%0" + 
[(set_attr "type" "fcmp_media")]) + +(define_insn "cmpgtdf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gt:SI (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpgt.d %1,%2,%0" + [(set_attr "type" "fcmp_media")]) + +(define_insn "cmpgedf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (ge:SI (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpge.d %1,%2,%0" + [(set_attr "type" "fcmp_media")]) + +(define_insn "cmpundf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (unordered:SI (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpun.d %1,%2,%0" + [(set_attr "type" "fcmp_media")]) + +(define_expand "cbranchdf4" + [(set (pc) + (if_then_else (match_operator 0 "sh_float_comparison_operator" + [(match_operand:DF 1 "arith_operand" "") + (match_operand:DF 2 "arith_operand" "")]) + (match_operand 3 "" "") + (pc)))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SHMEDIA) + emit_jump_insn (gen_cbranchfp4_media (operands[0], operands[1], operands[2], + operands[3])); + else + sh_emit_compare_and_branch (operands, DFmode); + DONE; +}") + + +(define_expand "negdf2" + [(set (match_operand:DF 0 "arith_reg_operand" "") + (neg:DF (match_operand:DF 1 "arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_unop (&gen_negdf2_i, operands); + DONE; + } +}") + +(define_insn "*negdf2_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (neg:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fneg.d %1, %0" + [(set_attr "type" "fmove_media")]) + +(define_insn "negdf2_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (neg:DF 
(match_operand:DF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fneg %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" "double")]) + +(define_expand "sqrtdf2" + [(set (match_operand:DF 0 "arith_reg_operand" "") + (sqrt:DF (match_operand:DF 1 "arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_unop (&gen_sqrtdf2_i, operands); + DONE; + } +}") + +(define_insn "*sqrtdf2_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (sqrt:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fsqrt.d %1, %0" + [(set_attr "type" "dfdiv_media")]) + +(define_insn "sqrtdf2_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (sqrt:DF (match_operand:DF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fsqrt %0" + [(set_attr "type" "dfdiv") + (set_attr "fp_mode" "double")]) + +(define_expand "absdf2" + [(set (match_operand:DF 0 "arith_reg_operand" "") + (abs:DF (match_operand:DF 1 "arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_unop (&gen_absdf2_i, operands); + DONE; + } +}") + +(define_insn "*absdf2_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (abs:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fabs.d %1, %0" + [(set_attr "type" "fmove_media")]) + +(define_insn "absdf2_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (abs:DF (match_operand:DF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fabs %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" "double")]) + +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand" 
"") + (float_extend:DF (match_operand:SF 1 "fpul_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + emit_df_insn (gen_extendsfdf2_i4 (operands[0], operands[1], + get_fpscr_rtx ())); + DONE; + } +}") + +(define_insn "*extendsfdf2_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (float_extend:DF (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcnv.sd %1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_insn "extendsfdf2_i4" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (float_extend:DF (match_operand:SF 1 "fpul_operand" "y"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fcnvsd %1,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "double")]) + +(define_expand "truncdfsf2" + [(set (match_operand:SF 0 "fpul_operand" "") + (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" + " +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + emit_df_insn (gen_truncdfsf2_i4 (operands[0], operands[1], + get_fpscr_rtx ())); + DONE; + } +}") + +(define_insn "*truncdfsf2_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcnv.ds %1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_insn "truncdfsf2_i4" + [(set (match_operand:SF 0 "fpul_operand" "=y") + (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fcnvds %1,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "double")]) + +;; Bit field extract patterns. These give better code for packed bitfields, +;; because they allow auto-increment addresses to be generated. 
+ +;; Insert a bit field into a QImode memory operand (only enabled when +;; ! TARGET_LITTLE_ENDIAN). Two cases are expanded and every other request +;; FAILs: +;; - with TARGET_SH2A && TARGET_BITOPS, when the address satisfies Sbw or +;; Sbv, the width satisfies M and the position satisfies K03, the field +;; is written with bclr_m2a / bset_m2a / bst_m2a, chosen by operand 3; +;; - otherwise a 16, 24 or 32 bit field that starts on a byte boundary is +;; written one byte at a time, from the highest address downwards. +;; NOTE(review): M, N and K03 are defined outside this chunk (presumably +;; the constants 1 and 0 and the range 0..7) -- confirm in constraints.md. +(define_expand "insv" + [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "") + (match_operand:SI 1 "immediate_operand" "") + (match_operand:SI 2 "immediate_operand" "")) + (match_operand:SI 3 "general_operand" ""))] + "TARGET_SH1 && ! TARGET_LITTLE_ENDIAN" + " +{ + rtx addr_target, orig_address, shift_reg, qi_val; + HOST_WIDE_INT bitsize, size, v = 0; + rtx x = operands[3]; + + /* SH2A bit instructions: clear, set or store-T is picked from operands[3]. */ + if (TARGET_SH2A && TARGET_BITOPS + && (satisfies_constraint_Sbw (operands[0]) + || satisfies_constraint_Sbv (operands[0])) + && satisfies_constraint_M (operands[1]) + && satisfies_constraint_K03 (operands[2])) + { + if (satisfies_constraint_N (operands[3])) + { + emit_insn (gen_bclr_m2a (operands[0], operands[2])); + DONE; + } + else if (satisfies_constraint_M (operands[3])) + { + emit_insn (gen_bset_m2a (operands[0], operands[2])); + DONE; + } + else if ((REG_P (operands[3]) && REGNO (operands[3]) == T_REG) + && satisfies_constraint_M (operands[1])) + { + emit_insn (gen_bst_m2a (operands[0], operands[2])); + DONE; + } + else if (REG_P (operands[3]) + && satisfies_constraint_M (operands[1])) + { + /* Copy bit 0 of the register into T, then store T into memory. */ + emit_insn (gen_bld_reg (operands[3], const0_rtx)); + emit_insn (gen_bst_m2a (operands[0], operands[2])); + DONE; + } + } + /* ??? expmed doesn't care for non-register predicates. */ + if (! memory_operand (operands[0], VOIDmode) + || ! immediate_operand (operands[1], VOIDmode) + || ! immediate_operand (operands[2], VOIDmode) + || ! general_operand (x, VOIDmode)) + FAIL; + /* If this isn't a 16 / 24 / 32 bit field, or if + it doesn't start on a byte boundary, then fail. 
*/ + bitsize = INTVAL (operands[1]); + if (bitsize < 16 || bitsize > 32 || bitsize % 8 != 0 + || (INTVAL (operands[2]) % 8) != 0) + FAIL; + + /* The low byte of the value goes to the last address of the field; each + further byte is obtained by shifting right by 8 and is stored one + address lower. */ + size = bitsize / 8; + orig_address = XEXP (operands[0], 0); + shift_reg = gen_reg_rtx (SImode); + if (CONST_INT_P (x)) + { + v = INTVAL (x); + qi_val = force_reg (QImode, GEN_INT (trunc_int_for_mode (v, QImode))); + } + else + { + emit_insn (gen_movsi (shift_reg, operands[3])); + qi_val = gen_rtx_SUBREG (QImode, shift_reg, 3); + } + addr_target = copy_addr_to_reg (plus_constant (orig_address, size - 1)); + + operands[0] = replace_equiv_address (operands[0], addr_target); + emit_insn (gen_movqi (operands[0], qi_val)); + + /* One byte has been stored already, so size - 1 iterations remain. */ + while (size -= 1) + { + if (CONST_INT_P (x)) + qi_val + = force_reg (QImode, GEN_INT (trunc_int_for_mode (v >>= 8, QImode))); + else + { + emit_insn (gen_lshrsi3_k (shift_reg, shift_reg, GEN_INT (8))); + qi_val = gen_rtx_SUBREG (QImode, shift_reg, 3); + } + emit_insn (gen_addsi3 (addr_target, addr_target, constm1_rtx)); + emit_insn (gen_movqi (operands[0], qi_val)); + } + + DONE; +}") + +;; movua.l load from a BLKmode unaligned_load_operand. The extv and extzv +;; expanders emit it for 32 bit fields in memory with MEM_ALIGN below 32. +(define_insn "movua" + [(set (match_operand:SI 0 "register_operand" "=z") + (unspec:SI [(match_operand:BLK 1 "unaligned_load_operand" "Sua>")] + UNSPEC_MOVUA))] + "TARGET_SH4A_ARCH" + "movua.l %1,%0" + [(set_attr "type" "movua")]) + +;; We shouldn't need this, but cse replaces increments with references +;; to other regs before flow has a chance to create post_inc +;; addressing modes, and only postreload's cse_move2add brings the +;; increments back to a usable form. 
+(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (mem:SI (match_operand:SI 1 "register_operand" "")) + (const_int 32) (const_int 0))) + (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))] + "TARGET_SH4A_ARCH && REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (mem:SI (post_inc:SI + (match_operand:SI 1 "register_operand" ""))) + (const_int 32) (const_int 0)))] + "") + +;; Signed bit-field extract from memory. Only two cases are expanded and +;; everything else FAILs: +;; - TARGET_SH2A && TARGET_BITOPS, address satisfying Sbw or Sbv, width +;; satisfying M and position satisfying K03: bldsign_m2a, then a move +;; from T unless the destination is T itself; +;; - TARGET_SH4A_ARCH, a 32 bit field in memory whose MEM_ALIGN is below 32: +;; a movua load of a 4 byte BLKmode reference. +;; NOTE(review): a 1 bit sign_extract is 0 or -1 while T holds 0 or 1 -- +;; confirm that the first path yields the intended value. +(define_expand "extv" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:QI 1 "unaligned_load_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "TARGET_SH4A_ARCH || TARGET_SH2A" +{ + if (TARGET_SH2A && TARGET_BITOPS + && (satisfies_constraint_Sbw (operands[1]) + || satisfies_constraint_Sbv (operands[1])) + && satisfies_constraint_M (operands[2]) + && satisfies_constraint_K03 (operands[3])) + { + emit_insn (gen_bldsign_m2a (operands[1], operands[3])); + /* The bit is now in T; copy it out unless T is the destination. */ + if (REGNO (operands[0]) != T_REG) + emit_insn (gen_movsi (operands[0], gen_rtx_REG (SImode, T_REG))); + DONE; + } + if (TARGET_SH4A_ARCH + && INTVAL (operands[2]) == 32 + && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) + && MEM_P (operands[1]) && MEM_ALIGN (operands[1]) < 32) + { + /* Re-describe the source as a 4 byte BLKmode reference for movua. */ + rtx src = adjust_address (operands[1], BLKmode, 0); + set_mem_size (src, GEN_INT (4)); + emit_insn (gen_movua (operands[0], src)); + DONE; + } + + FAIL; +}) + +;; Unsigned bit-field extract from memory. Same two cases as extv, except +;; that the SH2A single-bit path goes through bld_m2a; everything else FAILs. +(define_expand "extzv" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand:QI 1 "unaligned_load_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "TARGET_SH4A_ARCH || TARGET_SH2A" +{ + if (TARGET_SH2A && TARGET_BITOPS + && (satisfies_constraint_Sbw (operands[1]) + || satisfies_constraint_Sbv (operands[1])) + && satisfies_constraint_M (operands[2]) + && satisfies_constraint_K03 (operands[3])) + { + emit_insn 
(gen_bld_m2a (operands[1], operands[3])); + /* The bit is now in T; copy it out unless T is the destination. */ + if (REGNO (operands[0]) != T_REG) + emit_insn (gen_movsi (operands[0], gen_rtx_REG (SImode, T_REG))); + DONE; + } + if (TARGET_SH4A_ARCH + && INTVAL (operands[2]) == 32 + && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) + && MEM_P (operands[1]) && MEM_ALIGN (operands[1]) < 32) + { + /* Re-describe the source as a 4 byte BLKmode reference for movua. */ + rtx src = adjust_address (operands[1], BLKmode, 0); + set_mem_size (src, GEN_INT (4)); + emit_insn (gen_movua (operands[0], src)); + DONE; + } + + FAIL; +}) + +;; SH2A instructions for bitwise operations. + +;; Clear a bit in a memory location. The bit number is operand 1; the two +;; alternatives differ only in the address form (Sbw or Sbv). +(define_insn "bclr_m2a" + [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv") + (and:QI + (not:QI (ashift:QI (const_int 1) + (match_operand:QI 1 "const_int_operand" "K03,K03"))) + (match_dup 0)))] + "TARGET_SH2A && TARGET_BITOPS" + "@ + bclr.b\\t%1,%0 + bclr.b\\t%1,@(0,%t0)" +[(set_attr "length" "4,4")]) + +;; As bclr_m2a, but written as an AND of the memory byte with a constant +;; accepted by the Psz constraint. +;; NOTE(review): %W1 presumably prints the index of the cleared bit -- +;; confirm in print_operand. +(define_insn "bclrmem_m2a" + [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv") + (and:QI (match_dup 0) + (match_operand:QI 1 "const_int_operand" "Psz,Psz")))] + "TARGET_SH2A && satisfies_constraint_Psz (operands[1]) && TARGET_BITOPS" + "@ + bclr.b\\t%W1,%0 + bclr.b\\t%W1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;; Set a bit in a memory location. +(define_insn "bset_m2a" + [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv") + (ior:QI + (ashift:QI (const_int 1) + (match_operand:QI 1 "const_int_operand" "K03,K03")) + (match_dup 0)))] + "TARGET_SH2A && TARGET_BITOPS" + "@ + bset.b\\t%1,%0 + bset.b\\t%1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;; As bset_m2a, but written as an IOR of the memory byte with a constant +;; accepted by the Pso constraint. +;; NOTE(review): %V1 presumably prints the index of the set bit -- +;; confirm in print_operand. +(define_insn "bsetmem_m2a" + [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv") + (ior:QI (match_dup 0) + (match_operand:QI 1 "const_int_operand" "Pso,Pso")))] + "TARGET_SH2A && satisfies_constraint_Pso (operands[1]) && TARGET_BITOPS" + "@ + bset.b\\t%V1,%0 + bset.b\\t%V1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;;; Transfer the contents of the T bit to a specified bit of memory. 
+(define_insn "bst_m2a" + [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,m") + (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (and:QI + (not:QI (ashift:QI (const_int 1) + (match_operand:QI 1 "const_int_operand" "K03,K03"))) + (match_dup 0)) + (ior:QI + (ashift:QI (const_int 1) (match_dup 1)) + (match_dup 0))))] + "TARGET_SH2A && TARGET_BITOPS" + "@ + bst.b\\t%1,%0 + bst.b\\t%1,@(0,%t0)" + [(set_attr "length" "4")]) + +;; Store a specified bit of memory in the T bit (zero_extract form; +;; emitted by the extzv expander). +(define_insn "bld_m2a" + [(set (reg:SI T_REG) + (zero_extract:SI + (match_operand:QI 0 "bitwise_memory_operand" "Sbw,Sbv") + (const_int 1) + (match_operand 1 "const_int_operand" "K03,K03")))] + "TARGET_SH2A && TARGET_BITOPS" + "@ + bld.b\\t%1,%0 + bld.b\\t%1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;; Same bld.b templates as bld_m2a, but the RTL is a sign_extract; this is +;; the pattern emitted by the extv expander. +(define_insn "bldsign_m2a" + [(set (reg:SI T_REG) + (sign_extract:SI + (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m") + (const_int 1) + (match_operand 1 "const_int_operand" "K03,K03")))] + "TARGET_SH2A && TARGET_BITOPS" + "@ + bld.b\\t%1,%0 + bld.b\\t%1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;; Store a specified bit of the LSB 8 bits of a register in the T bit. +(define_insn "bld_reg" + [(set (reg:SI T_REG) + (zero_extract:SI (match_operand:SI 0 "arith_reg_operand" "r") + (const_int 1) + (match_operand 1 "const_int_operand" "K03")))] + "TARGET_SH2A" + "bld\\t%1,%0") + +;; Variant of bld_reg whose register operand is in QImode. +(define_insn "*bld_regqi" + [(set (reg:SI T_REG) + (zero_extract:SI (match_operand:QI 0 "arith_reg_operand" "r") + (const_int 1) + (match_operand 1 "const_int_operand" "K03")))] + "TARGET_SH2A" + "bld\\t%1,%0") + +;; Take logical and of a specified bit of memory with the T bit and +;; store its result in the T bit. 
+(define_insn "band_m2a" + [(set (reg:SI T_REG) + (and:SI (reg:SI T_REG) + (zero_extract:SI + (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m") + (const_int 1) + (match_operand 1 "const_int_operand" "K03,K03"))))] + "TARGET_SH2A && TARGET_BITOPS" + "@ + band.b\\t%1,%0 + band.b\\t%1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;; Register-destination form of band_m2a: the RTL sets operand 0 to the AND +;; of the extracted memory bit and register operand 3, and the template is +;; band.b followed by movt into operand 0. +;; NOTE(review): the template never mentions operand 3 -- confirm intent. +(define_insn "bandreg_m2a" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (and:SI (zero_extract:SI + (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv") + (const_int 1) + (match_operand 2 "const_int_operand" "K03,K03")) + (match_operand:SI 3 "register_operand" "r,r")))] + "TARGET_SH2A && TARGET_BITOPS" + "@ + band.b\\t%2,%1\;movt\\t%0 + band.b\\t%2,@(0,%t1)\;movt\\t%0" + [(set_attr "length" "6,6")]) + +;; Take logical or of a specified bit of memory with the T bit and +;; store its result in the T bit. +(define_insn "bor_m2a" + [(set (reg:SI T_REG) + (ior:SI (reg:SI T_REG) + (zero_extract:SI + (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m") + (const_int 1) + (match_operand 1 "const_int_operand" "K03,K03"))))] + "TARGET_SH2A && TARGET_BITOPS" + "@ + bor.b\\t%1,%0 + bor.b\\t%1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;; Register-destination form of bor_m2a, shaped like bandreg_m2a. Operand 3 +;; is only read by the RTL, so it takes the plain input constraint "r,r"; +;; the "=" modifier is reserved for operands that the insn writes. +;; NOTE(review): the template never mentions operand 3 -- confirm intent. +(define_insn "borreg_m2a" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ior:SI (zero_extract:SI + (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv") + (const_int 1) + (match_operand 2 "const_int_operand" "K03,K03")) + (match_operand:SI 3 "register_operand" "r,r")))] + "TARGET_SH2A && TARGET_BITOPS" + "@ + bor.b\\t%2,%1\;movt\\t%0 + bor.b\\t%2,@(0,%t1)\;movt\\t%0" + [(set_attr "length" "6,6")]) + +;; Take exclusive or of a specified bit of memory with the T bit and +;; store its result in the T bit. 
+(define_insn "bxor_m2a" + [(set (reg:SI T_REG) + (xor:SI (reg:SI T_REG) + (zero_extract:SI + (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m") + (const_int 1) + (match_operand 1 "const_int_operand" "K03,K03"))))] + "TARGET_SH2A && TARGET_BITOPS" + "@ + bxor.b\\t%1,%0 + bxor.b\\t%1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;; Register-destination form of bxor_m2a: the RTL sets operand 0 to the XOR +;; of the extracted memory bit and register operand 3, and the template is +;; bxor.b followed by movt into operand 0. Operand 3 is only read by the +;; RTL, so it takes the plain input constraint "r,r"; the "=" modifier is +;; reserved for operands that the insn writes. +;; NOTE(review): the template never mentions operand 3 -- confirm intent. +(define_insn "bxorreg_m2a" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (xor:SI (zero_extract:SI + (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv") + (const_int 1) + (match_operand 2 "const_int_operand" "K03,K03")) + (match_operand:SI 3 "register_operand" "r,r")))] + "TARGET_SH2A && TARGET_BITOPS" + "@ + bxor.b\\t%2,%1\;movt\\t%0 + bxor.b\\t%2,@(0,%t1)\;movt\\t%0" + [(set_attr "length" "6,6")]) + + +;; ------------------------------------------------------------------------- +;; Peepholes +;; ------------------------------------------------------------------------- +;; This matches cases where the bit in a memory location is set: a load of +;; the byte into a register, an IOR of that register with a Pso constant and +;; a store of the same register back to the same location are replaced by a +;; single IOR on the memory operand. +;; NOTE(review): the replacement no longer leaves the result in operand 0 and +;; the condition has no liveness check on that register -- confirm that it +;; is always dead after the store. +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_operand" "r,r") + (sign_extend:SI (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv"))) + (set (match_dup 0) + (ior:SI (match_dup 0) + (match_operand:SI 2 "const_int_operand" "Pso,Pso"))) + (set (match_dup 1) + (match_operand 3 "arith_reg_operand" "r,r"))] + "TARGET_SH2A && TARGET_BITOPS + && satisfies_constraint_Pso (operands[2]) + && REGNO (operands[0]) == REGNO (operands[3])" + [(set (match_dup 1) + (ior:QI (match_dup 1) + (match_dup 2)))] + "") + +;; This matches cases where the bit in a memory location is cleared. 
+(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_operand" "r,r") + (sign_extend:SI (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv"))) + (set (match_dup 0) + (and:SI (match_dup 0) + (match_operand:SI 2 "const_int_operand" "Psz,Psz"))) + (set (match_dup 1) + (match_operand 3 "arith_reg_operand" "r,r"))] + "TARGET_SH2A && TARGET_BITOPS + && satisfies_constraint_Psz (operands[2]) + && REGNO (operands[0]) == REGNO (operands[3])" + [(set (match_dup 1) + (and:QI (match_dup 1) + (match_dup 2)))] + "") + +;; This matches cases where a stack pointer increment at the start of the +;; epilogue combines with a stack slot read loading the return value. + +(define_peephole + [(set (match_operand:SI 0 "arith_reg_operand" "") + (mem:SI (match_operand:SI 1 "arith_reg_operand" ""))) + (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))] + "TARGET_SH1 && REGNO (operands[1]) != REGNO (operands[0])" + "mov.l @%1+,%0") + +;; See the comment on the dt combiner pattern above. + +(define_peephole + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (plus:SI (match_dup 0) + (const_int -1))) + (set (reg:SI T_REG) + (eq:SI (match_dup 0) + (const_int 0)))] + "TARGET_SH2" + "dt %0") + +;; These convert sequences such as `mov #k,r0; add r15,r0; mov.l @r0,rn' +;; to `mov #k,r0; mov.l @(r0,r15),rn'. These sequences are generated by +;; reload when the constant is too large for a reg+offset address. + +;; ??? We would get much better code if this was done in reload. This would +;; require modifying find_reloads_address to recognize that if the constant +;; is out-of-range for an immediate add, then we get better code by reloading +;; the constant into a register than by reloading the sum into a register, +;; since the former is one instruction shorter if the address does not need +;; to be offsettable. Unfortunately this does not work, because there is +;; only one register, r0, that can be used as an index register. 
This register +;; is also the function return value register. So, if we try to force reload +;; to use double-reg addresses, then we end up with some instructions that +;; need to use r0 twice. The only way to fix this is to change the calling +;; convention so that r0 is not used to return values. + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 2 "general_movsrc_operand" ""))] + "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.l %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 2 "general_movdst_operand" "") + (mem:SI (match_dup 0)))] + "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.l @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:HI (match_dup 0)) + (match_operand:HI 2 "general_movsrc_operand" ""))] + "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.w %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:HI 2 "general_movdst_operand" "") + (mem:HI (match_dup 0)))] + "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.w @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:QI (match_dup 0)) + (match_operand:QI 2 "general_movsrc_operand" ""))] + "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.b %2,@(%0,%1)") + +(define_peephole + [(set 
(match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:QI 2 "general_movdst_operand" "") + (mem:QI (match_dup 0)))] + "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.b @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 2 "general_movsrc_operand" ""))] + "TARGET_SH1 && REGNO (operands[0]) == 0 + && ((REG_P (operands[2]) && REGNO (operands[2]) < 16) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) < 16)) + && reg_unused_after (operands[0], insn)" + "mov.l %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SF 2 "general_movdst_operand" "") + + (mem:SF (match_dup 0)))] + "TARGET_SH1 && REGNO (operands[0]) == 0 + && ((REG_P (operands[2]) && REGNO (operands[2]) < 16) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) < 16)) + && reg_unused_after (operands[0], insn)" + "mov.l @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 2 "general_movsrc_operand" ""))] + "TARGET_SH2E && REGNO (operands[0]) == 0 + && ((REG_P (operands[2]) + && FP_OR_XD_REGISTER_P (REGNO (operands[2]))) + || (GET_CODE (operands[2]) == SUBREG + && FP_OR_XD_REGISTER_P (REGNO (SUBREG_REG (operands[2]))))) + && reg_unused_after (operands[0], insn)" + "fmov{.s|} %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SF 2 "general_movdst_operand" "") + + (mem:SF (match_dup 
0)))] + "TARGET_SH2E && REGNO (operands[0]) == 0 + && ((REG_P (operands[2]) + && FP_OR_XD_REGISTER_P (REGNO (operands[2]))) + || (GET_CODE (operands[2]) == SUBREG + && FP_OR_XD_REGISTER_P (REGNO (SUBREG_REG (operands[2]))))) + && reg_unused_after (operands[0], insn)" + "fmov{.s|} @(%0,%1),%2") + +;; Switch to a new stack with its address in sp_switch (a SYMBOL_REF). */ +(define_insn "sp_switch_1" + [(const_int 1) (match_operand:SI 0 "symbol_ref_operand" "s")] + "TARGET_SH1" + "* +{ + output_asm_insn (\"mov.l r0,@-r15\;mov.l %0,r0\", operands); + output_asm_insn (\"mov.l @r0,r0\;mov.l r15,@-r0\", operands); + return \"mov r0,r15\"; +}" + [(set_attr "length" "10")]) + +;; Switch back to the original stack for interrupt functions with the +;; sp_switch attribute. */ +(define_insn "sp_switch_2" + [(const_int 2)] + "TARGET_SH1" + "mov.l @r15+,r15\;mov.l @r15+,r0" + [(set_attr "length" "4")]) + +;; Integer vector moves + +(define_expand "movv8qi" + [(set (match_operand:V8QI 0 "general_movdst_operand" "") + (match_operand:V8QI 1 "general_movsrc_operand" ""))] + "TARGET_SHMEDIA" + "{ if (prepare_move_operands (operands, V8QImode)) DONE; }") + +(define_insn "movv8qi_i" + [(set (match_operand:V8QI 0 "general_movdst_operand" "=r,r,r,rl,m") + (match_operand:V8QI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], V8QImode) + || sh_register_operand (operands[1], V8QImode))" + "@ + add %1, r63, %0 + movi %1, %0 + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media") + (set_attr "length" "4,4,16,4,4")]) + +(define_split + [(set (match_operand:V8QI 0 "arith_reg_dest" "") + (subreg:V8QI (const_int 0) 0))] + "TARGET_SHMEDIA" + [(set (match_dup 0) + (const_vector:V8QI [(const_int 0) (const_int 0) (const_int 0) + (const_int 0) (const_int 0) (const_int 0) + (const_int 0) (const_int 0)]))]) + +(define_split + [(set (match_operand 0 "arith_reg_dest" "") + (match_operand 1 
"sh_rep_vec" ""))] + "TARGET_SHMEDIA && reload_completed + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && sh_vector_mode_supported_p (GET_MODE (operands[0])) + && GET_MODE_SIZE (GET_MODE (operands[0])) == 8 + && (XVECEXP (operands[1], 0, 0) != const0_rtx + || XVECEXP (operands[1], 0, 1) != const0_rtx) + && (XVECEXP (operands[1], 0, 0) != constm1_rtx + || XVECEXP (operands[1], 0, 1) != constm1_rtx)" + [(set (match_dup 0) (match_dup 1)) + (match_dup 2)] + " +{ + int unit_size = GET_MODE_UNIT_SIZE (GET_MODE (operands[1])); + rtx elt1 = XVECEXP (operands[1], 0, 1); + + if (unit_size > 2) + operands[2] = gen_mshflo_l (operands[0], operands[0], operands[0]); + else + { + if (unit_size < 2) + operands[0] = gen_rtx_REG (V4HImode, true_regnum (operands[0])); + operands[2] = gen_mperm_w0 (operands[0], operands[0]); + } + operands[0] = gen_rtx_REG (DImode, true_regnum (operands[0])); + operands[1] = XVECEXP (operands[1], 0, 0); + if (unit_size < 2) + { + if (CONST_INT_P (operands[1]) && CONST_INT_P (elt1)) + operands[1] + = GEN_INT (TARGET_LITTLE_ENDIAN + ? 
(INTVAL (operands[1]) & 0xff) + (INTVAL (elt1) << 8) + : (INTVAL (operands[1]) << 8) + (INTVAL (elt1) & 0xff)); + else + { + operands[0] = gen_rtx_REG (V2QImode, true_regnum (operands[0])); + operands[1] + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, operands[1], elt1)); + } + } +}") + +(define_split + [(set (match_operand 0 "arith_reg_dest" "") + (match_operand 1 "sh_const_vec" ""))] + "TARGET_SHMEDIA && reload_completed + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && sh_vector_mode_supported_p (GET_MODE (operands[0]))" + [(set (match_dup 0) (match_dup 1))] + " +{ + rtx v = operands[1]; + enum machine_mode new_mode + = mode_for_size (GET_MODE_BITSIZE (GET_MODE (v)), MODE_INT, 0); + + operands[0] = gen_rtx_REG (new_mode, true_regnum (operands[0])); + operands[1] + = simplify_subreg (new_mode, operands[1], GET_MODE (operands[1]), 0); +}") + +(define_expand "movv2hi" + [(set (match_operand:V2HI 0 "general_movdst_operand" "") + (match_operand:V2HI 1 "general_movsrc_operand" ""))] + "TARGET_SHMEDIA" + "{ if (prepare_move_operands (operands, V2HImode)) DONE; }") + +(define_insn "movv2hi_i" + [(set (match_operand:V2HI 0 "general_movdst_operand" "=r,r,r,rl,m") + (match_operand:V2HI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], V2HImode) + || sh_register_operand (operands[1], V2HImode))" + "@ + add.l %1, r63, %0 + movi %1, %0 + # + ld%M1.l %m1, %0 + st%M0.l %m0, %N1" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media") + (set_attr "length" "4,4,16,4,4") + (set (attr "highpart") + (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0)) + (const_string "user")] + (const_string "ignore")))]) + +(define_expand "movv4hi" + [(set (match_operand:V4HI 0 "general_movdst_operand" "") + (match_operand:V4HI 1 "general_movsrc_operand" ""))] + "TARGET_SHMEDIA" + "{ if (prepare_move_operands (operands, V4HImode)) DONE; }") + +(define_insn "movv4hi_i" + [(set (match_operand:V4HI 0 
"general_movdst_operand" "=r,r,r,rl,m") + (match_operand:V4HI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], V4HImode) + || sh_register_operand (operands[1], V4HImode))" + "@ + add %1, r63, %0 + movi %1, %0 + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media") + (set_attr "length" "4,4,16,4,4") + (set_attr "highpart" "depend")]) + +(define_expand "movv2si" + [(set (match_operand:V2SI 0 "general_movdst_operand" "") + (match_operand:V2SI 1 "general_movsrc_operand" ""))] + "TARGET_SHMEDIA" + "{ if (prepare_move_operands (operands, V2SImode)) DONE; }") + +(define_insn "movv2si_i" + [(set (match_operand:V2SI 0 "general_movdst_operand" "=r,r,r,rl,m") + (match_operand:V2SI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], V2SImode) + || sh_register_operand (operands[1], V2SImode))" + "@ + add %1, r63, %0 + # + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media") + (set_attr "length" "4,4,16,4,4") + (set_attr "highpart" "depend")]) + +;; Multimedia Intrinsics + +(define_insn "absv2si2" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (abs:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mabs.l %1, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "absv4hi2" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (abs:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mabs.w %1, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "addv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (plus:V2SI (match_operand:V2SI 1 "arith_reg_operand" "%r") + (match_operand:V2SI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "madd.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) 
+ +(define_insn "addv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (plus:V4HI (match_operand:V4HI 1 "arith_reg_operand" "%r") + (match_operand:V4HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "madd.w %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn_and_split "addv2hi3" + [(set (match_operand:V2HI 0 "arith_reg_dest" "=r") + (plus:V2HI (match_operand:V2HI 1 "extend_reg_operand" "%r") + (match_operand:V2HI 2 "extend_reg_operand" "r")))] + "TARGET_SHMEDIA" + "#" + "TARGET_SHMEDIA" + [(const_int 0)] + " +{ + rtx src0 = simplify_gen_subreg (V4HImode, operands[1], V2HImode, 0); + rtx src1 = simplify_gen_subreg (V4HImode, operands[2], V2HImode, 0); + rtx v4hi_dst = simplify_gen_subreg (V4HImode, operands[0], V2HImode, 0); + rtx di_dst = simplify_gen_subreg (DImode, operands[0], V2HImode, 0); + rtx si_dst = simplify_gen_subreg (SImode, operands[0], V2HImode, 0); + + emit_insn (gen_addv4hi3 (v4hi_dst, src0, src1)); + emit_insn (gen_truncdisi2 (si_dst, di_dst)); + DONE; +}" + [(set_attr "highpart" "must_split")]) + +(define_insn "ssaddv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_plus:V2SI (match_operand:V2SI 1 "arith_reg_operand" "%r") + (match_operand:V2SI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "madds.l %1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "usaddv8qi3" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (us_plus:V8QI (match_operand:V8QI 1 "arith_reg_operand" "%r") + (match_operand:V8QI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "madds.ub %1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "ssaddv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ss_plus:V4HI (match_operand:V4HI 1 "arith_reg_operand" "%r") + (match_operand:V4HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "madds.w %1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr 
"highpart" "depend")]) + +(define_insn "negcmpeqv8qi" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (neg:V8QI (eq:V8QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpeq.b %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "negcmpeqv2si" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (neg:V2SI (eq:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpeq.l %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "negcmpeqv4hi" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (neg:V4HI (eq:V4HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpeq.w %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "negcmpgtuv8qi" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (neg:V8QI (gtu:V8QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpgt.ub %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "negcmpgtv2si" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (neg:V2SI (gt:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpgt.l %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "negcmpgtv4hi" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (neg:V4HI (gt:V4HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpgt.w %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr 
"highpart" "depend")]) + +(define_insn "mcmv" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_operand" "r")) + (and:DI (match_operand:DI 3 "arith_reg_operand" "0") + (not:DI (match_dup 2)))))] + "TARGET_SHMEDIA" + "mcmv %N1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn "mcnvs_lw" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_concat:V4HI + (ss_truncate:V2HI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")) + (ss_truncate:V2HI (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcnvs.lw %N1, %N2, %0" + [(set_attr "type" "mcmp_media")]) + +(define_insn "mcnvs_wb" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (vec_concat:V8QI + (ss_truncate:V4QI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")) + (ss_truncate:V4QI (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcnvs.wb %N1, %N2, %0" + [(set_attr "type" "mcmp_media")]) + +(define_insn "mcnvs_wub" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (vec_concat:V8QI + (us_truncate:V4QI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")) + (us_truncate:V4QI (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcnvs.wub %N1, %N2, %0" + [(set_attr "type" "mcmp_media")]) + +(define_insn "mextr_rl" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (lshiftrt:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:HI 3 "mextr_bit_offset" "i")) + (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (match_operand:HI 4 "mextr_bit_offset" "i"))))] + "TARGET_SHMEDIA && INTVAL (operands[3]) + INTVAL (operands[4]) == 64" + "* +{ + static char templ[21]; + + sprintf (templ, \"mextr%d\\t%%N1, %%N2, %%0\", + (int) INTVAL (operands[3]) >> 3); + return templ; +}" + [(set_attr "type" "arith_media")]) + +(define_insn "*mextr_lr" + 
[(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:HI 3 "mextr_bit_offset" "i")) + (lshiftrt:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (match_operand:HI 4 "mextr_bit_offset" "i"))))] + "TARGET_SHMEDIA && INTVAL (operands[3]) + INTVAL (operands[4]) == 64" + "* +{ + static char templ[21]; + + sprintf (templ, \"mextr%d\\t%%N2, %%N1, %%0\", + (int) INTVAL (operands[4]) >> 3); + return templ; +}" + [(set_attr "type" "arith_media")]) + +; mextrN can be modelled with vec_select / vec_concat, but the selection +; vector then varies depending on endianness. +(define_expand "mextr1" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (1 * 8), GEN_INT (7 * 8))); + DONE; +}") + +(define_expand "mextr2" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (2 * 8), GEN_INT (6 * 8))); + DONE; +}") + +(define_expand "mextr3" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (3 * 8), GEN_INT (5 * 8))); + DONE; +}") + +(define_expand "mextr4" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (4 * 8), GEN_INT (4 * 8))); + DONE; +}") + +(define_expand "mextr5" + [(match_operand:DI 0 
"arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (5 * 8), GEN_INT (3 * 8))); + DONE; +}") + +(define_expand "mextr6" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (6 * 8), GEN_INT (2 * 8))); + DONE; +}") + +(define_expand "mextr7" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (7 * 8), GEN_INT (1 * 8))); + DONE; +}") + +(define_expand "mmacfx_wl" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V2HI 1 "extend_reg_operand" "") + (match_operand:V2HI 2 "extend_reg_operand" "") + (match_operand:V2SI 3 "arith_reg_operand" "")] + "TARGET_SHMEDIA" + " +{ + emit_insn (gen_mmacfx_wl_i (operands[0], operands[3], + operands[1], operands[2])); + DONE; +}") + +;; This could be highpart ignore if it only had inputs 2 or 3, but input 1 +;; is depend +(define_insn "mmacfx_wl_i" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_plus:V2SI + (match_operand:V2SI 1 "arith_reg_operand" "0") + (ss_truncate:V2SI + (ashift:V2DI + (sign_extend:V2DI + (mult:V2SI + (sign_extend:V2SI (match_operand:V2HI 2 "extend_reg_operand" "r")) + (sign_extend:V2SI (match_operand:V2HI 3 "extend_reg_operand" "r")))) + (const_int 1)))))] + "TARGET_SHMEDIA" + "mmacfx.wl %2, %3, %0" + [(set_attr "type" "mac_media") + (set_attr "highpart" "depend")]) + +(define_expand "mmacnfx_wl" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V2HI 1 "extend_reg_operand" "") + (match_operand:V2HI 2 
"extend_reg_operand" "") + (match_operand:V2SI 3 "arith_reg_operand" "")] + "TARGET_SHMEDIA" + " +{ + emit_insn (gen_mmacnfx_wl_i (operands[0], operands[3], + operands[1], operands[2])); + DONE; +}") + +(define_insn "mmacnfx_wl_i" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_minus:V2SI + (match_operand:V2SI 1 "arith_reg_operand" "0") + (ss_truncate:V2SI + (ashift:V2DI + (sign_extend:V2DI + (mult:V2SI + (sign_extend:V2SI (match_operand:V2HI 2 "extend_reg_operand" "r")) + (sign_extend:V2SI (match_operand:V2HI 3 "extend_reg_operand" "r")))) + (const_int 1)))))] + "TARGET_SHMEDIA" + "mmacnfx.wl %2, %3, %0" + [(set_attr "type" "mac_media") + (set_attr "highpart" "depend")]) + +(define_insn "mulv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (mult:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r") + (match_operand:V2SI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mmul.l %1, %2, %0" + [(set_attr "type" "d2mpy_media") + (set_attr "highpart" "depend")]) + +(define_insn "mulv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (mult:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r") + (match_operand:V4HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mmul.w %1, %2, %0" + [(set_attr "type" "dmpy_media") + (set_attr "highpart" "depend")]) + +(define_insn "mmulfx_l" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_truncate:V2SI + (ashiftrt:V2DI + (mult:V2DI + (sign_extend:V2DI (match_operand:V2SI 1 "arith_reg_operand" "r")) + (sign_extend:V2DI (match_operand:V2SI 2 "arith_reg_operand" "r"))) + (const_int 31))))] + "TARGET_SHMEDIA" + "mmulfx.l %1, %2, %0" + [(set_attr "type" "d2mpy_media") + (set_attr "highpart" "depend")]) + +(define_insn "mmulfx_w" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ss_truncate:V4HI + (ashiftrt:V4SI + (mult:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r")) + (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r"))) + (const_int 15))))] + 
"TARGET_SHMEDIA" + "mmulfx.w %1, %2, %0" + [(set_attr "type" "dmpy_media") + (set_attr "highpart" "depend")]) + +(define_insn "mmulfxrp_w" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ss_truncate:V4HI + (ashiftrt:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r")) + (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r"))) + (const_int 16384)) + (const_int 15))))] + "TARGET_SHMEDIA" + "mmulfxrp.w %1, %2, %0" + [(set_attr "type" "dmpy_media") + (set_attr "highpart" "depend")]) + + +(define_expand "mmulhi_wl" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V4HI 1 "arith_reg_operand" "") + (match_operand:V4HI 2 "arith_reg_operand" "")] + "TARGET_SHMEDIA" + " +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mmul23_wl : gen_mmul01_wl) + (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_expand "mmullo_wl" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V4HI 1 "arith_reg_operand" "") + (match_operand:V4HI 2 "arith_reg_operand" "")] + "TARGET_SHMEDIA" + " +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mmul01_wl : gen_mmul23_wl) + (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_insn "mmul23_wl" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (vec_select:V2SI + (mult:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r")) + (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r"))) + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_SHMEDIA" + "* return (TARGET_LITTLE_ENDIAN + ? 
\"mmulhi.wl %1, %2, %0\" + : \"mmullo.wl %1, %2, %0\");" + [(set_attr "type" "dmpy_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "big") (const_string "ignore")] + (const_string "user")))]) + +(define_insn "mmul01_wl" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (vec_select:V2SI + (mult:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r")) + (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r"))) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_SHMEDIA" + "* return (TARGET_LITTLE_ENDIAN + ? \"mmullo.wl %1, %2, %0\" + : \"mmulhi.wl %1, %2, %0\");" + [(set_attr "type" "dmpy_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "little") (const_string "ignore")] + (const_string "user")))]) + + +(define_expand "mmulsum_wq" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:V4HI 1 "arith_reg_operand" "") + (match_operand:V4HI 2 "arith_reg_operand" "") + (match_operand:DI 3 "arith_reg_operand" "")] + "TARGET_SHMEDIA" + " +{ + emit_insn (gen_mmulsum_wq_i (operands[0], operands[3], + operands[1], operands[2])); + DONE; +}") + +(define_insn "mmulsum_wq_i" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (plus:DI (match_operand:DI 1 "arith_reg_operand" "0") + (plus:DI + (plus:DI + (vec_select:DI + (mult:V4DI + (sign_extend:V4DI (match_operand:V4HI 2 "arith_reg_operand" "r")) + (sign_extend:V4DI (match_operand:V4HI 3 "arith_reg_operand" "r"))) + (parallel [(const_int 0)])) + (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2)) + (sign_extend:V4DI (match_dup 3))) + (parallel [(const_int 1)]))) + (plus:DI + (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2)) + (sign_extend:V4DI (match_dup 3))) + (parallel [(const_int 2)])) + (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2)) + (sign_extend:V4DI (match_dup 3))) + (parallel [(const_int 3)]))))))] + "TARGET_SHMEDIA" + "mmulsum.wq %2, %3, %0" + [(set_attr "type" "mac_media")]) + +(define_expand "mperm_w" + [(match_operand:V4HI 0 
"arith_reg_dest" "=r") + (match_operand:V4HI 1 "arith_reg_operand" "r") + (match_operand:QI 2 "extend_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mperm_w_little : gen_mperm_w_big) + (operands[0], operands[1], operands[2])); + DONE; +}") + +; This use of vec_select isn't exactly correct according to rtl.texi +; (because not constant), but it seems a straightforward extension. +(define_insn "mperm_w_little" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_select:V4HI + (match_operand:V4HI 1 "arith_reg_operand" "r") + (parallel + [(zero_extract:QI (match_operand:QI 2 "extend_reg_or_0_operand" "rZ") + (const_int 2) (const_int 0)) + (zero_extract:QI (match_dup 2) (const_int 2) (const_int 2)) + (zero_extract:QI (match_dup 2) (const_int 2) (const_int 4)) + (zero_extract:QI (match_dup 2) (const_int 2) (const_int 6))])))] + "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN" + "mperm.w %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "mperm_w_big" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_select:V4HI + (match_operand:V4HI 1 "arith_reg_operand" "r") + (parallel + [(zero_extract:QI (not:QI (match_operand:QI 2 + "extend_reg_or_0_operand" "rZ")) + (const_int 2) (const_int 0)) + (zero_extract:QI (not:QI (match_dup 2)) (const_int 2) (const_int 2)) + (zero_extract:QI (not:QI (match_dup 2)) (const_int 2) (const_int 4)) + (zero_extract:QI (not:QI (match_dup 2)) + (const_int 2) (const_int 6))])))] + "TARGET_SHMEDIA && ! 
TARGET_LITTLE_ENDIAN" + "mperm.w %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "mperm_w0" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_duplicate:V4HI (truncate:HI (match_operand 1 + "trunc_hi_operand" "r"))))] + "TARGET_SHMEDIA" + "mperm.w %1, r63, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_expand "msad_ubq" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:V8QI 1 "arith_reg_or_0_operand" "") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "") + (match_operand:DI 3 "arith_reg_operand" "")] + "TARGET_SHMEDIA" + " +{ + emit_insn (gen_msad_ubq_i (operands[0], operands[3], + operands[1], operands[2])); + DONE; +}") + +(define_insn "msad_ubq_i" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (plus:DI + (plus:DI + (plus:DI + (plus:DI + (match_operand:DI 1 "arith_reg_operand" "0") + (abs:DI (vec_select:DI + (minus:V8DI + (zero_extend:V8DI + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")) + (zero_extend:V8DI + (match_operand:V8QI 3 "arith_reg_or_0_operand" "rZ"))) + (parallel [(const_int 0)])))) + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 1)])))) + (plus:DI + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 2)]))) + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 3)]))))) + (plus:DI + (plus:DI + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 4)]))) + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 5)])))) + (plus:DI + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 6)]))) + (abs:DI (vec_select:DI 
(minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 7)])))))))] + "TARGET_SHMEDIA" + "msad.ubq %N2, %N3, %0" + [(set_attr "type" "mac_media")]) + +/* Each SImode lane of operand 1 is sign-extended, shifted left by operand 2 masked with 31, then truncated back with signed saturation. */ (define_insn "mshalds_l" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_truncate:V2SI + (ashift:V2DI + (sign_extend:V2DI (match_operand:V2SI 1 "arith_reg_operand" "r")) + (and:DI (match_operand:DI 2 "arith_reg_operand" "r") + (const_int 31)))))] + "TARGET_SHMEDIA" + "mshalds.l %1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +/* HImode-lane counterpart of mshalds_l: the shift count is operand 2 masked with 15. */ (define_insn "mshalds_w" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ss_truncate:V4HI + (ashift:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r")) + (and:DI (match_operand:DI 2 "arith_reg_operand" "r") + (const_int 15)))))] + "TARGET_SHMEDIA" + "mshalds.w %1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +/* Arithmetic right shift of each V2SI lane by the DImode count in operand 2. */ (define_insn "ashrv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ashiftrt:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshard.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +/* Arithmetic right shift of each V4HI lane by the DImode count in operand 2. */ (define_insn "ashrv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ashiftrt:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshard.w %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +/* Arithmetic right shift of DImode operand 1 by operand 2 (register or zero), truncated to HImode with signed saturation. */ (define_insn "mshards_q" + [(set (match_operand:HI 0 "arith_reg_dest" "=r") + (ss_truncate:HI + (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mshards.q %1, %N2, %0" + [(set_attr "type" "mcmp_media")]) + +/* Expander for the mshfhi.b builtin: emits mshf4_b on little-endian targets and mshf0_b otherwise. */ (define_expand "mshfhi_b" + [(match_operand:V8QI 0 "arith_reg_dest" "") + (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ") + 
(match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf4_b : gen_mshf0_b) + (operands[0], operands[1], operands[2])); + DONE; +}") + +/* Expander for the mshflo.b builtin: the mirror image of mshfhi_b, emitting mshf0_b on little-endian targets and mshf4_b otherwise. */ (define_expand "mshflo_b" + [(match_operand:V8QI 0 "arith_reg_dest" "") + (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_b : gen_mshf4_b) + (operands[0], operands[1], operands[2])); + DONE; +}") + +/* Interleaves elements 4..7 of operand 1 with elements 4..7 of operand 2 (indices 4,12,5,13,... of the 16-element concatenation). The mnemonic printed depends on endianness. */ (define_insn "mshf4_b" + [(set + (match_operand:V8QI 0 "arith_reg_dest" "=r") + (vec_select:V8QI + (vec_concat:V16QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 4) (const_int 12) (const_int 5) (const_int 13) + (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))] + "TARGET_SHMEDIA" + "* return (TARGET_LITTLE_ENDIAN + ? \"mshfhi.b %N1, %N2, %0\" + : \"mshflo.b %N1, %N2, %0\");" + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "big") (const_string "ignore")] + (const_string "user")))]) + +/* Interleaves elements 0..3 of operand 1 with elements 0..3 of operand 2 (indices 0,8,1,9,... of the concatenation). The mnemonic printed depends on endianness. */ (define_insn "mshf0_b" + [(set + (match_operand:V8QI 0 "arith_reg_dest" "=r") + (vec_select:V8QI + (vec_concat:V16QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9) + (const_int 2) (const_int 10) (const_int 3) (const_int 11)])))] + "TARGET_SHMEDIA" + "* return (TARGET_LITTLE_ENDIAN + ? 
\"mshflo.b %N1, %N2, %0\" + : \"mshfhi.b %N1, %N2, %0\");" + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "little") (const_string "ignore")] + (const_string "user")))]) + +/* Expander for the mshfhi.l builtin: emits mshf4_l on little-endian targets and mshf0_l otherwise. */ (define_expand "mshfhi_l" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf4_l : gen_mshf0_l) + (operands[0], operands[1], operands[2])); + DONE; +}") + +/* Expander for the mshflo.l builtin: emits mshf0_l on little-endian targets and mshf4_l otherwise. */ (define_expand "mshflo_l" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_l : gen_mshf4_l) + (operands[0], operands[1], operands[2])); + DONE; +}") + +/* Selects element 1 of operand 1 and element 1 of operand 2 (indices 1 and 3 of the concatenation). */ (define_insn "mshf4_l" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (vec_select:V2SI + (vec_concat:V4SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 1) (const_int 3)])))] + "TARGET_SHMEDIA" + "* return (TARGET_LITTLE_ENDIAN + ? \"mshfhi.l %N1, %N2, %0\" + : \"mshflo.l %N1, %N2, %0\");" + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "big") (const_string "ignore")] + (const_string "user")))]) + +/* Selects element 0 of operand 1 and element 0 of operand 2 (indices 0 and 2 of the concatenation). */ (define_insn "mshf0_l" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (vec_select:V2SI + (vec_concat:V4SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 0) (const_int 2)])))] + "TARGET_SHMEDIA" + "* return (TARGET_LITTLE_ENDIAN + ? 
\"mshflo.l %N1, %N2, %0\" + : \"mshfhi.l %N1, %N2, %0\");" + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "little") (const_string "ignore")] + (const_string "user")))]) + +/* Expander for the mshfhi.w builtin: emits mshf4_w on little-endian targets and mshf0_w otherwise. */ (define_expand "mshfhi_w" + [(match_operand:V4HI 0 "arith_reg_dest" "") + (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf4_w : gen_mshf0_w) + (operands[0], operands[1], operands[2])); + DONE; +}") + +/* Expander for the mshflo.w builtin: emits mshf0_w on little-endian targets and mshf4_w otherwise. */ (define_expand "mshflo_w" + [(match_operand:V4HI 0 "arith_reg_dest" "") + (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" + " +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_w : gen_mshf4_w) + (operands[0], operands[1], operands[2])); + DONE; +}") + +/* Interleaves elements 2..3 of operand 1 with elements 2..3 of operand 2 (indices 2,6,3,7 of the concatenation). */ (define_insn "mshf4_w" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_select:V4HI + (vec_concat:V8HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))] + "TARGET_SHMEDIA" + "* return (TARGET_LITTLE_ENDIAN + ? \"mshfhi.w %N1, %N2, %0\" + : \"mshflo.w %N1, %N2, %0\");" + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "big") (const_string "ignore")] + (const_string "user")))]) + +/* Interleaves elements 0..1 of operand 1 with elements 0..1 of operand 2 (indices 0,4,1,5 of the concatenation). */ (define_insn "mshf0_w" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_select:V4HI + (vec_concat:V8HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] + "TARGET_SHMEDIA" + "* return (TARGET_LITTLE_ENDIAN + ? 
\"mshflo.w %N1, %N2, %0\" + : \"mshfhi.w %N1, %N2, %0\");" + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "little") (const_string "ignore")] + (const_string "user")))]) + +/* Variant taking two V2HI inputs: the result is elements 2, 0, 3, 1 of their V4HI concatenation, always printed as mshflo.w. */ (define_insn "mshflo_w_x" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_select:V4HI + (vec_concat:V4HI (match_operand:V2HI 1 "extend_reg_or_0_operand" "rZ") + (match_operand:V2HI 2 "extend_reg_or_0_operand" "rZ")) + (parallel [(const_int 2) (const_int 0) (const_int 3) (const_int 1)])))] + "TARGET_SHMEDIA" + "mshflo.w %N1, %N2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +/* These are useful to expand ANDs and as combiner patterns. */ +/* DImode view of mshfhi.l: operand 1 shifted right by 32, ORed with the upper 32 bits of operand 2. The second alternative is split after reload, when the destination is not a general register, into two SImode part moves. */ (define_insn_and_split "mshfhi_l_di" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,f") + (ior:DI (lshiftrt:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ,f") + (const_int 32)) + (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ,?f") + (const_int -4294967296))))] + "TARGET_SHMEDIA" + "@ + mshfhi.l %N1, %N2, %0 + #" + "TARGET_SHMEDIA && reload_completed + && ! 
GENERAL_REGISTER_P (true_regnum (operands[0]))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " +{ + operands[3] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[0]); + operands[6] = gen_highpart (SImode, operands[2]); +}" + [(set_attr "type" "arith_media")]) + +/* Same operation as mshfhi_l_di with the two ior arms in the opposite order; the output template swaps %N1 and %N2 to compensate. */ (define_insn "*mshfhi_l_di_rev" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (const_int -4294967296)) + (lshiftrt:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (const_int 32))))] + "TARGET_SHMEDIA" + "mshfhi.l %N2, %N1, %0" + [(set_attr "type" "arith_media")]) + +/* Splits (zero_extend operand 1) ORed with the upper 32 bits of operand 2: the DImode subreg of operand 1 is shifted left by 32 into the clobbered scratch operand 3, which is then fed to mshfhi_l_di. */ (define_split + [(set (match_operand:DI 0 "arith_reg_dest" "") + (ior:DI (zero_extend:DI (match_operand:SI 1 + "extend_reg_or_0_operand" "")) + (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "") + (const_int -4294967296)))) + (clobber (match_operand:DI 3 "arith_reg_dest" ""))] + "TARGET_SHMEDIA" + [(const_int 0)] + " +{ + emit_insn (gen_ashldi3_media (operands[3], + simplify_gen_subreg (DImode, operands[1], + SImode, 0), + GEN_INT (32))); + emit_insn (gen_mshfhi_l_di (operands[0], operands[3], operands[2])); + DONE; +}") + +/* DImode view of mshflo.l: the low 32 bits of operand 1 ORed with operand 2 shifted left by 32. */ (define_insn "mshflo_l_di" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (const_int 4294967295)) + (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (const_int 32))))] + + "TARGET_SHMEDIA" + "mshflo.l %N1, %N2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +/* mshflo_l_di with the ior arms in the opposite order; the output template swaps the operands. */ (define_insn "*mshflo_l_di_rev" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (const_int 32)) + (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (const_int 4294967295))))] + + "TARGET_SHMEDIA" + "mshflo.l %N2, %N1, %0" + [(set_attr "type" "arith_media") + 
(set_attr "highpart" "ignore")]) + +;; Combiner pattern for trampoline initialization. +/* Matches (operand 1 << 32) ORed with a constant that has no bits set above bit 31. Once source and destination are the same rtx it is split into two shori steps, the first supplying the constant shifted right by 16 and the second its low 16 bits. */ (define_insn_and_split "*double_shori" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 32)) + (match_operand:DI 2 "const_int_operand" "n")))] + "TARGET_SHMEDIA + && ! (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0xffffffffUL)" + "#" + "rtx_equal_p (operands[0], operands[1])" + [(const_int 0)] + " +{ + HOST_WIDE_INT v = INTVAL (operands[2]); + + emit_insn (gen_shori_media (operands[0], operands[0], GEN_INT (v >> 16))); + emit_insn (gen_shori_media (operands[0], operands[0], GEN_INT (v & 65535))); + DONE; +}" + [(set_attr "highpart" "ignore")]) + + +/* mshflo.l form where the low half is written as a zero_extend of an SImode operand rather than an AND mask. */ (define_insn "*mshflo_l_di_x" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_or_0_operand" + "rZ")) + (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (const_int 32))))] + + "TARGET_SHMEDIA" + "mshflo.l %N1, %N2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +/* Builds a V2SF from two SF registers. The first alternative is a single mshflo.l; the other two are split after reload, when the destination is not a general register, into two SFmode moves to the subregs at byte offsets 0 and 4. */ (define_insn_and_split "concat_v2sf" + [(set (match_operand:V2SF 0 "register_operand" "=r,f,f?") +;; (vec_concat:V2SF (match_operand:SF 1 "register_operand" "rZ,0,f") + (vec_concat:V2SF (match_operand:SF 1 "register_operand" "rZ,f,f") + (match_operand:SF 2 "register_operand" "rZ,f,f")))] + + "TARGET_SHMEDIA" + "@ + mshflo.l %N1, %N2, %0 + # + #" + "TARGET_SHMEDIA && reload_completed + && ! 
GENERAL_REGISTER_P (true_regnum (operands[0]))" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 4) (match_dup 2))] + " +{ + operands[3] = simplify_gen_subreg (SFmode, operands[0], V2SFmode, 0); + operands[4] = simplify_gen_subreg (SFmode, operands[0], V2SFmode, 4); +}" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +/* *mshflo_l_di_x with the ior arms in the opposite order; the output template swaps the operands. */ (define_insn "*mshflo_l_di_x_rev" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (const_int 32)) + (zero_extend:DI (match_operand:SI 2 "extend_reg_or_0_operand" "rZ"))))] + + "TARGET_SHMEDIA" + "mshflo.l %N2, %N1, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +/* Left shift of each V2SI lane by the DImode count in operand 2. */ (define_insn "ashlv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ashift:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "shift_count_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshlld.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +/* Rewrites a shift whose count satisfies shift_count_reg_operand but is not a plain register: extensions, truncations and zero-offset subregs are peeled off the count, which is then re-expressed as a lowpart subreg in the count's original mode. */ (define_split + [(set (match_operand 0 "any_register_operand" "") + (match_operator 3 "shift_operator" + [(match_operand 1 "any_register_operand" "") + (match_operand 2 "shift_count_reg_operand" "")]))] + "TARGET_SHMEDIA && ! 
register_operand (operands[2], VOIDmode)" + [(set (match_dup 0) (match_dup 3))] + " +{ + rtx count = operands[2]; + enum machine_mode outer_mode = GET_MODE (operands[2]), inner_mode; + + while (GET_CODE (count) == ZERO_EXTEND || GET_CODE (count) == SIGN_EXTEND + || (GET_CODE (count) == SUBREG && SUBREG_BYTE (count) == 0) + || GET_CODE (count) == TRUNCATE) + count = XEXP (count, 0); + inner_mode = GET_MODE (count); + count = simplify_gen_subreg (outer_mode, count, inner_mode, + subreg_lowpart_offset (outer_mode, inner_mode)); + operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]), + operands[1], count); +}") + +/* Left shift of each V4HI lane by the DImode count in operand 2. */ (define_insn "ashlv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ashift:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "shift_count_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshlld.w %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +/* Logical right shift of each V2SI lane by the DImode count in operand 2. */ (define_insn "lshrv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (lshiftrt:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "shift_count_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshlrd.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +/* Logical right shift of each V4HI lane by the DImode count in operand 2. */ (define_insn "lshrv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (lshiftrt:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "shift_count_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshlrd.w %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +/* Lane-wise V2SI subtraction; operand 1 may be a register or zero. */ (define_insn "subv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (minus:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "msub.l %N1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +/* Lane-wise V4HI subtraction; operand 1 may be a register or zero. */ (define_insn "subv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (minus:V4HI 
(match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "msub.w %N1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +/* V2HI subtraction has no output template of its own and is always split: a V4HI subtract is done on V4HImode subregs of the operands, then the DImode view of the destination is truncated into its SImode view. */ (define_insn_and_split "subv2hi3" + [(set (match_operand:V2HI 0 "arith_reg_dest" "=r") + (minus:V2HI (match_operand:V2HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "#" + "TARGET_SHMEDIA" + [(const_int 0)] + " +{ + rtx src0 = simplify_gen_subreg (V4HImode, operands[1], V2HImode, 0); + rtx src1 = simplify_gen_subreg (V4HImode, operands[2], V2HImode, 0); + rtx v4hi_dst = simplify_gen_subreg (V4HImode, operands[0], V2HImode, 0); + rtx di_dst = simplify_gen_subreg (DImode, operands[0], V2HImode, 0); + rtx si_dst = simplify_gen_subreg (SImode, operands[0], V2HImode, 0); + + emit_insn (gen_subv4hi3 (v4hi_dst, src0, src1)); + emit_insn (gen_truncdisi2 (si_dst, di_dst)); + DONE; +}" + [(set_attr "highpart" "must_split")]) + +/* Signed-saturating lane-wise V2SI subtraction. */ (define_insn "sssubv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_minus:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "msubs.l %N1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +/* Unsigned-saturating lane-wise V8QI subtraction. */ (define_insn "ussubv8qi3" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (us_minus:V8QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V8QI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "msubs.ub %N1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +/* Signed-saturating lane-wise V4HI subtraction. */ (define_insn "sssubv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ss_minus:V4HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "msubs.w %N1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +;; Floating Point Intrinsics + 
+/* Opaque (unspec) SF result computed from an SImode operand held in a floating-point register; printed as fcosa.s. */ (define_insn "fcosa_s" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (unspec:SF [(match_operand:SI 1 "fp_arith_reg_operand" "f")] + UNSPEC_FCOSA))] + "TARGET_SHMEDIA" + "fcosa.s %1, %0" + [(set_attr "type" "atrans_media")]) + +/* Same shape as fcosa_s, using UNSPEC_FSINA and printed as fsina.s. */ (define_insn "fsina_s" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (unspec:SF [(match_operand:SI 1 "fp_arith_reg_operand" "f")] + UNSPEC_FSINA))] + "TARGET_SHMEDIA" + "fsina.s %1, %0" + [(set_attr "type" "atrans_media")]) + +/* Four-element inner product: the sum of lanes 0..3 of the lane-wise product of V4SF operands 1 and 2. */ (define_insn "fipr" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (plus:SF (vec_select:SF (mult:V4SF (match_operand:V4SF 1 + "fp_arith_reg_operand" "f") + (match_operand:V4SF 2 + "fp_arith_reg_operand" "f")) + (parallel [(const_int 0)])) + (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2)) + (parallel [(const_int 1)]))) + (plus:SF (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2)) + (parallel [(const_int 2)])) + (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2)) + (parallel [(const_int 3)])))))] + "TARGET_SHMEDIA" + "fipr.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +/* Opaque (unspec) SF-to-SF operation using UNSPEC_FSRRA; printed as fsrra.s. */ (define_insn "fsrra_s" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "f")] + UNSPEC_FSRRA))] + "TARGET_SHMEDIA" + "fsrra.s %1, %0" + [(set_attr "type" "atrans_media")]) + +/* Operand 1 is a 16-element vector and operand 2 a 4-element vector. The result is the sum of four lane-wise products, each pairing a different 4-element selection of operand 1 with operand 2 rotated by 0, 1, 2 and 3 lanes. */ (define_insn "ftrv" + [(set (match_operand:V4SF 0 "fp_arith_reg_operand" "=f") + (plus:V4SF + (plus:V4SF + (mult:V4SF + (vec_select:V4SF (match_operand:V16SF 1 "fp_arith_reg_operand" "f") + (parallel [(const_int 0) (const_int 5) + (const_int 10) (const_int 15)])) + (match_operand:V4SF 2 "fp_arith_reg_operand" "f")) + (mult:V4SF + (vec_select:V4SF (match_dup 1) + (parallel [(const_int 4) (const_int 9) + (const_int 14) (const_int 3)])) + (vec_select:V4SF (match_dup 2) + (parallel [(const_int 1) (const_int 2) + (const_int 3) (const_int 0)])))) + (plus:V4SF + (mult:V4SF + (vec_select:V4SF (match_dup 1) + (parallel [(const_int 8) (const_int 13) + (const_int 
2) (const_int 7)])) + (vec_select:V4SF (match_dup 2) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)]))) + (mult:V4SF + (vec_select:V4SF (match_dup 1) + (parallel [(const_int 12) (const_int 1) + (const_int 6) (const_int 11)])) + (vec_select:V4SF (match_dup 2) + (parallel [(const_int 3) (const_int 0) + (const_int 1) (const_int 2)]))))))] + "TARGET_SHMEDIA" + "ftrv.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +/* Unaligned-load high part, 32-bit addressing: a zero_extract from the SImode word at ((address | 3) - 3), with width operand (address & 3) + 1 and position 0. */ (define_insn "ldhi_l" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (zero_extract:SI + (mem:SI (plus:SI (ior:SI (match_operand:QI 1 "ua_address_operand" "p") + (const_int 3)) + (const_int -3))) + (plus:SI (and:SI (match_dup 1) (const_int 3)) (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA32" + "ldhi.l %U1, %0" + [(set_attr "type" "load_media")]) + +/* DImode counterpart of ldhi_l: the memory word is at ((address | 7) - 7) and the width operand is (address & 7) + 1. */ (define_insn "ldhi_q" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (plus:SI (ior:SI (match_operand:QI 1 "ua_address_operand" "p") + (const_int 7)) + (const_int -7))) + (plus:SI (and:SI (match_dup 1) (const_int 7)) (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA32" + "ldhi.q %U1, %0" + [(set_attr "type" "load_media")]) + +/* Form of ldhi_q where the address is a register plus an offset that is a multiple of 8, so the width expression mentions the register alone. Split unconditionally into ldhi_q on the summed address. */ (define_insn_and_split "*ldhi_q_comb0" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (plus:SI (ior:SI (plus:SI (match_operand:SI 1 + "register_operand" "r") + (match_operand:SI 2 + "ua_offset" "I06")) + (const_int 7)) + (const_int -7))) + (plus:SI (and:SI (match_dup 1) (const_int 7)) + (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & 7) == 0" + "#" + "" + [(pc)] + "emit_insn (gen_ldhi_q (operands[0], + gen_rtx_PLUS (SImode, operands[1], operands[2]))); + DONE;") + + +/* Form of ldhi_q where the width expression carries its own offset, operand 3, which must equal the low three bits of the address offset operand 2; operand 2 must also have bits set outside its low three. Split into ldhi_q on the summed address. */ (define_insn_and_split "*ldhi_q_comb1" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (plus:SI (ior:SI (plus:SI (match_operand:SI 1 + "register_operand" "r") + (match_operand:SI 2 + "ua_offset" "I06")) + (const_int 7)) + (const_int -7))) + (plus:SI (and:SI 
(plus:SI (match_dup 1) (match_operand:SI 3 + "ua_offset" "I06")) + (const_int 7)) + (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & -8) + && (INTVAL (operands[2]) & 7) == INTVAL (operands[3])" + "#" + "" + [(pc)] + "emit_insn (gen_ldhi_q (operands[0], + gen_rtx_PLUS (SImode, operands[1], operands[2]))); + DONE;") + + +/* Unaligned-load low part, 32-bit addressing: a zero_extract from the SImode word at (address & -4), with width operand 4 - (address & 3) and position operand (address & 3). */ (define_insn "ldlo_l" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (zero_extract:SI + (mem:SI (and:SI (match_operand:QI 1 "ua_address_operand" "p") + (const_int -4))) + (minus:SI (const_int 4) (and:SI (match_dup 1) (const_int 3))) + (and:SI (match_dup 1) (const_int 3))))] + "TARGET_SHMEDIA32" + "ldlo.l %U1, %0" + [(set_attr "type" "load_media")]) + +/* DImode counterpart of ldlo_l: the memory word is at (address & -8), with width operand 8 - (address & 7) and position operand (address & 7). */ (define_insn "ldlo_q" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (and:SI (match_operand:QI 1 "ua_address_operand" "p") + (const_int -8))) + (minus:SI (const_int 8) (and:SI (match_dup 1) (const_int 7))) + (and:SI (match_dup 1) (const_int 7))))] + "TARGET_SHMEDIA32" + "ldlo.q %U1, %0" + [(set_attr "type" "load_media")]) + +/* Form of ldlo_q where the address is a register plus an offset that is a multiple of 8; split into ldlo_q on the summed address. */ (define_insn_and_split "*ldlo_q_comb0" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "ua_offset" "I06")) + (const_int -8))) + (minus:SI (const_int 8) (and:SI (match_dup 1) (const_int 7))) + (and:SI (match_dup 1) (const_int 7))))] + "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & 7) == 0" + "#" + "" + [(pc)] + "emit_insn (gen_ldlo_q (operands[0], + gen_rtx_PLUS (SImode, operands[1], operands[2]))); + DONE;") + +/* Form of ldlo_q where the width and position expressions carry their own offset, operand 3, which must equal the low three bits of the address offset operand 2; split into ldlo_q on the summed address. */ (define_insn_and_split "*ldlo_q_comb1" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "ua_offset" "I06")) + (const_int -8))) + (minus:SI (const_int 8) + (and:SI (plus:SI (match_dup 1) + (match_operand:SI 3 "ua_offset" "I06")) + (const_int 7))) + (and:SI (plus:SI (match_dup 1) 
(match_dup 3)) (const_int 7))))] + "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & -8) + && (INTVAL (operands[2]) & 7) == INTVAL (operands[3])" + "#" + "" + [(pc)] + "emit_insn (gen_ldlo_q (operands[0], + gen_rtx_PLUS (SImode, operands[1], operands[2]))); + DONE;") + +/* Unaligned-store high part, 32-bit addressing: stores operand 1 into a zero_extract of the SImode word at ((address | 3) - 3), with width operand (address & 3) + 1 and position 0. */ (define_insn "sthi_l" + [(set (zero_extract:SI + (mem:SI (plus:SI (ior:SI (match_operand:QI 0 "ua_address_operand" "p") + (const_int 3)) + (const_int -3))) + (plus:SI (and:SI (match_dup 0) (const_int 3)) (const_int 1)) + (const_int 0)) + (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32" + "sthi.l %U0, %1" + [(set_attr "type" "ustore_media")]) + +;; All unaligned stores are considered to be 'narrow' because they typically +;; operate on less than a quadword, and when they operate on a full quadword, +;; the vanilla store high / store low sequence will cause a stall if not +;; scheduled apart. +/* DImode counterpart of sthi_l: the memory word is at ((address | 7) - 7) and the width operand is (address & 7) + 1. */ (define_insn "sthi_q" + [(set (zero_extract:DI + (mem:DI (plus:SI (ior:SI (match_operand:QI 0 "ua_address_operand" "p") + (const_int 7)) + (const_int -7))) + (plus:SI (and:SI (match_dup 0) (const_int 7)) (const_int 1)) + (const_int 0)) + (match_operand:DI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32" + "sthi.q %U0, %1" + [(set_attr "type" "ustore_media")]) + +/* Form of sthi_q where the address is a register plus an offset that is a multiple of 8; split into sthi_q on the summed address. */ (define_insn_and_split "*sthi_q_comb0" + [(set (zero_extract:DI + (mem:DI (plus:SI (ior:SI (plus:SI (match_operand:SI 0 + "register_operand" "r") + (match_operand:SI 1 "ua_offset" + "I06")) + (const_int 7)) + (const_int -7))) + (plus:SI (and:SI (match_dup 0) (const_int 7)) (const_int 1)) + (const_int 0)) + (match_operand:DI 2 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == 0" + "#" + "" + [(pc)] + "emit_insn (gen_sthi_q (gen_rtx_PLUS (SImode, operands[0], operands[1]), + operands[2])); + DONE;") + +/* Form of sthi_q where the width expression carries its own offset, operand 2, which must equal the low three bits of the address offset operand 1; split into sthi_q on the summed address. */ (define_insn_and_split "*sthi_q_comb1" + [(set (zero_extract:DI + (mem:DI (plus:SI (ior:SI (plus:SI (match_operand:SI 0 + "register_operand" "r") + (match_operand:SI 1 "ua_offset" + "I06")) + (const_int 7)) + 
(const_int -7))) + (plus:SI (and:SI (plus:SI (match_dup 0) + (match_operand:SI 2 "ua_offset" "I06")) + (const_int 7)) + (const_int 1)) + (const_int 0)) + (match_operand:DI 3 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & -8) + && (INTVAL (operands[1]) & 7) == INTVAL (operands[2])" + "#" + "" + [(pc)] + "emit_insn (gen_sthi_q (gen_rtx_PLUS (SImode, operands[0], operands[1]), + operands[3])); + DONE;") + +;; This is highpart user because the address is used as full 64 bit. +/* Unaligned-store low part, 32-bit addressing: stores operand 1 into a zero_extract of the SImode word at (address & -4), with width operand 4 - (address & 3) and position operand (address & 3). */ (define_insn "stlo_l" + [(set (zero_extract:SI + (mem:SI (and:SI (match_operand:QI 0 "ua_address_operand" "p") + (const_int -4))) + (minus:SI (const_int 4) (and:SI (match_dup 0) (const_int 3))) + (and:SI (match_dup 0) (const_int 3))) + (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32" + "stlo.l %U0, %1" + [(set_attr "type" "ustore_media")]) + +/* DImode counterpart of stlo_l: the memory word is at (address & -8), with width operand 8 - (address & 7) and position operand (address & 7). */ (define_insn "stlo_q" + [(set (zero_extract:DI + (mem:DI (and:SI (match_operand:QI 0 "ua_address_operand" "p") + (const_int -8))) + (minus:SI (const_int 8) (and:SI (match_dup 0) (const_int 7))) + (and:SI (match_dup 0) (const_int 7))) + (match_operand:DI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32" + "stlo.q %U0, %1" + [(set_attr "type" "ustore_media")]) + +/* Form of stlo_q where the address is a register plus an offset that is a multiple of 8; split into stlo_q on the summed address. */ (define_insn_and_split "*stlo_q_comb0" + [(set (zero_extract:DI + (mem:DI (and:SI (plus:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "ua_offset" "I06")) + (const_int -8))) + (minus:SI (const_int 8) (and:SI (match_dup 0) (const_int 7))) + (and:SI (match_dup 0) (const_int 7))) + (match_operand:DI 2 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == 0" + "#" + "" + [(pc)] + "emit_insn (gen_stlo_q (gen_rtx_PLUS (SImode, operands[0], operands[1]), + operands[2])); + DONE;") + +/* Form of stlo_q where the width and position expressions carry their own offset, operand 2, which must equal the low three bits of the address offset operand 1. NOTE(review): unlike *ldhi_q_comb1, *ldlo_q_comb1 and *sthi_q_comb1, the condition here does not also test (INTVAL (operands[1]) & -8) - confirm whether the difference is intentional. */ (define_insn_and_split "*stlo_q_comb1" + [(set (zero_extract:DI + (mem:DI (and:SI (plus:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "ua_offset" "I06")) + (const_int -8))) + (minus:SI (const_int 8) (and:SI 
(plus:SI (match_dup 0) + (match_operand:SI 2 + "ua_offset" "I06")) + (const_int 7))) + (and:SI (plus:SI (match_dup 0) (match_dup 2)) (const_int 7))) + (match_operand:DI 3 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == INTVAL (operands[2])" + "#" + "" + [(pc)] + "emit_insn (gen_stlo_q (gen_rtx_PLUS (SImode, operands[0], operands[1]), + operands[3])); + DONE;") + +/* TARGET_SHMEDIA64 counterpart of ldhi_l: same shape with the address arithmetic done in DImode. */ (define_insn "ldhi_l64" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (zero_extract:SI + (mem:SI (plus:DI (ior:DI (match_operand:QI 1 "ua_address_operand" "p") + (const_int 3)) + (const_int -3))) + (plus:DI (and:DI (match_dup 1) (const_int 3)) (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA64" + "ldhi.l %U1, %0" + [(set_attr "type" "load_media")]) + +/* TARGET_SHMEDIA64 counterpart of ldhi_q. */ (define_insn "ldhi_q64" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (plus:DI (ior:DI (match_operand:QI 1 "ua_address_operand" "p") + (const_int 7)) + (const_int -7))) + (plus:DI (and:DI (match_dup 1) (const_int 7)) (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA64" + "ldhi.q %U1, %0" + [(set_attr "type" "load_media")]) + +/* TARGET_SHMEDIA64 counterpart of ldlo_l. */ (define_insn "ldlo_l64" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (zero_extract:SI + (mem:SI (and:DI (match_operand:QI 1 "ua_address_operand" "p") + (const_int -4))) + (minus:DI (const_int 4) (and:DI (match_dup 1) (const_int 3))) + (and:DI (match_dup 1) (const_int 3))))] + "TARGET_SHMEDIA64" + "ldlo.l %U1, %0" + [(set_attr "type" "load_media")]) + +/* TARGET_SHMEDIA64 counterpart of ldlo_q. */ (define_insn "ldlo_q64" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (and:DI (match_operand:QI 1 "ua_address_operand" "p") + (const_int -8))) + (minus:DI (const_int 8) (and:DI (match_dup 1) (const_int 7))) + (and:DI (match_dup 1) (const_int 7))))] + "TARGET_SHMEDIA64" + "ldlo.q %U1, %0" + [(set_attr "type" "load_media")]) + +/* TARGET_SHMEDIA64 counterpart of sthi_l. */ (define_insn "sthi_l64" + [(set (zero_extract:SI + (mem:SI (plus:DI (ior:DI (match_operand:QI 0 "ua_address_operand" "p") + (const_int 3)) + 
(const_int -3))) + (plus:DI (and:DI (match_dup 0) (const_int 3)) (const_int 1)) + (const_int 0)) + (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA64" + "sthi.l %U0, %1" + [(set_attr "type" "ustore_media")]) + +/* TARGET_SHMEDIA64 counterpart of sthi_q. */ (define_insn "sthi_q64" + [(set (zero_extract:DI + (mem:DI (plus:DI (ior:DI (match_operand:QI 0 "ua_address_operand" "p") + (const_int 7)) + (const_int -7))) + (plus:DI (and:DI (match_dup 0) (const_int 7)) (const_int 1)) + (const_int 0)) + (match_operand:DI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA64" + "sthi.q %U0, %1" + [(set_attr "type" "ustore_media")]) + +/* TARGET_SHMEDIA64 counterpart of stlo_l. */ (define_insn "stlo_l64" + [(set (zero_extract:SI + (mem:SI (and:DI (match_operand:QI 0 "ua_address_operand" "p") + (const_int -4))) + (minus:DI (const_int 4) (and:DI (match_dup 0) (const_int 3))) + (and:DI (match_dup 0) (const_int 3))) + (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA64" + "stlo.l %U0, %1" + [(set_attr "type" "ustore_media")]) + +/* TARGET_SHMEDIA64 counterpart of stlo_q. */ (define_insn "stlo_q64" + [(set (zero_extract:DI + (mem:DI (and:DI (match_operand:QI 0 "ua_address_operand" "p") + (const_int -8))) + (minus:DI (const_int 8) (and:DI (match_dup 0) (const_int 7))) + (and:DI (match_dup 0) (const_int 7))) + (match_operand:DI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA64" + "stlo.q %U0, %1" + [(set_attr "type" "ustore_media")]) + +/* The nsb instruction modelled as an unspec: DImode input, QImode result. */ (define_insn "nsb" + [(set (match_operand:QI 0 "arith_reg_dest" "=r") + (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")] + UNSPEC_NSB))] + "TARGET_SHMEDIA" + "nsb %1, %0" + [(set_attr "type" "arith_media")]) + +/* nsb with the QImode result zero-extended to SImode; same instruction. */ (define_insn "nsbsi" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (zero_extend:SI + (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")] + UNSPEC_NSB)))] + "TARGET_SHMEDIA" + "nsb %1, %0" + [(set_attr "type" "arith_media")]) + +/* nsb with the QImode result zero-extended to DImode; same instruction. */ (define_insn "nsbdi" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extend:DI + (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")] + UNSPEC_NSB)))] + "TARGET_SHMEDIA" + "nsb %1, %0" + 
[(set_attr "type" "arith_media")]) + +/* Expands DImode ffs as an add / xor / shift / nsb sequence on a scratch register, then negates into operand 0. The REG_EQUAL note on the final insn describes the result as ffs of the source, operand 1, matching the pattern above; it used to name the destination, operand 0. */ (define_expand "ffsdi2" + [(set (match_operand:DI 0 "arith_reg_dest" "") + (ffs:DI (match_operand:DI 1 "arith_reg_operand" "")))] + "TARGET_SHMEDIA" + " +{ + rtx scratch = gen_reg_rtx (DImode); + rtx last; + + emit_insn (gen_adddi3 (scratch, operands[1], constm1_rtx)); + emit_insn (gen_xordi3 (scratch, operands[1], scratch)); + emit_insn (gen_lshrdi3_media (scratch, scratch, const1_rtx)); + emit_insn (gen_nsbdi (scratch, scratch)); + emit_insn (gen_adddi3 (scratch, scratch, GEN_INT (-64))); + emit_insn (gen_movdicc_false (scratch, operands[1], const0_rtx, scratch)); + last = emit_insn (gen_subdi3 (operands[0], const0_rtx, scratch)); + /* The value stored is ffs of the input, so the note must refer to operands[1]. */ set_unique_reg_note (last, REG_EQUAL, gen_rtx_FFS (DImode, operands[1])); + + DONE; +}") + +/* Expands SImode ffs: works on the DImode subreg of operand 1, computing op1 & ~(op1 - 1), then nsb, then 63 minus that count. As in ffsdi2, the REG_EQUAL note names the source, operand 1. */ (define_expand "ffssi2" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (ffs:SI (match_operand:SI 1 "arith_reg_operand" "")))] + "TARGET_SHMEDIA" + " +{ + rtx scratch = gen_reg_rtx (SImode); + rtx discratch = gen_reg_rtx (DImode); + rtx last; + + emit_insn (gen_adddi3 (discratch, + simplify_gen_subreg (DImode, operands[1], SImode, 0), + constm1_rtx)); + emit_insn (gen_andcdi3 (discratch, + simplify_gen_subreg (DImode, operands[1], SImode, 0), + discratch)); + emit_insn (gen_nsbsi (scratch, discratch)); + last = emit_insn (gen_subsi3 (operands[0], + force_reg (SImode, GEN_INT (63)), scratch)); + /* The value stored is ffs of the input, so the note must refer to operands[1]. */ set_unique_reg_note (last, REG_EQUAL, gen_rtx_FFS (SImode, operands[1])); + + DONE; +}") + +/* Reverses the order of the eight bytes of a V8QI value. */ (define_insn "byterev" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (vec_select:V8QI (match_operand:V8QI 1 "arith_reg_operand" "r") + (parallel [(const_int 7) (const_int 6) (const_int 5) + (const_int 4) (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "TARGET_SHMEDIA" + "byterev %1, %0" + [(set_attr "type" "arith_media")]) + +/* SHmedia prefetch: wraps the address in a QImode MEM and prints it as a byte load whose destination is r63. */ (define_insn "*prefetch_media" + [(prefetch (match_operand:QI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (match_operand:SI 2 "const_int_operand" 
"n"))] + "TARGET_SHMEDIA" + "* +{ + operands[0] = gen_rtx_MEM (QImode, operands[0]); + output_asm_insn (\"ld%M0.b %m0,r63\", operands); + return \"\"; +}" + [(set_attr "type" "other")]) + +/* Prefetch through a register address with the pref instruction; not available when TARGET_VXWORKS_RTP is set. */ (define_insn "*prefetch_i4" + [(prefetch (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "n") + (match_operand:SI 2 "const_int_operand" "n"))] + "(TARGET_HARD_SH4 || TARGET_SHCOMPACT) && !TARGET_VXWORKS_RTP" + "* +{ + return \"pref @%0\"; +}" + [(set_attr "type" "other")]) + +;; In user mode, the "pref" instruction will raise a RADDERR exception +;; for accesses to [0x80000000,0xffffffff]. This makes it an unsuitable +;; implementation of __builtin_prefetch for VxWorks RTPs. +/* Standard prefetch expander: FAILs unless the address has Pmode and both hint operands are constant integers; outside SHmedia the address is forced into a register. */ (define_expand "prefetch" + [(prefetch (match_operand 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (match_operand:SI 2 "const_int_operand" "n"))] + "TARGET_SH2A || ((TARGET_HARD_SH4 || TARGET_SH5) + && (TARGET_SHMEDIA || !TARGET_VXWORKS_RTP))" + " +{ + if (GET_MODE (operands[0]) != Pmode + || !CONST_INT_P (operands[1]) + || !CONST_INT_P (operands[2])) + FAIL; + if (! 
TARGET_SHMEDIA) + operands[0] = force_reg (Pmode, operands[0]); +}") + +(define_insn "prefetch_m2a" + [(prefetch (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "n") + (match_operand:SI 2 "const_int_operand" "n"))] + "TARGET_SH2A" + "pref\\t@%0" + [(set_attr "type" "other")]) + +(define_insn "alloco_i" + [(set (mem:BLK (match_operand:QI 0 "cache_address_operand" "p")) + (unspec:BLK [(const_int 0)] UNSPEC_ALLOCO))] + "TARGET_SHMEDIA32" + "* +{ + rtx xops[2]; + + if (GET_CODE (operands[0]) == PLUS) + { + xops[0] = XEXP (operands[0], 0); + xops[1] = XEXP (operands[0], 1); + } + else + { + xops[0] = operands[0]; + xops[1] = const0_rtx; + } + output_asm_insn (\"alloco %0, %1\", xops); + return \"\"; +}" + [(set_attr "type" "other")]) + +(define_split + [(set (match_operand 0 "any_register_operand" "") + (match_operand 1 "" ""))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (match_dup 1))] + " +{ + int n_changes = 0; + + for_each_rtx (&operands[1], shmedia_cleanup_truncate, &n_changes); + if (!n_changes) + FAIL; +}") + +; Stack Protector Patterns + +(define_expand "stack_protect_set" + [(set (match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" ""))] + "" +{ + if (TARGET_SHMEDIA) + { + if (TARGET_SHMEDIA64) + emit_insn (gen_stack_protect_set_di_media (operands[0], operands[1])); + else + emit_insn (gen_stack_protect_set_si_media (operands[0], operands[1])); + } + else + emit_insn (gen_stack_protect_set_si (operands[0], operands[1])); + + DONE; +}) + +(define_insn "stack_protect_set_si" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0))] + "!TARGET_SHMEDIA" + "mov.l\t%1, %2\;mov.l\t%2, %0\;mov\t#0, %2" + [(set_attr "type" "other") + (set_attr "length" "6")]) + +(define_insn "stack_protect_set_si_media" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI 
[(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0))] + "TARGET_SHMEDIA" + "ld%M1.l\t%m1, %2\;st%M0.l\t%m0, %2\;movi\t0, %2" + [(set_attr "type" "other") + (set_attr "length" "12")]) + +(define_insn "stack_protect_set_di_media" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0))] + "TARGET_SHMEDIA64" + "ld%M1.q\t%m1, %2\;st%M0.q\t%m0, %2\;movi\t0, %2" + [(set_attr "type" "other") + (set_attr "length" "12")]) + +(define_expand "stack_protect_test" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "") + (match_operand 2 "" "")] + "" +{ + if (TARGET_SHMEDIA) + { + rtx tmp = gen_reg_rtx (GET_MODE (operands[0])); + rtx test; + + test = gen_rtx_NE (VOIDmode, tmp, const0_rtx); + if (TARGET_SHMEDIA64) + { + emit_insn (gen_stack_protect_test_di_media (tmp, operands[0], + operands[1])); + emit_jump_insn (gen_cbranchdi4 (test, tmp, const0_rtx, operands[2])); + } + else + { + emit_insn (gen_stack_protect_test_si_media (tmp, operands[0], + operands[1])); + emit_jump_insn (gen_cbranchsi4 (test, tmp, const0_rtx, operands[2])); + } + } + else + { + emit_insn (gen_stack_protect_test_si (operands[0], operands[1])); + emit_jump_insn (gen_branch_true (operands[2])); + } + + DONE; +}) + +(define_insn "stack_protect_test_si" + [(set (reg:SI T_REG) + (unspec:SI [(match_operand:SI 0 "memory_operand" "m") + (match_operand:SI 1 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (set (match_scratch:SI 2 "=&r") (const_int 0)) + (set (match_scratch:SI 3 "=&r") (const_int 0))] + "!TARGET_SHMEDIA" + "mov.l\t%0, %2\;mov.l\t%1, %3\;cmp/eq\t%2, %3\;mov\t#0, %2\;mov\t#0, %3" + [(set_attr "type" "other") + (set_attr "length" "10")]) + +(define_insn "stack_protect_test_si_media" + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2
"memory_operand" "m")] + UNSPEC_SP_TEST)) + (set (match_scratch:SI 3 "=&r") (const_int 0))] + "TARGET_SHMEDIA" + "ld%M1.l\t%m1, %0\;ld%M2.l\t%m2, %3\;cmpeq\t%0, %3, %0\;movi\t0, %3" + [(set_attr "type" "other") + (set_attr "length" "16")]) + +(define_insn "stack_protect_test_di_media" + [(set (match_operand:DI 0 "register_operand" "=&r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (set (match_scratch:DI 3 "=&r") (const_int 0))] + "TARGET_SHMEDIA64" + "ld%M1.q\t%m1, %0\;ld%M2.q\t%m2, %3\;cmpeq\t%0, %3, %0\;movi\t0, %3" + [(set_attr "type" "other") + (set_attr "length" "16")]) diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt new file mode 100644 index 000000000..8464bd75f --- /dev/null +++ b/gcc/config/sh/sh.opt @@ -0,0 +1,338 @@ +; Options for the SH port of the compiler. + +; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 +; Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +;; Used for various architecture options. +Mask(SH_E) + +;; Set if the default precision of the FPU is single. +Mask(FPU_SINGLE) + +;; Set if we should generate code using type 2A insns. +Mask(HARD_SH2A) + +;; Set if we should generate code using type 2A DF insns. +Mask(HARD_SH2A_DOUBLE) + +;; Set if compiling for SH4 hardware (to be used for insn costs etc.)
+Mask(HARD_SH4) + +;; Set if we should generate code for a SH5 CPU (either ISA). +Mask(SH5) + +;; Set if we should save all target registers. +Mask(SAVE_ALL_TARGET_REGS) + +m1 +Target RejectNegative Mask(SH1) Condition(SUPPORT_SH1) +Generate SH1 code + +m2 +Target RejectNegative Mask(SH2) Condition(SUPPORT_SH2) +Generate SH2 code + +m2a +Target RejectNegative Condition(SUPPORT_SH2A) +Generate default double-precision SH2a-FPU code + +m2a-nofpu +Target RejectNegative Condition(SUPPORT_SH2A_NOFPU) +Generate SH2a FPU-less code + +m2a-single +Target RejectNegative Condition(SUPPORT_SH2A_SINGLE) +Generate default single-precision SH2a-FPU code + +m2a-single-only +Target RejectNegative Condition(SUPPORT_SH2A_SINGLE_ONLY) +Generate only single-precision SH2a-FPU code + +m2e +Target RejectNegative Condition(SUPPORT_SH2E) +Generate SH2e code + +m3 +Target RejectNegative Mask(SH3) Condition(SUPPORT_SH3) +Generate SH3 code + +m3e +Target RejectNegative Condition(SUPPORT_SH3E) +Generate SH3e code + +m4 +Target RejectNegative Mask(SH4) Condition(SUPPORT_SH4) +Generate SH4 code + +m4-100 +Target RejectNegative Condition(SUPPORT_SH4) +Generate SH4-100 code + +m4-200 +Target RejectNegative Condition(SUPPORT_SH4) +Generate SH4-200 code + +;; TARGET_SH4_300 indicates if we have the ST40-300 instruction set and +;; pipeline - irrespective of ABI. 
+m4-300 +Target RejectNegative Condition(SUPPORT_SH4) Var(TARGET_SH4_300) +Generate SH4-300 code + +m4-nofpu +Target RejectNegative Condition(SUPPORT_SH4_NOFPU) +Generate SH4 FPU-less code + +m4-100-nofpu +Target RejectNegative Condition(SUPPORT_SH4_NOFPU) +Generate SH4-100 FPU-less code + +m4-200-nofpu +Target RejectNegative Condition(SUPPORT_SH4_NOFPU) +Generate SH4-200 FPU-less code + +m4-300-nofpu +Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Var(TARGET_SH4_300) +Generate SH4-300 FPU-less code + +m4-340 +Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Var(TARGET_SH4_300) +Generate code for SH4 340 series (MMU/FPU-less) +;; passes -isa=sh4-nommu-nofpu to the assembler. + +m4-400 +Target RejectNegative Condition(SUPPORT_SH4_NOFPU) +Generate code for SH4 400 series (MMU/FPU-less) +;; passes -isa=sh4-nommu-nofpu to the assembler. + +m4-500 +Target RejectNegative Condition(SUPPORT_SH4_NOFPU) +Generate code for SH4 500 series (FPU-less). +;; passes -isa=sh4-nofpu to the assembler. 
+ +m4-single +Target RejectNegative Condition(SUPPORT_SH4_SINGLE) +Generate default single-precision SH4 code + +m4-100-single +Target RejectNegative Condition(SUPPORT_SH4_SINGLE) +Generate default single-precision SH4-100 code + +m4-200-single +Target RejectNegative Condition(SUPPORT_SH4_SINGLE) +Generate default single-precision SH4-200 code + +m4-300-single +Target RejectNegative Condition(SUPPORT_SH4_SINGLE) Var(TARGET_SH4_300) +Generate default single-precision SH4-300 code + +m4-single-only +Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY) +Generate only single-precision SH4 code + +m4-100-single-only +Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY) +Generate only single-precision SH4-100 code + +m4-200-single-only +Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY) +Generate only single-precision SH4-200 code + +m4-300-single-only +Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY) Var(TARGET_SH4_300) +Generate only single-precision SH4-300 code + +m4a +Target RejectNegative Mask(SH4A) Condition(SUPPORT_SH4A) +Generate SH4a code + +m4a-nofpu +Target RejectNegative Condition(SUPPORT_SH4A_NOFPU) +Generate SH4a FPU-less code + +m4a-single +Target RejectNegative Condition(SUPPORT_SH4A_SINGLE) +Generate default single-precision SH4a code + +m4a-single-only +Target RejectNegative Condition(SUPPORT_SH4A_SINGLE_ONLY) +Generate only single-precision SH4a code + +m4al +Target RejectNegative Condition(SUPPORT_SH4AL) +Generate SH4al-dsp code + +m5-32media +Target RejectNegative Condition(SUPPORT_SH5_32MEDIA) +Generate 32-bit SHmedia code + +m5-32media-nofpu +Target RejectNegative Condition(SUPPORT_SH5_32MEDIA_NOFPU) +Generate 32-bit FPU-less SHmedia code + +m5-64media +Target RejectNegative Condition(SUPPORT_SH5_64MEDIA) +Generate 64-bit SHmedia code + +m5-64media-nofpu +Target RejectNegative Condition(SUPPORT_SH5_64MEDIA_NOFPU) +Generate 64-bit FPU-less SHmedia code + +m5-compact +Target RejectNegative Condition(SUPPORT_SH5_32MEDIA) 
+Generate SHcompact code + +m5-compact-nofpu +Target RejectNegative Condition(SUPPORT_SH5_32MEDIA_NOFPU) +Generate FPU-less SHcompact code + +maccumulate-outgoing-args +Target Report Var(TARGET_ACCUMULATE_OUTGOING_ARGS) Init(1) +Reserve space for outgoing arguments in the function prologue + +madjust-unroll +Target Report Mask(ADJUST_UNROLL) Condition(SUPPORT_ANY_SH5) +Throttle unrolling to avoid thrashing target registers unless the unroll benefit outweighs this + +mb +Target Report RejectNegative InverseMask(LITTLE_ENDIAN) +Generate code in big endian mode + +mbigtable +Target Report RejectNegative Mask(BIGTABLE) +Generate 32-bit offsets in switch tables + +mbitops +Target Report RejectNegative Mask(BITOPS) +Generate bit instructions + +mbranch-cost= +Target RejectNegative Joined UInteger Var(sh_branch_cost) Init(-1) +Cost to assume for a branch insn + +mcbranchdi +Target Var(TARGET_CBRANCHDI4) +Enable cbranchdi4 pattern + +mcmpeqdi +Target Var(TARGET_CMPEQDI_T) +Emit cmpeqdi_t pattern even when -mcbranchdi is in effect. + +mcut2-workaround +Target RejectNegative Var(TARGET_SH5_CUT2_WORKAROUND) +Enable SH5 cut2 workaround + +mdalign +Target Report RejectNegative Mask(ALIGN_DOUBLE) +Align doubles at 64-bit boundaries + +mdiv= +Target RejectNegative Joined Var(sh_div_str) Init("") +Division strategy, one of: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call, inv:call2, inv:fp, call-div1, call-fp, call-table + +mdivsi3_libfunc= +Target RejectNegative Joined Var(sh_divsi3_libfunc) Init("") +Specify name for 32 bit signed division function + +mfmovd +Target RejectNegative Mask(FMOVD) +Enable the use of 64-bit floating point registers in fmov instructions. See -mdalign if 64-bit alignment is required. 
+ +mfixed-range= +Target RejectNegative Joined Var(sh_fixed_range_str) +Specify range of registers to make fixed + +mfused-madd +Target Var(TARGET_FMAC) +Enable the use of the fused floating point multiply-accumulate operation + +mgettrcost= +Target RejectNegative Joined UInteger Var(sh_gettrcost) Init(-1) +Cost to assume for gettr insn + +mhitachi +Target Report RejectNegative Mask(HITACHI) +Follow Renesas (formerly Hitachi) / SuperH calling conventions + +mieee +Target Var(TARGET_IEEE) +Increase the IEEE compliance for floating-point comparisons + +mindexed-addressing +Target Report Mask(ALLOW_INDEXED_ADDRESS) Condition(SUPPORT_ANY_SH5_32MEDIA) +Enable the use of the indexed addressing mode for SHmedia32/SHcompact + +minline-ic_invalidate +Target Report Var(TARGET_INLINE_IC_INVALIDATE) +inline code to invalidate instruction cache entries after setting up nested function trampolines + +minvalid-symbols +Target Report Mask(INVALID_SYMBOLS) Condition(SUPPORT_ANY_SH5) +Assume symbols might be invalid + +misize +Target Report RejectNegative Mask(DUMPISIZE) +Annotate assembler instructions with estimated addresses + +ml +Target Report RejectNegative Mask(LITTLE_ENDIAN) +Generate code in little endian mode + +mnomacsave +Target Report RejectNegative Mask(NOMACSAVE) +Mark MAC register as call-clobbered + +;; ??? This option is not useful, but is retained in case there are people +;; who are still relying on it. It may be deleted in the future. 
+mpadstruct +Target Report RejectNegative Mask(PADSTRUCT) +Make structs a multiple of 4 bytes (warning: ABI altered) + +mprefergot +Target Report RejectNegative Mask(PREFERGOT) +Emit function-calls using global offset table when generating PIC + +mpt-fixed +Target Report Mask(PT_FIXED) Condition(SUPPORT_ANY_SH5) +Assume pt* instructions won't trap + +mrelax +Target Report RejectNegative Mask(RELAX) +Shorten address references during linking + +mrenesas +Target Mask(HITACHI) MaskExists +Follow Renesas (formerly Hitachi) / SuperH calling conventions + +mspace +Target RejectNegative Alias(Os) +Deprecated. Use -Os instead + +multcost= +Target RejectNegative Joined UInteger Var(sh_multcost) Init(-1) +Cost to assume for a multiply insn + +musermode +Target Report RejectNegative Mask(USERMODE) +Don't generate privileged-mode only code; implies -mno-inline-ic_invalidate if the inline code would not work in user mode. + +;; We might want to enable this by default for TARGET_HARD_SH4, because +;; zero-offset branches have zero latency. Needs some benchmarking. +mpretend-cmove +Target Var(TARGET_PRETEND_CMOVE) +Pretend a branch-around-a-move is a conditional move. diff --git a/gcc/config/sh/sh1.md b/gcc/config/sh/sh1.md new file mode 100644 index 000000000..970f3fc06 --- /dev/null +++ b/gcc/config/sh/sh1.md @@ -0,0 +1,85 @@ +;; DFA scheduling description for Renesas / SuperH SH. +;; Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+ +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Load and store instructions save a cycle if they are aligned on a +;; four byte boundary. Using a function unit for stores encourages +;; gcc to separate load and store instructions by one instruction, +;; which makes it more likely that the linker will be able to word +;; align them when relaxing. + +;; SH-1 scheduling. This is just a conversion of the old scheduling +;; model, using define_function_unit. + +(define_automaton "sh1") +(define_cpu_unit "sh1memory,sh1int,sh1mpy,sh1fp" "sh1") + +;; Loads have a latency of two. +;; However, call insns can have a delay slot, so that we want one more +;; insn to be scheduled between the load of the function address and the call. +;; This is equivalent to a latency of three. +;; ADJUST_COST can only properly handle reductions of the cost, so we +;; use a latency of three here. +;; We only do this for SImode loads of general registers, to make the work +;; for ADJUST_COST easier. +(define_insn_reservation "sh1_load_si" 3 + (and (eq_attr "pipe_model" "sh1") + (eq_attr "type" "load_si,pcload_si")) + "sh1memory*2") + +(define_insn_reservation "sh1_load_store" 2 + (and (eq_attr "pipe_model" "sh1") + (eq_attr "type" "load,pcload,pload,mem_mac,store,fstore,pstore,mac_mem")) + "sh1memory*2") + +(define_insn_reservation "sh1_arith3" 3 + (and (eq_attr "pipe_model" "sh1") + (eq_attr "type" "arith3,arith3b")) + "sh1int*3") + +(define_insn_reservation "sh1_dyn_shift" 2 + (and (eq_attr "pipe_model" "sh1") + (eq_attr "type" "dyn_shift")) + "sh1int*2") + +(define_insn_reservation "sh1_int" 1 + (and (eq_attr "pipe_model" "sh1") + (eq_attr "type" "!arith3,arith3b,dyn_shift")) + "sh1int") + +;; ??? These are approximations. 
+(define_insn_reservation "sh1_smpy" 2 + (and (eq_attr "pipe_model" "sh1") + (eq_attr "type" "smpy")) + "sh1mpy*2") + +(define_insn_reservation "sh1_dmpy" 3 + (and (eq_attr "pipe_model" "sh1") + (eq_attr "type" "dmpy")) + "sh1mpy*3") + +(define_insn_reservation "sh1_fp" 2 + (and (eq_attr "pipe_model" "sh1") + (eq_attr "type" "fp,fpscr_toggle,fp_cmp,fmove")) + "sh1fp") + +(define_insn_reservation "sh1_fdiv" 13 + (and (eq_attr "pipe_model" "sh1") + (eq_attr "type" "fdiv")) + "sh1fp*12") + diff --git a/gcc/config/sh/sh4-300.md b/gcc/config/sh/sh4-300.md new file mode 100644 index 000000000..a9fb07cac --- /dev/null +++ b/gcc/config/sh/sh4-300.md @@ -0,0 +1,287 @@ +;; DFA scheduling description for ST40-300. +;; Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Load and store instructions save a cycle if they are aligned on a +;; four byte boundary. Using a function unit for stores encourages +;; gcc to separate load and store instructions by one instruction, +;; which makes it more likely that the linker will be able to word +;; align them when relaxing. + +;; The following description models the ST40-300 pipeline using the DFA based +;; scheduler. + +;; Two automata are defined to reduce number of states +;; which a single large automaton will have. 
(Factoring) + +(define_automaton "sh4_300_inst_pipeline,sh4_300_fpu_pipe") + +;; This unit is basically the decode unit of the processor. +;; Since SH4 is a dual issue machine,it is as if there are two +;; units so that any insn can be processed by either one +;; of the decoding unit. + +(define_cpu_unit "sh4_300_pipe_01,sh4_300_pipe_02" "sh4_300_inst_pipeline") + +;; The floating point units. + +(define_cpu_unit "sh4_300_fpt,sh4_300_fpu,sh4_300_fds" "sh4_300_fpu_pipe") + +;; integer multiplier unit + +(define_cpu_unit "sh4_300_mul" "sh4_300_inst_pipeline") + +;; LS unit + +(define_cpu_unit "sh4_300_ls" "sh4_300_inst_pipeline") + +;; The address calculator used for branch instructions. +;; This will be reserved after "issue" of branch instructions +;; and this is to make sure that no two branch instructions +;; can be issued in parallel. + +(define_cpu_unit "sh4_300_br" "sh4_300_inst_pipeline") + +;; ---------------------------------------------------- +;; This reservation is to simplify the dual issue description. + +(define_reservation "sh4_300_issue" "sh4_300_pipe_01|sh4_300_pipe_02") + +(define_reservation "all" "sh4_300_pipe_01+sh4_300_pipe_02") + +;;(define_insn_reservation "nil" 0 (eq_attr "type" "nil") "nothing") + +;; MOV RM,RN / MOV #imm8,RN / STS PR,RN +(define_insn_reservation "sh4_300_mov" 0 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "move,movi8,prget")) + "sh4_300_issue") + +;; Fixed STS from MACL / MACH +(define_insn_reservation "sh4_300_mac_gp" 0 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "mac_gp")) + "sh4_300_issue+sh4_300_mul") + +;; Fixed LDS to MACL / MACH +(define_insn_reservation "sh4_300_gp_mac" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "gp_mac")) + "sh4_300_issue+sh4_300_mul") + +;; Instructions without specific resource requirements with latency 1. 
+ +(define_insn_reservation "sh4_300_simple_arith" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "mt_group,arith,dyn_shift,prset")) + "sh4_300_issue") + +;; Load and store instructions have no alignment peculiarities for the ST40-300, +;; but they use the load-store unit, which they share with the fmove type +;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg) . +;; Loads have a latency of three. + +;; Load Store instructions. +(define_insn_reservation "sh4_300_load" 3 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "load,pcload,load_si,pcload_si,pload")) + "sh4_300_issue+sh4_300_ls") + +(define_insn_reservation "sh4_300_mac_load" 3 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "mem_mac")) + "sh4_300_issue+sh4_300_ls+sh4_300_mul") + +(define_insn_reservation "sh4_300_fload" 4 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fload,pcfload")) + "sh4_300_issue+sh4_300_ls+sh4_300_fpt") + +;; sh_adjust_cost describes the reduced latency of the feeding insns of a store. +;; The latency of an auto-increment register is 1; the latency of the memory +;; output is not actually considered here anyway. 
+(define_insn_reservation "sh4_300_store" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "store,pstore")) + "sh4_300_issue+sh4_300_ls") + +(define_insn_reservation "sh4_300_fstore" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fstore")) + "sh4_300_issue+sh4_300_ls+sh4_300_fpt") + +;; Fixed STS.L from MACL / MACH +(define_insn_reservation "sh4_300_mac_store" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "mac_mem")) + "sh4_300_issue+sh4_300_mul+sh4_300_ls") + +(define_insn_reservation "sh4_300_gp_fpul" 2 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "gp_fpul")) + "sh4_300_issue+sh4_300_fpt") + +(define_insn_reservation "sh4_300_fpul_gp" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fpul_gp")) + "sh4_300_issue+sh4_300_fpt") + +;; Branch (BF,BF/S,BT,BT/S,BRA) +;; Branch Far (JMP,RTS,BRAF) +;; Group: BR +;; When displacement is 0 for BF / BT, we have effectively conditional +;; execution of one instruction, without pipeline disruption. +;; Otherwise, the latency depends on prediction success. +;; We can't really do much with the latency, even if we could express it, +;; but the pairing restrictions are useful to take into account. +;; ??? If the branch is likely, and not paired with a preceding insn, +;; or likely and likely not predicted, we might want to fill the delay slot. +;; However, there appears to be no machinery to make the compiler +;; recognize these scenarios. 
+ +(define_insn_reservation "sh4_300_branch" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "cbranch,jump,return,jump_ind")) + "sh4_300_issue+sh4_300_br") + +;; RTE +(define_insn_reservation "sh4_300_return_from_exp" 9 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "rte")) + "sh4_300_pipe_01+sh4_300_pipe_02*9") + +;; OCBP, OCBWB +;; Group: CO +;; Latency: 1-5 +;; Issue Rate: 1 + +;; cwb is used for the sequence ocbwb @%0; extu.w %0,%2; or %1,%2; mov.l %0,@%2 +;; This description is likely inexact, but this pattern should not actually +;; appear when compiling for sh4-300; we should use isbi instead. +;; If a -mtune option is added later, we should use the icache array +;; dispatch method instead. +(define_insn_reservation "sh4_300_ocbwb" 3 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "cwb")) + "all*3") + +;; JSR,BSR,BSRF +;; Calls have a mandatory delay slot, which we'd like to fill with an insn +;; that can be paired with the call itself. +;; Scheduling runs before reorg, so we approximate this by saying that we +;; want the call to be paired with a preceding insn. +;; In most cases, the insn that loads the address of the call should have +;; a nonzero latency (mov rn,rm doesn't make sense since we could use rn +;; for the address then). Thus, a preceding insn that can be paired with +;; a call should be eligible for the delay slot. +;; +;; calls introduce a longish delay that is likely to flush the pipelines +;; of the caller's instructions. Ordinary functions tend to end with a +;; load to restore a register (in the delay slot of rts), while sfuncs +;; tend to end with an EX or MT insn. But that is not actually relevant, +;; since there are no instructions that contend for memory access early. +;; We could, of course, provide exact scheduling information for specific +;; sfuncs, if that should prove useful.
+ +(define_insn_reservation "sh4_300_call" 16 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "call,sfunc")) + "sh4_300_issue+sh4_300_br,all*15") + +;; FMOV.S / FMOV.D +(define_insn_reservation "sh4_300_fmov" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fmove")) + "sh4_300_issue+sh4_300_fpt") + +;; LDS to FPSCR +(define_insn_reservation "sh4_300_fpscr_load" 8 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "gp_fpscr")) + "sh4_300_issue+sh4_300_fpu+sh4_300_fpt") + +;; LDS.L to FPSCR +(define_insn_reservation "sh4_300_fpscr_load_mem" 8 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "mem_fpscr")) + "sh4_300_issue+sh4_300_fpu+sh4_300_fpt+sh4_300_ls") + + +;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W) +(define_insn_reservation "multi" 2 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "smpy,dmpy")) + "sh4_300_issue+sh4_300_mul") + +;; FPCHG, FRCHG, FSCHG +(define_insn_reservation "fpscr_toggle" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fpscr_toggle")) + "sh4_300_issue+sh4_300_fpu+sh4_300_fpt") + +;; FCMP/EQ, FCMP/GT +(define_insn_reservation "fp_cmp" 3 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fp_cmp,dfp_cmp")) + "sh4_300_issue+sh4_300_fpu") + +;; Single precision floating point (FADD,FLOAT,FMAC,FMUL,FSUB,FTRC) +;; Double-precision floating-point (FADD,FCNVDS,FCNVSD,FLOAT,FSUB,FTRC) +(define_insn_reservation "fp_arith" 6 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fp,ftrc_s,dfp_arith,dfp_conv")) + "sh4_300_issue+sh4_300_fpu") + +;; Single Precision FDIV/SQRT +(define_insn_reservation "fp_div" 19 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fdiv")) + "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*15") + +;; Double-precision floating-point FMUL +(define_insn_reservation "dfp_mul" 9 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "dfp_mul")) + "sh4_300_issue+sh4_300_fpu,sh4_300_fpu*3") + +;; Double precision FDIV/SQRT 
+(define_insn_reservation "dp_div" 35 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "dfdiv")) + "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*31") + + +;; ??? We don't really want these for sh4-300. +;; this pattern itself is likely to finish in 3 cycles, but also +;; to disrupt branch prediction for taken branches for the following +;; condbranch. +(define_insn_reservation "sh4_300_arith3" 5 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "arith3")) + "sh4_300_issue,all*4") + +;; arith3b insns without branch redirection make use of the 0-offset 0-latency +;; branch feature, and thus schedule the same no matter if the branch is taken +;; or not. If the branch is redirected, the taken branch might take longer, +;; but then, we don't have to take the next branch. +;; ??? should we suppress branch redirection for sh4-300 to improve branch +;; target hit rates? +(define_insn_reservation "arith3b" 2 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "arith3")) + "issue,all") diff --git a/gcc/config/sh/sh4.md b/gcc/config/sh/sh4.md new file mode 100644 index 000000000..0fb4a9aec --- /dev/null +++ b/gcc/config/sh/sh4.md @@ -0,0 +1,486 @@ +;; DFA scheduling description for SH4. +;; Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>.
+ +;; Load and store instructions save a cycle if they are aligned on a +;; four byte boundary. Using a function unit for stores encourages +;; gcc to separate load and store instructions by one instruction, +;; which makes it more likely that the linker will be able to word +;; align them when relaxing. + +;; The following description models the SH4 pipeline using the DFA based +;; scheduler. The DFA based description is better way to model a +;; superscalar pipeline as compared to function unit reservation model. +;; 1. The function unit based model is oriented to describe at most one +;; unit reservation by each insn. It is difficult to model unit reservations +;; in multiple pipeline units by same insn. This can be done using DFA +;; based description. +;; 2. The execution performance of DFA based scheduler does not depend on +;; processor complexity. +;; 3. Writing all unit reservations for an instruction class is a more natural +;; description of the pipeline and makes the interface to the hazard +;; recognizer simpler than the old function unit based model. +;; 4. The DFA model is richer and is a part of greater overall framework +;; of RCSP. + + +;; Two automata are defined to reduce number of states +;; which a single large automaton will have. (Factoring) + +(define_automaton "inst_pipeline,fpu_pipe") + +;; This unit is basically the decode unit of the processor. +;; Since SH4 is a dual issue machine,it is as if there are two +;; units so that any insn can be processed by either one +;; of the decoding unit. + +(define_cpu_unit "pipe_01,pipe_02" "inst_pipeline") + + +;; The fixed point arithmetic calculator(?? EX Unit). + +(define_cpu_unit "int" "inst_pipeline") + +;; f1_1 and f1_2 are floating point units.Actually there is +;; a f1 unit which can overlap with other f1 unit but +;; not another F1 unit.It is as though there were two +;; f1 units. + +(define_cpu_unit "f1_1,f1_2" "fpu_pipe") + +;; The floating point units (except FS - F2 always precedes it.) 
+ +(define_cpu_unit "F0,F1,F2,F3" "fpu_pipe") + +;; This is basically the MA unit of SH4 +;; used in LOAD/STORE pipeline. + +(define_cpu_unit "memory" "inst_pipeline") + +;; However, there are LS group insns that don't use it, even ones that +;; complete in 0 cycles. So we use an extra unit for the issue of LS insns. +(define_cpu_unit "load_store" "inst_pipeline") + +;; The address calculator used for branch instructions. +;; This will be reserved after "issue" of branch instructions +;; and this is to make sure that no two branch instructions +;; can be issued in parallel. + +(define_cpu_unit "pcr_addrcalc" "inst_pipeline") + +;; ---------------------------------------------------- +;; This reservation is to simplify the dual issue description. + +(define_reservation "issue" "pipe_01|pipe_02") + +;; This is to express the locking of D stage. +;; Note that the issue of a CO group insn also effectively locks the D stage. + +(define_reservation "d_lock" "pipe_01+pipe_02") + +;; Every FE instruction but fipr / ftrv starts with issue and this. +(define_reservation "F01" "F0+F1") + +;; This is to simplify description where F1,F2,FS +;; are used simultaneously. + +(define_reservation "fpu" "F1+F2") + +;; This is to highlight the fact that f1 +;; cannot overlap with F1. + +(exclusion_set "f1_1,f1_2" "F1") + +(define_insn_reservation "nil" 0 (eq_attr "type" "nil") "nothing") + +;; Although reg moves have a latency of zero +;; we need to highlight that they use D stage +;; for one cycle. 
+ +;; Group: MT + +(define_insn_reservation "reg_mov" 0 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "move")) + "issue") + +;; Group: LS + +(define_insn_reservation "freg_mov" 0 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "fmove")) + "issue+load_store") + +;; We don't model all pipeline stages; we model the issue ('D') stage +;; inasmuch as we allow only two instructions to issue simultaneously, +;; and CO instructions prevent any simultaneous issue of another instruction. +;; (This uses pipe_01 and pipe_02). +;; Double issue of EX insns is prevented by using the int unit in the EX stage. +;; Double issue of EX / BR insns is prevented by using the int unit / +;; pcr_addrcalc unit in the EX stage. +;; Double issue of BR / LS instructions is prevented by using the +;; pcr_addrcalc / load_store unit in the issue cycle. +;; Double issue of FE instructions is prevented by using F0 in the first +;; pipeline stage after the first D stage. +;; There is no need to describe the [ES]X / [MN]A / S stages after a D stage +;; (except in the cases outlined above), nor to describe the FS stage after +;; the F2 stage. + +;; Other MT group instructions(1 step operations) +;; Group: MT +;; Latency: 1 +;; Issue Rate: 1 + +(define_insn_reservation "mt" 1 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "mt_group")) + "issue") + +;; Fixed Point Arithmetic Instructions(1 step operations) +;; Group: EX +;; Latency: 1 +;; Issue Rate: 1 + +(define_insn_reservation "sh4_simple_arith" 1 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "insn_class" "ex_group")) + "issue,int") + +;; Load and store instructions have no alignment peculiarities for the SH4, +;; but they use the load-store unit, which they share with the fmove type +;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg) . +;; Loads have a latency of two. 
+;; However, call insns can only be paired with a preceding insn, and have +;; a delay slot, so that we want two more insns to be scheduled between the +;; load of the function address and the call. This is equivalent to a +;; latency of three. +;; ADJUST_COST can only properly handle reductions of the cost, so we +;; use a latency of three here, which gets multiplied by 10 to yield 30. +;; We only do this for SImode loads of general registers, to make the work +;; for ADJUST_COST easier. + +;; Load Store instructions. (MOV.[BWL]@(d,GBR)) +;; Group: LS +;; Latency: 2 +;; Issue Rate: 1 + +(define_insn_reservation "sh4_load" 2 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "load,pcload")) + "issue+load_store,nothing,memory") + +;; calls / sfuncs need an extra instruction for their delay slot. +;; Moreover, estimating the latency for SImode loads as 3 will also allow +;; adjust_cost to meaningfully bump it back up to 3 if they load the shift +;; count of a dynamic shift. +(define_insn_reservation "sh4_load_si" 3 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "load_si,pcload_si")) + "issue+load_store,nothing,memory") + +;; (define_bypass 2 "sh4_load_si" "!sh4_call") + +;; The load latency is upped to three if the dependent insn does +;; double precision computation. We want the 'default' latency to reflect +;; that increased latency because otherwise the insn priorities won't +;; allow proper scheduling. +(define_insn_reservation "sh4_fload" 3 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "fload,pcfload")) + "issue+load_store,nothing,memory") + +;; (define_bypass 2 "sh4_fload" "!") + +(define_insn_reservation "sh4_store" 1 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "store,fstore")) + "issue+load_store,nothing,memory") + +(define_insn_reservation "mac_mem" 1 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "mac_mem")) + "d_lock,nothing,memory") + +;; Load Store instructions.
+;; Group: LS +;; Latency: 1 +;; Issue Rate: 1 + +(define_insn_reservation "sh4_gp_fpul" 1 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "gp_fpul")) + "issue+load_store") + +;; Load Store instructions. +;; Group: LS +;; Latency: 3 +;; Issue Rate: 1 + +(define_insn_reservation "sh4_fpul_gp" 3 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "fpul_gp")) + "issue+load_store") + +;; Branch (BF,BF/S,BT,BT/S,BRA) +;; Group: BR +;; Latency when taken: 2 (or 1) +;; Issue Rate: 1 +;; The latency is 1 when displacement is 0. +;; We can't really do much with the latency, even if we could express it, +;; but the pairing restrictions are useful to take into account. +;; ??? If the branch is likely, we might want to fill the delay slot; +;; if the branch is likely, but not very likely, should we pretend to use +;; a resource that CO instructions use, to get a pairable delay slot insn? + +(define_insn_reservation "sh4_branch" 1 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "cbranch,jump")) + "issue+pcr_addrcalc") + +;; Branch Far (JMP,RTS,BRAF) +;; Group: CO +;; Latency: 3 +;; Issue Rate: 2 +;; ??? Scheduling happens before branch shortening, and hence jmp and braf +;; can't be distinguished from bra for the "jump" pattern. + +(define_insn_reservation "sh4_return" 3 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "return,jump_ind")) + "d_lock*2") + +;; RTE +;; Group: CO +;; Latency: 5 +;; Issue Rate: 5 +;; this instruction can be executed in any of the pipelines +;; and blocks the pipeline for next 4 stages. 
+ +(define_insn_reservation "sh4_return_from_exp" 5 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "rte")) + "d_lock*5") + +;; OCBP, OCBWB +;; Group: CO +;; Latency: 1-5 +;; Issue Rate: 1 + +;; cwb is used for the sequence ocbwb @%0; extu.w %0,%2; or %1,%2; mov.l %0,@%2 +;; ocbwb on its own would be "d_lock,nothing,memory*5" +(define_insn_reservation "ocbwb" 6 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "cwb")) + "d_lock*2,(d_lock+memory)*3,issue+load_store+memory,memory*2") + +;; LDS to PR,JSR +;; Group: CO +;; Latency: 3 +;; Issue Rate: 2 +;; The SX stage is blocked for last 2 cycles. +;; OTOH, the only time that has an effect for insns generated by the compiler +;; is when lds to PR is followed by sts from PR - and that is highly unlikely - +;; or when we are doing a function call - and we don't do inter-function +;; scheduling. For the function call case, it's really best that we end with +;; something that models an rts. + +(define_insn_reservation "sh4_lds_to_pr" 3 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "prset") ) + "d_lock*2") + +;; calls introduce a longisch delay that is likely to flush the pipelines +;; of the caller's instructions. Ordinary functions tend to end with a +;; load to restore a register (in the delay slot of rts), while sfuncs +;; tend to end with an EX or MT insn. But that is not actually relevant, +;; since there are no instructions that contend for memory access early. +;; We could, of course, provide exact scheduling information for specific +;; sfuncs, if that should prove useful. + +(define_insn_reservation "sh4_call" 16 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "call,sfunc")) + "d_lock*16") + +;; LDS.L to PR +;; Group: CO +;; Latency: 3 +;; Issue Rate: 2 +;; The SX unit is blocked for last 2 cycles. 
+ +(define_insn_reservation "ldsmem_to_pr" 3 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "pload")) + "d_lock*2") + +;; STS from PR +;; Group: CO +;; Latency: 2 +;; Issue Rate: 2 +;; The SX unit in second and third cycles. + +(define_insn_reservation "sts_from_pr" 2 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "prget")) + "d_lock*2") + +;; STS.L from PR +;; Group: CO +;; Latency: 2 +;; Issue Rate: 2 + +(define_insn_reservation "sh4_prstore_mem" 2 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "pstore")) + "d_lock*2,nothing,memory") + +;; LDS to FPSCR +;; Group: CO +;; Latency: 4 +;; Issue Rate: 1 +;; F1 is blocked for last three cycles. + +(define_insn_reservation "fpscr_load" 4 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "gp_fpscr")) + "d_lock,nothing,F1*3") + +;; LDS.L to FPSCR +;; Group: CO +;; Latency: 1 / 4 +;; Latency to update Rn is 1 and latency to update FPSCR is 4 +;; Issue Rate: 1 +;; F1 is blocked for last three cycles. + +(define_insn_reservation "fpscr_load_mem" 4 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "mem_fpscr")) + "d_lock,nothing,(F1+memory),F1*2") + + +;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W) +;; Group: CO +;; Latency: 4 / 4 +;; Issue Rate: 2 + +(define_insn_reservation "multi" 4 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "smpy,dmpy")) + "d_lock,(d_lock+f1_1),(f1_1|f1_2)*3,F2") + +;; Fixed STS from, and LDS to MACL / MACH +;; Group: CO +;; Latency: 3 +;; Issue Rate: 1 + +(define_insn_reservation "sh4_mac_gp" 3 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "mac_gp,gp_mac,mem_mac")) + "d_lock") + + +;; Single precision floating point computation FCMP/EQ, +;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG +;; Group: FE +;; Latency: 3/4 +;; Issue Rate: 1 + +(define_insn_reservation "fp_arith" 3 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "fp,fp_cmp")) + "issue,F01,F2") + +;; We don't model the resource usage of this exactly because 
that would +;; introduce a bogus latency. +(define_insn_reservation "sh4_fpscr_toggle" 1 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "fpscr_toggle")) + "issue") + +(define_insn_reservation "fp_arith_ftrc" 3 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "ftrc_s")) + "issue,F01,F2") + +(define_bypass 1 "fp_arith_ftrc" "sh4_fpul_gp") + +;; Single Precision FDIV/SQRT +;; Group: FE +;; Latency: 12/13 (FDIV); 11/12 (FSQRT) +;; Issue Rate: 1 +;; We describe fdiv here; fsqrt is actually one cycle faster. + +(define_insn_reservation "fp_div" 12 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "fdiv")) + "issue,F01+F3,F2+F3,F3*7,F1+F3,F2") + +;; Double Precision floating point computation +;; (FCNVDS, FCNVSD, FLOAT, FTRC) +;; Group: FE +;; Latency: (3,4)/5 +;; Issue Rate: 1 + +(define_insn_reservation "dp_float" 4 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "dfp_conv")) + "issue,F01,F1+F2,F2") + +;; Double-precision floating-point (FADD,FMUL,FSUB) +;; Group: FE +;; Latency: (7,8)/9 +;; Issue Rate: 1 + +(define_insn_reservation "fp_double_arith" 8 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "dfp_arith,dfp_mul")) + "issue,F01,F1+F2,fpu*4,F2") + +;; Double-precision FCMP (FCMP/EQ,FCMP/GT) +;; Group: CO +;; Latency: 3/5 +;; Issue Rate: 2 + +(define_insn_reservation "fp_double_cmp" 3 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "dfp_cmp")) + "d_lock,(d_lock+F01),F1+F2,F2") + +;; Double precision FDIV/SQRT +;; Group: FE +;; Latency: (24,25)/26 +;; Issue Rate: 1 + +(define_insn_reservation "dp_div" 25 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "dfdiv")) + "issue,F01+F3,F1+F2+F3,F2+F3,F3*16,F1+F3,(fpu+F3)*2,F2") + + +;; Use the branch-not-taken case to model arith3 insns. For the branch taken +;; case, we'd get a d_lock instead of issue at the end. 
+(define_insn_reservation "arith3" 3 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "arith3")) + "issue,d_lock+pcr_addrcalc,issue") + +;; arith3b insns schedule the same no matter if the branch is taken or not. +(define_insn_reservation "arith3b" 2 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "arith3b")) + "issue,d_lock+pcr_addrcalc") diff --git a/gcc/config/sh/sh4a.md b/gcc/config/sh/sh4a.md new file mode 100644 index 000000000..75f239f53 --- /dev/null +++ b/gcc/config/sh/sh4a.md @@ -0,0 +1,236 @@ +;; Scheduling description for Renesas SH4a +;; Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; The following description models the SH4A pipeline +;; using the DFA based scheduler. + +(define_automaton "sh4a") + +(define_cpu_unit "sh4a_ex" "sh4a") +(define_cpu_unit "sh4a_ls" "sh4a") +(define_cpu_unit "sh4a_fex" "sh4a") +(define_cpu_unit "sh4a_fls" "sh4a") +(define_cpu_unit "sh4a_mult" "sh4a") +(define_cpu_unit "sh4a_fdiv" "sh4a") + +;; Decoding is done on the integer pipeline like the +;; sh4. Define issue to be the | of the two pipelines +;; to control how often instructions are issued.
+(define_reservation "ID_or" "sh4a_ex|sh4a_ls") +(define_reservation "ID_and" "sh4a_ex+sh4a_ls") + + +;; ======================================================= +;; Locking Descriptions + +;; Sh4a_Memory access on the LS pipeline. +(define_cpu_unit "sh4a_memory" "sh4a") + +;; Other access on the LS pipeline. +(define_cpu_unit "sh4a_load_store" "sh4a") + +;; The address calculator used for branch instructions. +;; This will be reserved after "issue" of branch instructions +;; and this is to make sure that no two branch instructions +;; can be issued in parallel. +(define_reservation "sh4a_addrcalc" "sh4a_ex") + +;; ======================================================= +;; Reservations + +;; Branch (BF,BF/S,BT,BT/S,BRA,BSR) +;; Group: BR +;; Latency when taken: 2 +(define_insn_reservation "sh4a_branch" 2 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "cbranch,jump")) + "ID_or+sh4a_addrcalc") + +;; Jump (JSR,JMP,RTS) +;; Group: BR +;; Latency: 3 +(define_insn_reservation "sh4a_jump" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "return,jump_ind")) + "ID_or+sh4a_addrcalc") + +;; RTE +;; Group: CO +;; Latency: 3 +(define_insn_reservation "sh4a_rte" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "rte")) + "ID_and*4") + +;; EX Group Single +;; Group: EX +;; Latency: 0 +(define_insn_reservation "sh4a_ex" 0 + (and (eq_attr "cpu" "sh4a") + (eq_attr "insn_class" "ex_group")) + "sh4a_ex") + +;; MOVA +;; Group: LS +;; Latency: 1 +(define_insn_reservation "sh4a_mova" 1 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "mova")) + "sh4a_ls+sh4a_load_store") + +;; MOV +;; Group: MT +;; Latency: 0 +;; ??? not sure if movi8 belongs here, but that's where it was +;; effectively before. 
+(define_insn_reservation "sh4a_mov" 0 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "move,movi8,gp_mac")) + "ID_or") + +;; Load +;; Group: LS +;; Latency: 3 +(define_insn_reservation "sh4a_load" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "load,pcload,mem_mac")) + "sh4a_ls+sh4a_memory") + +(define_insn_reservation "sh4a_load_si" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "load_si,pcload_si")) + "sh4a_ls+sh4a_memory") + +;; Store +;; Group: LS +;; Latency: 0 +(define_insn_reservation "sh4a_store" 0 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "store,fstore,mac_mem")) + "sh4a_ls+sh4a_memory") + +;; CWB TYPE + +;; MOVUA +;; Group: LS +;; Latency: 3 +(define_insn_reservation "sh4a_movua" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "movua")) + "sh4a_ls+sh4a_memory*2") + +;; Fixed point multiplication (single) +;; Group: CO +;; Latency: 2 +(define_insn_reservation "sh4a_smult" 2 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "smpy")) + "ID_or+sh4a_mult") + +;; Fixed point multiplication (double) +;; Group: CO +;; Latency: 3 +(define_insn_reservation "sh4a_dmult" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "dmpy")) + "ID_or+sh4a_mult") + +(define_insn_reservation "sh4a_mac_gp" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "mac_gp")) + "ID_and") + +;; Other MT group instructions(1 step operations) +;; Group: MT +;; Latency: 1 +(define_insn_reservation "sh4a_mt" 1 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "mt_group")) + "ID_or") + +;; Floating point reg move +;; Group: LS +;; Latency: 2 +(define_insn_reservation "sh4a_freg_mov" 2 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "fmove")) + "sh4a_ls,sh4a_fls") + +;; Single precision floating point computation FCMP/EQ, +;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRVHG, FSCHG +;; Group: FE +;; Latency: 3 +(define_insn_reservation "sh4a_fp_arith" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "fp,fp_cmp,fpscr_toggle")) + "ID_or,sh4a_fex") + +(define_insn_reservation 
"sh4a_fp_arith_ftrc" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "ftrc_s")) + "ID_or,sh4a_fex") + +;; Single-precision FDIV/FSQRT +;; Group: FE +;; Latency: 20 +(define_insn_reservation "sh4a_fdiv" 20 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "fdiv")) + "ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex") + +;; Double Precision floating point computation +;; (FCNVDS, FCNVSD, FLOAT, FTRC) +;; Group: FE +;; Latency: 3 +(define_insn_reservation "sh4a_dp_float" 3 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "dfp_conv")) + "ID_or,sh4a_fex") + +;; Double-precision floating-point (FADD,FMUL,FSUB) +;; Group: FE +;; Latency: 5 +(define_insn_reservation "sh4a_fp_double_arith" 5 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "dfp_arith,dfp_mul")) + "ID_or,sh4a_fex*3") + +;; Double precision FDIV/SQRT +;; Group: FE +;; Latency: 36 +(define_insn_reservation "sh4a_dp_div" 36 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "dfdiv")) + "ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex*2") + +;; FSRRA +;; Group: FE +;; Latency: 5 +(define_insn_reservation "sh4a_fsrra" 5 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "fsrra")) + "ID_or,sh4a_fex") + +;; FSCA +;; Group: FE +;; Latency: 7 +(define_insn_reservation "sh4a_fsca" 7 + (and (eq_attr "cpu" "sh4a") + (eq_attr "type" "fsca")) + "ID_or,sh4a_fex*3") diff --git a/gcc/config/sh/sh64.h b/gcc/config/sh/sh64.h new file mode 100644 index 000000000..c954d72ca --- /dev/null +++ b/gcc/config/sh/sh64.h @@ -0,0 +1,26 @@ +/* Definitions of target machine for GNU compiler for SuperH SH 5. + Copyright 2000, 2001, 2002, 2003, 2004, 2007 Free Software Foundation, Inc. + Contributed by Alexandre Oliva + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#undef TARGET_VERSION +#define TARGET_VERSION \ + fputs (" (SuperH SH)", stderr); + +#undef SH_ELF_WCHAR_TYPE +#define SH_ELF_WCHAR_TYPE "int" diff --git a/gcc/config/sh/shmedia.h b/gcc/config/sh/shmedia.h new file mode 100644 index 000000000..d78a5e573 --- /dev/null +++ b/gcc/config/sh/shmedia.h @@ -0,0 +1,30 @@ +/* Copyright (C) 2000, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifndef _SHMEDIA_H +#define _SHMEDIA_H + +#include <ushmedia.h> +#include <sshmedia.h> + +#endif diff --git a/gcc/config/sh/shmedia.md b/gcc/config/sh/shmedia.md new file mode 100644 index 000000000..47c1ce694 --- /dev/null +++ b/gcc/config/sh/shmedia.md @@ -0,0 +1,94 @@ +;; DFA scheduling description for SH-5 SHmedia instructions. +;; Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+ +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; This is just a conversion of the old model using define_function_unit. + +;; When executing SHmedia code, the SH-5 is a fairly straightforward +;; single-issue machine. It has four pipelines, the branch unit (br), +;; the integer and multimedia unit (imu), the load/store unit (lsu), and +;; the floating point unit (fpu). + +(define_automaton "sh5inst_pipe, sh5fpu_pipe") + +(define_cpu_unit "sh5issue" "sh5inst_pipe") + +(define_cpu_unit "sh5fds" "sh5fpu_pipe") + +;; Every instruction on SH-5 occupies the issue resource for at least one +;; cycle. +(define_insn_reservation "shmedia1" 1 + (and (eq_attr "pipe_model" "sh5media") + (eq_attr "type" "!pt_media,ptabs_media,invalidate_line_media,dmpy_media,load_media,fload_media,fcmp_media,fmove_media,fparith_media,dfparith_media,fpconv_media,dfpconv_media,dfmul_media,store_media,fstore_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media")) + "sh5issue") + +;; Specify the various types of instruction which have latency > 1 +(define_insn_reservation "shmedia2" 2 + (and (eq_attr "pipe_model" "sh5media") + (eq_attr "type" "mcmp_media")) + "sh5issue") + +(define_insn_reservation "shmedia3" 3 + (and (eq_attr "pipe_model" "sh5media") + (eq_attr "type" "dmpy_media,load_media,fcmp_media,mac_media")) + "sh5issue") +;; but see sh_adjust_cost for mac_media exception. 
+ +(define_insn_reservation "shmedia4" 4 + (and (eq_attr "pipe_model" "sh5media") + (eq_attr "type" "fload_media,fmove_media")) + "sh5issue") + +(define_insn_reservation "shmedia_d2mpy" 4 + (and (eq_attr "pipe_model" "sh5media") + (eq_attr "type" "d2mpy_media")) + "sh5issue*2") + +(define_insn_reservation "shmedia5" 5 + (and (eq_attr "pipe_model" "sh5media") + (eq_attr "type" "pt_media,ptabs_media")) + "sh5issue") + +(define_insn_reservation "shmedia6" 6 + (and (eq_attr "pipe_model" "sh5media") + (eq_attr "type" "fparith_media,dfparith_media,fpconv_media,dfpconv_media")) + "sh5issue") + +(define_insn_reservation "shmedia_invalidate" 7 + (and (eq_attr "pipe_model" "sh5media") + (eq_attr "type" "invalidate_line_media")) + "sh5issue*7") + +(define_insn_reservation "shmedia_dfmul" 9 + (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "dfmul_media")) + "sh5issue*4") + +(define_insn_reservation "shmedia_atrans" 10 + (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "atrans_media")) + "sh5issue*5") + +;; Floating-point divide and square-root occupy an additional resource, +;; which is not internally pipelined. However, other instructions +;; can continue to issue. +(define_insn_reservation "shmedia_fdiv" 19 + (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "fdiv_media")) + "sh5issue+sh5fds,sh5fds*18") + +(define_insn_reservation "shmedia_dfdiv" 35 + (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "dfdiv_media")) + "sh5issue+sh5fds,sh5fds*34") diff --git a/gcc/config/sh/sshmedia.h b/gcc/config/sh/sshmedia.h new file mode 100644 index 000000000..f8245042a --- /dev/null +++ b/gcc/config/sh/sshmedia.h @@ -0,0 +1,78 @@ +/* Copyright (C) 2000, 2001, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + +/* sshmedia.h: Intrinsics corresponding to SHmedia instructions that + may only be executed in privileged mode. */ + +#ifndef _SSHMEDIA_H +#define _SSHMEDIA_H + +#if __SHMEDIA__ +__inline__ static unsigned long long sh_media_GETCON (unsigned int k) + __attribute__((always_inline)); + +__inline__ static +unsigned long long +sh_media_GETCON (unsigned int k) +{ + unsigned long long res; + __asm__ __volatile__ ("getcon cr%1, %0" : "=r" (res) : "n" (k)); + return res; +} + +__inline__ static void sh_media_PUTCON (unsigned long long mm, unsigned int k) + __attribute__((always_inline)); + +__inline__ static +void +sh_media_PUTCON (unsigned long long mm, unsigned int k) +{ + __asm__ __volatile__ ("putcon %0, cr%1" : : "r" (mm), "n" (k)); +} + +__inline__ static +unsigned long long +sh_media_GETCFG (unsigned long long mm, int s) +{ + unsigned long long res; + __asm__ __volatile__ ("getcfg %1, %2, %0" : "=r" (res) : "r" (mm), "n" (s)); + return res; +} + +__inline__ static +void +sh_media_PUTCFG (unsigned long long mm, int s, unsigned long long mw) +{ + __asm__ __volatile__ ("putcfg %0, %1, %2" : : "r" (mm), "n" (s), "r" (mw)); +} + +__inline__ static +void +sh_media_SLEEP (void) +{ + __asm__ __volatile__ ("sleep"); +} +#endif + +#endif diff --git a/gcc/config/sh/superh.h b/gcc/config/sh/superh.h new file mode 100644 index 000000000..88920739e --- 
/dev/null +++ b/gcc/config/sh/superh.h @@ -0,0 +1,107 @@ +/* Definitions of target machine for gcc for Super-H using sh-superh-elf. + Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + + +/* This header file is used when the vendor name is set to 'superh'. + config.gcc already configured the compiler for SH4 only and switched + the default endianness to little (although big endian is still available). + This file configures the spec file to the default board configuration + but in such a way that it can be overridden by a boardspecs file + (using the -specs= option). This file is expected to disable the + defaults and provide options --defsym _start and --defsym _stack + which are required by the SuperH configuration of GNU ld. + + This file is intended to override sh.h.
*/ + + +#ifndef _SUPERH_H +#define _SUPERH_H +#endif + + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (SuperH SH special %s)", __DATE__); + +/* Override the linker spec strings to use the new emulation + The specstrings are concatenated as follows + LINK_EMUL_PREFIX.(''|'32'|'64'|LINK_DEFAULT_CPU_EMUL).SUBTARGET_LINK_EMUL_SUFFIX +*/ +#undef LINK_EMUL_PREFIX +#undef SUBTARGET_LINK_EMUL_SUFFIX + +#define LINK_EMUL_PREFIX "superh" +#define SUBTARGET_LINK_EMUL_SUFFIX "" + +/* Add the SUBTARGET_LINK_SPEC to add the board and runtime support and + change the endianness */ +#undef SUBTARGET_LINK_SPEC +#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN +#define SUBTARGET_LINK_SPEC "%(board_link) %(ldruntime) %{ml|!mb:-EL}%{mb:-EB}" +#else +#define SUBTARGET_LINK_SPEC "%(board_link) %(ldruntime) %{ml:-EL}%{mb|!ml:-EB}" +#endif + + +/* This is used by the link spec if the boardspecs file is not used (for whatever reason). + If the boardspecs file overrides this then an alternative can be used. 
*/ +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ +{ "board_link", "--defsym _start=0x1000 --defsym _stack=0x30000" }, \ +{ "asruntime", "" }, \ +{ "cppruntime", "-D__GDB_SIM__" }, \ +{ "cc1runtime", "" }, \ +{ "ldruntime", "" }, \ +{ "libruntime", "-lc -lgloss" } + + +/* Set the SUBTARGET_CPP_SPEC to define __EMBEDDED_CROSS__ which has an effect + on newlib and provide the runtime support */ +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC \ +"-D__EMBEDDED_CROSS__ %{m4-100*:-D__SH4_100__} %{m4-200*:-D__SH4_200__} %{m4-300*:-D__SH4_300__} %{m4-340:-D__SH4_340__} %{m4-400:-D__SH4_400__} %{m4-500:-D__SH4_500__} \ +%(cppruntime)" + +/* Override the SUBTARGET_ASM_SPEC to add the runtime support */ +#undef SUBTARGET_ASM_SPEC +#define SUBTARGET_ASM_SPEC "%{m4-100*|m4-200*:-isa=sh4} %{m4-400|m4-340:-isa=sh4-nommu-nofpu} %{m4-500:-isa=sh4-nofpu} %(asruntime)" + +/* Override the SUBTARGET_ASM_RELAX_SPEC so it doesn't interfere with the + runtime support by adding -isa=sh4 in the wrong place. */ +#undef SUBTARGET_ASM_RELAX_SPEC +#define SUBTARGET_ASM_RELAX_SPEC "%{!m4-100*:%{!m4-200*:%{!m4-300*:%{!m4-340:%{!m4-400:%{!m4-500:-isa=sh4}}}}}}" + +/* Create the CC1_SPEC to add the runtime support */ +#undef CC1_SPEC +#define CC1_SPEC "%(cc1runtime)" + +#undef CC1PLUS_SPEC +#define CC1PLUS_SPEC "%(cc1runtime)" + + +/* Override the LIB_SPEC to add the runtime support */ +#undef LIB_SPEC +#define LIB_SPEC "%{!shared:%{!symbolic:%(libruntime) -lc}} %{pg:-lprofile -lc}" + +/* Override STARTFILE_SPEC to add profiling and MMU support. 
*/ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared: %{!m4-400*:%{!m4-340*: %{pg:gcrt1-mmu.o%s}%{!pg:crt1-mmu.o%s}}}} \ + %{!shared: %{m4-340*|m4-400*: %{pg:gcrt1.o%s}%{!pg:crt1.o%s}}} \ + crti.o%s \ + %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}" diff --git a/gcc/config/sh/superh.opt b/gcc/config/sh/superh.opt new file mode 100644 index 000000000..b85abddaf --- /dev/null +++ b/gcc/config/sh/superh.opt @@ -0,0 +1,10 @@ +;; The -mboard and -mruntime options need only be accepted here, they are +;; actually processed by supplementary specs files. + +mboard= +Target RejectNegative Joined +Board name [and memory region]. + +mruntime= +Target RejectNegative Joined +Runtime name. diff --git a/gcc/config/sh/symbian-base.c b/gcc/config/sh/symbian-base.c new file mode 100644 index 000000000..f8e678be3 --- /dev/null +++ b/gcc/config/sh/symbian-base.c @@ -0,0 +1,244 @@ +/* Routines for GCC for a Symbian OS targeted SH backend, shared by + both the C and C++ compilers. + Copyright (C) 2004, 2005, 2007, 2009, 2010 Free Software Foundation, Inc. + Contributed by RedHat. + Most of this code is stolen from i386/winnt.c. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "output.h" +#include "flags.h" +#include "tree.h" +#include "expr.h" +#include "tm_p.h" +#include "diagnostic-core.h" +#include "sh-symbian.h" + +/* Return nonzero if SYMBOL is marked as being dllexport'd. */ + +bool +sh_symbian_is_dllexported_name (const char *symbol) +{ + return strncmp (DLL_EXPORT_PREFIX, symbol, + strlen (DLL_EXPORT_PREFIX)) == 0; +} + +/* Return nonzero if SYMBOL is marked as being dllimport'd. */ + +static bool +sh_symbian_is_dllimported_name (const char *symbol) +{ + return strncmp (DLL_IMPORT_PREFIX, symbol, + strlen (DLL_IMPORT_PREFIX)) == 0; +} + +/* Return nonzero if DECL is a dllexport'd object. */ + +bool +sh_symbian_is_dllexported (tree decl) +{ + tree exp; + + if ( TREE_CODE (decl) != VAR_DECL + && TREE_CODE (decl) != FUNCTION_DECL) + return false; + + exp = lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl)); + + /* Class members get the dllexport status of their class. */ + if (exp == NULL) + { + tree class = sh_symbian_associated_type (decl); + + if (class) + exp = lookup_attribute ("dllexport", TYPE_ATTRIBUTES (class)); + } +#if SYMBIAN_DEBUG + if (exp) + { + print_node_brief (stderr, "dllexport:", decl, 0); + fprintf (stderr, "\n"); + } + else +#if SYMBIAN_DEBUG < 2 + if (TREE_CODE (decl) != FUNCTION_DECL) +#endif + { + print_node_brief (stderr, "no dllexport:", decl, 0); + fprintf (stderr, "\n"); + } +#endif + return exp ? true : false; +} + +/* Mark a DECL as being dllimport'd. 
*/ + +static void +sh_symbian_mark_dllimport (tree decl) +{ + const char *oldname; + char *newname; + tree idp; + rtx rtlname; + rtx newrtl; + + rtlname = XEXP (DECL_RTL (decl), 0); + if (MEM_P (rtlname)) + rtlname = XEXP (rtlname, 0); + gcc_assert (GET_CODE (rtlname) == SYMBOL_REF); + oldname = XSTR (rtlname, 0); + + if (sh_symbian_is_dllexported_name (oldname)) + { + error ("%qE declared as both exported to and imported from a DLL", + DECL_NAME (decl)); + } + else if (sh_symbian_is_dllimported_name (oldname)) + { + /* Already done, but do a sanity check to prevent assembler errors. */ + if (!DECL_EXTERNAL (decl) || !TREE_PUBLIC (decl)) + error ("failure in redeclaration of %q+D: dllimport%'d symbol lacks external linkage", + decl); + } + else + { + newname = (char *) alloca (strlen (DLL_IMPORT_PREFIX) + strlen (oldname) + 1); + sprintf (newname, "%s%s", DLL_IMPORT_PREFIX, oldname); + + /* We pass newname through get_identifier to ensure it has a unique + address. RTL processing can sometimes peek inside the symbol ref + and compare the string's addresses to see if two symbols are + identical. */ + idp = get_identifier (newname); + newrtl = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp)); + XEXP (DECL_RTL (decl), 0) = newrtl; + } +} + +/* Mark a DECL as being dllexport'd. + Note that we override the previous setting (e.g.: dllimport). */ + +static void +sh_symbian_mark_dllexport (tree decl) +{ + const char *oldname; + char *newname; + rtx rtlname; + tree idp; + + rtlname = XEXP (DECL_RTL (decl), 0); + if (MEM_P (rtlname)) + rtlname = XEXP (rtlname, 0); + gcc_assert (GET_CODE (rtlname) == SYMBOL_REF); + oldname = XSTR (rtlname, 0); + + if (sh_symbian_is_dllimported_name (oldname)) + { + /* Remove DLL_IMPORT_PREFIX. + Note - we do not issue a warning here. In Symbian's environment it + is legitimate for a prototype to be marked as dllimport and the + corresponding definition to be marked as dllexport. 
The prototypes + are in headers used everywhere and the definition is in a translation + unit which has included the header in order to ensure argument + correctness. */ + oldname += strlen (DLL_IMPORT_PREFIX); + DECL_DLLIMPORT_P (decl) = 0; + } + else if (sh_symbian_is_dllexported_name (oldname)) + return; /* Already done. */ + + newname = (char *) alloca (strlen (DLL_EXPORT_PREFIX) + strlen (oldname) + 1); + sprintf (newname, "%s%s", DLL_EXPORT_PREFIX, oldname); + + /* We pass newname through get_identifier to ensure it has a unique + address. RTL processing can sometimes peek inside the symbol ref + and compare the string's addresses to see if two symbols are + identical. */ + idp = get_identifier (newname); + + XEXP (DECL_RTL (decl), 0) = + gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp)); +} + +void +sh_symbian_encode_section_info (tree decl, rtx rtl, int first) +{ + default_encode_section_info (decl, rtl, first); + + /* Mark the decl so we can tell from the rtl whether + the object is dllexport'd or dllimport'd. */ + if (sh_symbian_is_dllexported (decl)) + sh_symbian_mark_dllexport (decl); + else if (sh_symbian_is_dllimported (decl)) + sh_symbian_mark_dllimport (decl); + /* It might be that DECL has already been marked as dllimport, but a + subsequent definition nullified that. The attribute is gone but + DECL_RTL still has (DLL_IMPORT_PREFIX) prefixed. We need to remove + that. Ditto for the DECL_DLLIMPORT_P flag. */ + else if ( (TREE_CODE (decl) == FUNCTION_DECL + || TREE_CODE (decl) == VAR_DECL) + && DECL_RTL (decl) != NULL_RTX + && MEM_P (DECL_RTL (decl)) + && MEM_P (XEXP (DECL_RTL (decl), 0)) + && GET_CODE (XEXP (XEXP (DECL_RTL (decl), 0), 0)) == SYMBOL_REF + && sh_symbian_is_dllimported_name (XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0))) + { + const char * oldname = XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0); + /* Remove DLL_IMPORT_PREFIX. 
*/ + tree idp = get_identifier (oldname + strlen (DLL_IMPORT_PREFIX)); + rtx newrtl = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp)); + + warning (0, "%s %q+D %s after being referenced with dllimport linkage", + TREE_CODE (decl) == VAR_DECL ? "variable" : "function", + decl, (DECL_INITIAL (decl) || !DECL_EXTERNAL (decl)) + ? "defined locally" : "redeclared without dllimport attribute"); + + XEXP (DECL_RTL (decl), 0) = newrtl; + + DECL_DLLIMPORT_P (decl) = 0; + } +} + +/* Return the length of a function name prefix + that starts with the character 'c'. */ + +static int +sh_symbian_get_strip_length (int c) +{ + /* XXX Assumes strlen (DLL_EXPORT_PREFIX) == strlen (DLL_IMPORT_PREFIX). */ + return (c == SH_SYMBIAN_FLAG_CHAR[0]) ? strlen (DLL_EXPORT_PREFIX) : 0; +} + +/* Return a pointer to a function's name with any + and all prefix encodings stripped from it. */ + +const char * +sh_symbian_strip_name_encoding (const char *name) +{ + int skip; + + while ((skip = sh_symbian_get_strip_length (*name))) + name += skip; + + return name; +} + diff --git a/gcc/config/sh/symbian-c.c b/gcc/config/sh/symbian-c.c new file mode 100644 index 000000000..c93922a39 --- /dev/null +++ b/gcc/config/sh/symbian-c.c @@ -0,0 +1,181 @@ +/* Routines for C compiler part of GCC for a Symbian OS targeted SH backend. + Copyright (C) 2004, 2005, 2007, 2009, 2010 Free Software Foundation, Inc. + Contributed by RedHat. + Most of this code is stolen from i386/winnt.c. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "output.h" +#include "flags.h" +#include "tree.h" +#include "expr.h" +#include "tm_p.h" +#include "diagnostic-core.h" +#include "sh-symbian.h" + + +/* Return the type that we should use to determine if DECL is + imported or exported. */ + +tree +sh_symbian_associated_type (tree decl) +{ + tree t = NULL_TREE; + + /* We can just take the DECL_CONTEXT as normal. */ + if (DECL_CONTEXT (decl) && TYPE_P (DECL_CONTEXT (decl))) + t = DECL_CONTEXT (decl); + + return t; +} + +/* Return nonzero if DECL is a dllimport'd object. */ + +bool +sh_symbian_is_dllimported (tree decl) +{ + tree imp; + + if ( TREE_CODE (decl) != VAR_DECL + && TREE_CODE (decl) != FUNCTION_DECL) + return false; + + imp = lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl)); + if (imp) + return true; + + /* Class members get the dllimport status of their class. */ + imp = sh_symbian_associated_type (decl); + if (! imp) + return false; + + imp = lookup_attribute ("dllimport", TYPE_ATTRIBUTES (imp)); + if (!imp) + return false; + + /* Don't mark defined functions as dllimport. If the definition itself + was marked with dllimport, then sh_symbian_handle_dll_attribute reports + an error. This handles the case when the definition overrides an + earlier declaration. */ + if (TREE_CODE (decl) == FUNCTION_DECL + && DECL_INITIAL (decl) + && ! DECL_DECLARED_INLINE_P (decl)) + { + warning (OPT_Wattributes, "function %q+D is defined after prior " + "declaration as dllimport: attribute ignored", + decl); + return false; + } + + /* Don't allow definitions of static data members in dllimport + class. Just ignore the attribute for vtable data. 
*/ + else if (TREE_CODE (decl) == VAR_DECL + && TREE_STATIC (decl) + && TREE_PUBLIC (decl) + && !DECL_EXTERNAL (decl)) + { + error ("definition of static data member %q+D of dllimport%'d class", + decl); + return false; + } + + return true; +} + +/* Handle a "dllimport" or "dllexport" attribute; + arguments as in struct attribute_spec.handler. */ + +tree +sh_symbian_handle_dll_attribute (tree *pnode, tree name, tree args, + int flags, bool *no_add_attrs) +{ + tree node = *pnode; + const char *attr = IDENTIFIER_POINTER (name); + + /* These attributes may apply to structure and union types being + created, but otherwise should pass to the declaration involved. */ + if (!DECL_P (node)) + { + if (flags & ((int) ATTR_FLAG_DECL_NEXT + | (int) ATTR_FLAG_FUNCTION_NEXT + | (int) ATTR_FLAG_ARRAY_NEXT)) + { + warning (OPT_Wattributes, "%qs attribute ignored", attr); + *no_add_attrs = true; + return tree_cons (name, args, NULL_TREE); + } + + if (TREE_CODE (node) != RECORD_TYPE && TREE_CODE (node) != UNION_TYPE) + { + warning (OPT_Wattributes, "%qs attribute ignored", attr); + *no_add_attrs = true; + } + + return NULL_TREE; + } + + /* Report error on dllimport ambiguities + seen now before they cause any damage. */ + else if (is_attribute_p ("dllimport", name)) + { + if (TREE_CODE (node) == VAR_DECL) + { + if (DECL_INITIAL (node)) + { + error ("variable %q+D definition is marked dllimport", + node); + *no_add_attrs = true; + } + + /* `extern' needn't be specified with dllimport. + Specify `extern' now and hope for the best. Sigh. */ + DECL_EXTERNAL (node) = 1; + /* Also, implicitly give dllimport'd variables declared within + a function global scope, unless declared static. */ + if (current_function_decl != NULL_TREE && ! TREE_STATIC (node)) + TREE_PUBLIC (node) = 1; + } + } + + /* Report error if symbol is not accessible at global scope. 
*/ + if (!TREE_PUBLIC (node) + && ( TREE_CODE (node) == VAR_DECL + || TREE_CODE (node) == FUNCTION_DECL)) + { + error ("external linkage required for symbol %q+D because of %qE attribute", + node, name); + *no_add_attrs = true; + } + +#if SYMBIAN_DEBUG + print_node_brief (stderr, "mark node", node, 0); + fprintf (stderr, " as %s\n", attr); +#endif + + return NULL_TREE; +} + +int +sh_symbian_import_export_class (tree ctype ATTRIBUTE_UNUSED, int import_export) +{ + return import_export; +} diff --git a/gcc/config/sh/symbian-cxx.c b/gcc/config/sh/symbian-cxx.c new file mode 100644 index 000000000..c0f8b71f6 --- /dev/null +++ b/gcc/config/sh/symbian-cxx.c @@ -0,0 +1,662 @@ +/* Routines for C++ support for GCC for a Symbian OS targeted SH backend. + Copyright (C) 2004, 2005, 2007, 2009, 2010 Free Software Foundation, Inc. + Contributed by RedHat. + Most of this code is stolen from i386/winnt.c. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "output.h" +#include "flags.h" +#include "tree.h" +#include "expr.h" +#include "tm_p.h" +#include "cp/cp-tree.h" /* We need access to the OVL_... macros. */ +#include "diagnostic-core.h" +#include "sh-symbian.h" + + +/* Return the type that we should use to determine if DECL is + imported or exported. 
*/ + +tree +sh_symbian_associated_type (tree decl) +{ + tree t = NULL_TREE; + + if (TREE_CODE (TREE_TYPE (decl)) == METHOD_TYPE) + /* Methods now inherit their dllimport/dllexport attributes correctly + so there is no need to check their class. In fact it is wrong to + check their class since a method can remain unexported from an + exported class. */ + return t; + + /* Otherwise we can just take the DECL_CONTEXT as normal. */ + if (DECL_CONTEXT (decl) && TYPE_P (DECL_CONTEXT (decl))) + t = DECL_CONTEXT (decl); + + return t; +} + + +/* Return nonzero if DECL is a dllimport'd object. */ + +bool +sh_symbian_is_dllimported (tree decl) +{ + tree imp; + + if ( TREE_CODE (decl) != VAR_DECL + && TREE_CODE (decl) != FUNCTION_DECL) + return false; + + imp = lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl)); + if (imp) + return true; + + /* Class members get the dllimport status of their class. */ + imp = sh_symbian_associated_type (decl); + if (! imp) + return false; + + imp = lookup_attribute ("dllimport", TYPE_ATTRIBUTES (imp)); + if (!imp) + return false; + + /* Don't mark defined functions as dllimport. If the definition itself + was marked with dllimport, then sh_symbian_handle_dll_attribute reports + an error. This handles the case when the definition overrides an + earlier declaration. */ + if (TREE_CODE (decl) == FUNCTION_DECL + && DECL_INITIAL (decl) + && ! DECL_DECLARED_INLINE_P (decl)) + { + /* Don't warn about artificial methods. */ + if (!DECL_ARTIFICIAL (decl)) + warning (OPT_Wattributes, "function %q+D is defined after prior " + "declaration as dllimport: attribute ignored", + decl); + return false; + } + + /* We ignore the dllimport attribute for inline member functions. + This differs from MSVC behavior which treats it like GNUC + 'extern inline' extension. 
*/ + else if (TREE_CODE (decl) == FUNCTION_DECL && DECL_DECLARED_INLINE_P (decl)) + { + if (extra_warnings) + warning (OPT_Wattributes, "inline function %q+D is declared as " + "dllimport: attribute ignored", + decl); + return false; + } + + /* Don't allow definitions of static data members in dllimport + class. Just ignore the attribute for vtable data. */ + else if (TREE_CODE (decl) == VAR_DECL + && TREE_STATIC (decl) + && TREE_PUBLIC (decl) + && !DECL_EXTERNAL (decl)) + { + if (!DECL_VIRTUAL_P (decl)) + error ("definition of static data member %q+D of dllimport%'d class", + decl); + return false; + } + + /* Since we can't treat a pointer to a dllimport'd symbol as a + constant address, we turn off the attribute on C++ virtual + methods to allow creation of vtables using thunks. Don't mark + artificial methods either (in sh_symbian_associated_type, only + COMDAT artificial method get import status from class context). */ + else if (TREE_CODE (TREE_TYPE (decl)) == METHOD_TYPE + && (DECL_VIRTUAL_P (decl) || DECL_ARTIFICIAL (decl))) + return false; + + return true; +} + + +/* This code implements a specification for exporting the vtable and rtti of + classes that have members with the dllimport or dllexport attributes. + This specification is defined here: + + http://www.armdevzone.com/EABI/exported_class.txt + + Basically it says that a class's vtable and rtti should be exported if + the following rules apply: + + - If it has any non-inline non-pure virtual functions, + at least one of these needs to be declared dllimport + OR any of the constructors is declared dllimport. + + AND + + - The class has an inline constructor/destructor and + a key-function (placement of vtable uniquely defined) that + is defined in this translation unit.
+ + The specification also says that for classes which will have their + vtables and rtti exported that their base class(es) might also need a + similar exporting if: + + - Every base class needs to have its vtable & rtti exported + as well, if the following conditions hold true: + + The base class has a non-inline declared non-pure virtual function + + The base class is polymorphic (has or inherits any virtual functions) + or the base class has any virtual base classes. */ + +/* Decide if a base class of a class should + also have its vtable and rtti exported. */ + +static void +sh_symbian_possibly_export_base_class (tree base_class) +{ + VEC(tree,gc) *method_vec; + int len; + + if (! (TYPE_CONTAINS_VPTR_P (base_class))) + return; + + method_vec = CLASSTYPE_METHOD_VEC (base_class); + len = method_vec ? VEC_length (tree, method_vec) : 0; + + for (;len --;) + { + tree member = VEC_index (tree, method_vec, len); + + if (! member) + continue; + + for (member = OVL_CURRENT (member); member; member = OVL_NEXT (member)) + { + if (TREE_CODE (member) != FUNCTION_DECL) + continue; + + if (DECL_CONSTRUCTOR_P (member) || DECL_DESTRUCTOR_P (member)) + continue; + + if (! DECL_VIRTUAL_P (member)) + continue; + + if (DECL_PURE_VIRTUAL_P (member)) + continue; + + if (DECL_DECLARED_INLINE_P (member)) + continue; + + break; + } + + if (member) + break; + } + + if (len < 0) + return; + + /* FIXME: According to the spec this base class should be exported, but + a) how do we do this ? and + b) it does not appear to be necessary for compliance with the Symbian + OS which so far is the only consumer of this code. */ +#if SYMBIAN_DEBUG + print_node_brief (stderr, "", base_class, 0); + fprintf (stderr, " EXPORTed [base class of exported class]\n"); +#endif +} + +/* Add the named attribute to the given node. Copes with both DECLs and + TYPEs. Will only add the attribute if it is not already present.
*/ + +static void +sh_symbian_add_attribute (tree node, const char *attr_name) +{ + tree attrs; + tree attr; + + attrs = DECL_P (node) ? DECL_ATTRIBUTES (node) : TYPE_ATTRIBUTES (node); + + if (lookup_attribute (attr_name, attrs) != NULL_TREE) + return; + + attr = get_identifier (attr_name); + + if (DECL_P (node)) + DECL_ATTRIBUTES (node) = tree_cons (attr, NULL_TREE, attrs); + else + TYPE_ATTRIBUTES (node) = tree_cons (attr, NULL_TREE, attrs); + +#if SYMBIAN_DEBUG + fprintf (stderr, "propagate %s attribute", attr_name); + print_node_brief (stderr, " to", node, 0); + fprintf (stderr, "\n"); +#endif +} + +/* Add the named attribute to a class and its vtable and rtti. */ + +static void +sh_symbian_add_attribute_to_class_vtable_and_rtti (tree ctype, const char *attr_name) +{ + sh_symbian_add_attribute (ctype, attr_name); + + /* If the vtable exists then they need annotating as well. */ + if (CLASSTYPE_VTABLES (ctype)) + /* XXX - Do we need to annotate any vtables other than the primary ? */ + sh_symbian_add_attribute (CLASSTYPE_VTABLES (ctype), attr_name); + + /* If the rtti exists then it needs annotating as well. */ + if (TYPE_MAIN_VARIANT (ctype) + && CLASSTYPE_TYPEINFO_VAR (TYPE_MAIN_VARIANT (ctype))) + sh_symbian_add_attribute (CLASSTYPE_TYPEINFO_VAR (TYPE_MAIN_VARIANT (ctype)), + attr_name); +} + +/* Decide if a class needs to have an attribute because + one of its member functions has the attribute. */ + +static bool +sh_symbian_class_needs_attribute (tree ctype, const char *attribute_name) +{ + VEC(tree,gc) *method_vec; + + method_vec = CLASSTYPE_METHOD_VEC (ctype); + + /* If the key function has the attribute then the class needs it too. */ + if (TYPE_POLYMORPHIC_P (ctype) + && method_vec + && tree_contains_struct [TREE_CODE (ctype), TS_DECL_COMMON] == 1 + && lookup_attribute (attribute_name, + DECL_ATTRIBUTES (VEC_index (tree, method_vec, 0)))) + return true; + + /* Check the class's member functions. 
*/ + if (TREE_CODE (ctype) == RECORD_TYPE) + { + unsigned int len; + + len = method_vec ? VEC_length (tree, method_vec) : 0; + + for (;len --;) + { + tree member = VEC_index (tree, method_vec, len); + + if (! member) + continue; + + for (member = OVL_CURRENT (member); + member; + member = OVL_NEXT (member)) + { + if (TREE_CODE (member) != FUNCTION_DECL) + continue; + + if (DECL_PURE_VIRTUAL_P (member)) + continue; + + if (! DECL_VIRTUAL_P (member)) + continue; + + if (lookup_attribute (attribute_name, DECL_ATTRIBUTES (member))) + { +#if SYMBIAN_DEBUG + print_node_brief (stderr, "", ctype, 0); + fprintf (stderr, " inherits %s because", attribute_name); + print_node_brief (stderr, "", member, 0); + fprintf (stderr, " has it.\n"); +#endif + return true; + } + } + } + } + +#if SYMBIAN_DEBUG + print_node_brief (stderr, "", ctype, 0); + fprintf (stderr, " does not inherit %s\n", attribute_name); +#endif + return false; +} + +/* Decide if a class needs its vtable and rtti exporting. */ + +static bool +symbian_export_vtable_and_rtti_p (tree ctype) +{ + bool inline_ctor_dtor; + bool dllimport_ctor_dtor; + bool dllimport_member; + tree binfo, base_binfo; + VEC(tree,gc) *method_vec; + tree key; + int i; + int len; + + /* Make sure that we are examining a class... */ + if (TREE_CODE (ctype) != RECORD_TYPE) + { +#if SYMBIAN_DEBUG + print_node_brief (stderr, "", ctype, 0); + fprintf (stderr, " does NOT need to be EXPORTed [not a class]\n"); +#endif + return false; + } + + /* If the class does not have a key function it + does not need to have its vtable exported. */ + if ((key = CLASSTYPE_KEY_METHOD (ctype)) == NULL_TREE) + { +#if SYMBIAN_DEBUG + print_node_brief (stderr, "", ctype, 0); + fprintf (stderr, " does NOT need to be EXPORTed [no key function]\n"); +#endif + return false; + } + + /* If the key fn has not been defined + then the class should not be exported. */ + if (! 
TREE_ASM_WRITTEN (key)) + { +#if SYMBIAN_DEBUG + print_node_brief (stderr, "", ctype, 0); + fprintf (stderr, " does NOT need to be EXPORTed [key function not defined]\n"); +#endif + return false; + } + + /* Check the class's member functions. */ + inline_ctor_dtor = false; + dllimport_ctor_dtor = false; + dllimport_member = false; + + method_vec = CLASSTYPE_METHOD_VEC (ctype); + len = method_vec ? VEC_length (tree, method_vec) : 0; + + for (;len --;) + { + tree member = VEC_index (tree, method_vec, len); + + if (! member) + continue; + + for (member = OVL_CURRENT (member); member; member = OVL_NEXT (member)) + { + if (TREE_CODE (member) != FUNCTION_DECL) + continue; + + if (DECL_CONSTRUCTOR_P (member) || DECL_DESTRUCTOR_P (member)) + { + if (DECL_DECLARED_INLINE_P (member) + /* Ignore C++ backend created inline ctors/dtors. */ + && ( DECL_MAYBE_IN_CHARGE_CONSTRUCTOR_P (member) + || DECL_MAYBE_IN_CHARGE_DESTRUCTOR_P (member))) + inline_ctor_dtor = true; + + if (lookup_attribute ("dllimport", DECL_ATTRIBUTES (member))) + dllimport_ctor_dtor = true; + } + else + { + if (DECL_PURE_VIRTUAL_P (member)) + continue; + + if (! DECL_VIRTUAL_P (member)) + continue; + + if (DECL_DECLARED_INLINE_P (member)) + continue; + + if (lookup_attribute ("dllimport", DECL_ATTRIBUTES (member))) + dllimport_member = true; + } + } + } + + if (! dllimport_member && ! dllimport_ctor_dtor) + { +#if SYMBIAN_DEBUG + print_node_brief (stderr, "", ctype, 0); + fprintf (stderr, + " does NOT need to be EXPORTed [no non-pure virtuals or ctors/dtors with dllimport]\n"); +#endif + return false; + } + + if (! inline_ctor_dtor) + { +#if SYMBIAN_DEBUG + print_node_brief (stderr, "", ctype, 0); + fprintf (stderr, + " does NOT need to be EXPORTed [no inline ctor/dtor]\n"); +#endif + return false; + } + +#if SYMBIAN_DEBUG + print_node_brief (stderr, "", ctype, 0); + fprintf (stderr, " DOES need to be EXPORTed\n"); +#endif + + /* Now we must check and possibly export the base classes. 
*/ + for (i = 0, binfo = TYPE_BINFO (ctype); + BINFO_BASE_ITERATE (binfo, i, base_binfo); i++) + sh_symbian_possibly_export_base_class (BINFO_TYPE (base_binfo)); + + return true; +} + +/* Possibly override the decision to export class TYPE. Upon entry + IMPORT_EXPORT will contain 1 if the class is going to be exported, + -1 if it is going to be imported and 0 otherwise. This function + should return the modified value and perform any other actions + necessary to support the backend's targeted operating system. */ + +int +sh_symbian_import_export_class (tree ctype, int import_export) +{ + const char *attr_name = NULL; + + /* If we are exporting the class but it does not have the dllexport + attribute then we may need to add it. Similarly imported classes + may need the dllimport attribute. */ + switch (import_export) + { + case 1: attr_name = "dllexport"; break; + case -1: attr_name = "dllimport"; break; + default: break; + } + + if (attr_name + && ! lookup_attribute (attr_name, TYPE_ATTRIBUTES (ctype))) + { + if (sh_symbian_class_needs_attribute (ctype, attr_name)) + sh_symbian_add_attribute_to_class_vtable_and_rtti (ctype, attr_name); + + /* Classes can be forced to export their + vtable and rtti under certain conditions. */ + if (symbian_export_vtable_and_rtti_p (ctype)) + { + sh_symbian_add_attribute_to_class_vtable_and_rtti (ctype, "dllexport"); + + /* Make sure that the class and its vtable are exported. */ + import_export = 1; + + if (CLASSTYPE_VTABLES (ctype)) + DECL_EXTERNAL (CLASSTYPE_VTABLES (ctype)) = 1; + + /* Check to make sure that if the class has a key method that + it is now on the list of keyed classes. That way its vtable + will be emitted. 
*/ + if (CLASSTYPE_KEY_METHOD (ctype)) + { + tree class; + + for (class = keyed_classes; class; class = TREE_CHAIN (class)) + if (class == ctype) + break; + + if (class == NULL_TREE) + { +#if SYMBIAN_DEBUG + print_node_brief (stderr, "Add node", ctype, 0); + fprintf (stderr, " to the keyed classes list\n"); +#endif + keyed_classes = tree_cons (NULL_TREE, ctype, keyed_classes); + } + } + + /* Make sure that the typeinfo will be emitted as well. */ + if (CLASS_TYPE_P (ctype)) + TYPE_NEEDS_CONSTRUCTING (TREE_TYPE (CLASSTYPE_TYPEINFO_VAR (TYPE_MAIN_VARIANT (ctype)))) = 1; + } + } + + return import_export; +} + +/* Handle a "dllimport" or "dllexport" attribute; + arguments as in struct attribute_spec.handler. */ + +tree +sh_symbian_handle_dll_attribute (tree *pnode, tree name, tree args, + int flags, bool *no_add_attrs) +{ + tree thunk; + tree node = *pnode; + const char *attr = IDENTIFIER_POINTER (name); + + /* These attributes may apply to structure and union types being + created, but otherwise should pass to the declaration involved. */ + if (!DECL_P (node)) + { + if (flags & ((int) ATTR_FLAG_DECL_NEXT + | (int) ATTR_FLAG_FUNCTION_NEXT + | (int) ATTR_FLAG_ARRAY_NEXT)) + { + warning (OPT_Wattributes, "%qs attribute ignored", attr); + *no_add_attrs = true; + return tree_cons (name, args, NULL_TREE); + } + + if (TREE_CODE (node) != RECORD_TYPE && TREE_CODE (node) != UNION_TYPE) + { + warning (OPT_Wattributes, "%qs attribute ignored", attr); + *no_add_attrs = true; + } + + return NULL_TREE; + } + + /* Report error on dllimport ambiguities + seen now before they cause any damage. */ + else if (is_attribute_p ("dllimport", name)) + { + if (TREE_CODE (node) == VAR_DECL) + { + if (DECL_INITIAL (node)) + { + error ("variable %q+D definition is marked dllimport", + node); + *no_add_attrs = true; + } + + /* `extern' needn't be specified with dllimport. + Specify `extern' now and hope for the best. Sigh. 
*/ + DECL_EXTERNAL (node) = 1; + /* Also, implicitly give dllimport'd variables declared within + a function global scope, unless declared static. */ + if (current_function_decl != NULL_TREE && ! TREE_STATIC (node)) + TREE_PUBLIC (node) = 1; + } + } + + /* If the node is an overloaded constructor or destructor, then we must + make sure that the attribute is propagated along the overload chain, + as it is these overloaded functions which will be emitted, rather than + the user declared constructor itself. */ + if (TREE_CODE (TREE_TYPE (node)) == METHOD_TYPE + && (DECL_CONSTRUCTOR_P (node) || DECL_DESTRUCTOR_P (node))) + { + tree overload; + + for (overload = OVL_CHAIN (node); overload; overload = OVL_CHAIN (overload)) + { + tree node_args; + tree func_args; + tree function = OVL_CURRENT (overload); + + if (! function + || ! DECL_P (function) + || (DECL_CONSTRUCTOR_P (node) && ! DECL_CONSTRUCTOR_P (function)) + || (DECL_DESTRUCTOR_P (node) && ! DECL_DESTRUCTOR_P (function))) + continue; + + /* The arguments must match as well. */ + for (node_args = DECL_ARGUMENTS (node), func_args = DECL_ARGUMENTS (function); + node_args && func_args; + node_args = TREE_CHAIN (node_args), func_args = TREE_CHAIN (func_args)) + if (TREE_TYPE (node_args) != TREE_TYPE (func_args)) + break; + + if (node_args || func_args) + { + /* We can ignore an extraneous __in_chrg arguments in the node. + GCC generated destructors, for example, will have this. */ + if ((node_args == NULL_TREE + || func_args != NULL_TREE) + && strcmp (IDENTIFIER_POINTER (DECL_NAME (node)), "__in_chrg") != 0) + continue; + } + + sh_symbian_add_attribute (function, attr); + + /* Propagate the attribute to any function thunks as well. 
*/ + for (thunk = DECL_THUNKS (function); thunk; thunk = DECL_CHAIN (thunk)) + if (TREE_CODE (thunk) == FUNCTION_DECL) + sh_symbian_add_attribute (thunk, attr); + } + } + + if (TREE_CODE (node) == FUNCTION_DECL && DECL_VIRTUAL_P (node)) + { + /* Propagate the attribute to any thunks of this function. */ + for (thunk = DECL_THUNKS (node); thunk; thunk = DECL_CHAIN (thunk)) + if (TREE_CODE (thunk) == FUNCTION_DECL) + sh_symbian_add_attribute (thunk, attr); + } + + /* Report error if symbol is not accessible at global scope. */ + if (!TREE_PUBLIC (node) + && ( TREE_CODE (node) == VAR_DECL + || TREE_CODE (node) == FUNCTION_DECL)) + { + error ("external linkage required for symbol %q+D because of %qE attribute", + node, name); + *no_add_attrs = true; + } + +#if SYMBIAN_DEBUG + print_node_brief (stderr, "mark node", node, 0); + fprintf (stderr, " as %s\n", attr); +#endif + + return NULL_TREE; +} diff --git a/gcc/config/sh/symbian-post.h b/gcc/config/sh/symbian-post.h new file mode 100644 index 000000000..a4497b969 --- /dev/null +++ b/gcc/config/sh/symbian-post.h @@ -0,0 +1,88 @@ +/* Definitions for the Symbian OS running on an SH part. + This file is included after all the other target specific headers. + + Copyright (C) 2004, 2007 Free Software Foundation, Inc. + Contributed by Red Hat. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#undef TARGET_VERSION +#define TARGET_VERSION \ + fputs (" (Renesas SH for Symbian OS)", stderr); + +#undef LINK_EMUL_PREFIX +#define LINK_EMUL_PREFIX "shlsymbian" + + +#define SYMBIAN_EXPORT_NAME(NAME,FILE,DECL) \ + do \ + { \ + if ((DECL && sh_symbian_is_dllexported (DECL)) \ + || sh_symbian_is_dllexported_name (NAME)) \ + { \ + fprintf ((FILE), "\t.pushsection .directive\n"); \ + fprintf ((FILE), "\t.asciz \"EXPORT %s\\n\"\n", \ + sh_symbian_strip_name_encoding (NAME)); \ + fprintf ((FILE), "\t.popsection\n"); \ + } \ + } \ + while (0) + +/* Output a function definition label. */ +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + SYMBIAN_EXPORT_NAME ((NAME), (FILE), (DECL)); \ + ASM_OUTPUT_TYPE_DIRECTIVE ((FILE), (NAME), "function"); \ + ASM_DECLARE_RESULT ((FILE), DECL_RESULT (DECL)); \ + ASM_OUTPUT_LABEL ((FILE), (NAME)); \ + } \ + while (0) + +/* Output the label for an initialized variable. */ +#undef ASM_DECLARE_OBJECT_NAME +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + do \ + { \ + HOST_WIDE_INT size; \ + \ + SYMBIAN_EXPORT_NAME ((NAME), (FILE), (DECL)); \ + ASM_OUTPUT_TYPE_DIRECTIVE ((FILE), (NAME), "object"); \ + \ + size_directive_output = 0; \ + if (!flag_inhibit_size_directive \ + && (DECL) \ + && DECL_SIZE (DECL)) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE ((FILE), (NAME), size); \ + } \ + \ + ASM_OUTPUT_LABEL ((FILE), (NAME)); \ + } \ + while (0) + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(FILE, NAME) \ + do \ + { \ + asm_fprintf ((FILE), "%U%s", \ + sh_symbian_strip_name_encoding (NAME)); \ + } \ + while (0) diff --git a/gcc/config/sh/symbian-pre.h b/gcc/config/sh/symbian-pre.h new file mode 100644 index 000000000..d2229e071 --- /dev/null +++ b/gcc/config/sh/symbian-pre.h @@ -0,0 +1,40 @@ +/* Definitions for the Symbian OS running on an SH part. 
+ This file is included before any other target specific headers. + + Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc. + Contributed by Red Hat. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* Enable Symbian specific code. */ +#define SYMBIAN 1 + +/* Default to using the Renesas ABI. */ +#define TARGET_ABI_DEFAULT MASK_HITACHI + +#define SUBTARGET_CPP_SPEC "" + +/* Get tree.c to declare merge_dllimport_decl_attributes(). */ +#define TARGET_DLLIMPORT_DECL_ATTRIBUTES 1 + +/* The Symbian OS currently does not support exception handling. */ +#define SUBTARGET_CC1PLUS_SPEC "-fno-exceptions" + +/* Create constructor/destructor sections without the writable flag. + Symbian puts them into the text segment and munges them later on. */ +#define CTORS_SECTION_ASM_OP "\t.section\t.ctors,\"ax\",@progbits" +#define DTORS_SECTION_ASM_OP "\t.section\t.dtors,\"ax\",@progbits" diff --git a/gcc/config/sh/t-elf b/gcc/config/sh/t-elf new file mode 100644 index 000000000..333efb54e --- /dev/null +++ b/gcc/config/sh/t-elf @@ -0,0 +1,10 @@ +EXTRA_MULTILIB_PARTS= crt1.o crti.o crtn.o \ + crtbegin.o crtend.o crtbeginS.o crtendS.o $(IC_EXTRA_PARTS) $(OPT_EXTRA_PARTS) + +# Compile crtbeginS.o and crtendS.o with pic. +CRTSTUFF_T_CFLAGS_S = -fPIC + +# Don't compile libgcc with -fpic for now. It's unlikely that we'll +# build shared libraries for embedded SH.
+# Linux / NetBSD will already have set TARGET_LIBGCC2_CFLAGS. +# TARGET_LIBGCC2_CFLAGS = -fpic diff --git a/gcc/config/sh/t-linux b/gcc/config/sh/t-linux new file mode 100644 index 000000000..13ff848dd --- /dev/null +++ b/gcc/config/sh/t-linux @@ -0,0 +1,8 @@ +LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array + +LIB2FUNCS_EXTRA= $(srcdir)/config/sh/linux-atomic.asm + +MULTILIB_DIRNAMES= +MULTILIB_MATCHES = + +EXTRA_MULTILIB_PARTS= crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o diff --git a/gcc/config/sh/t-linux64 b/gcc/config/sh/t-linux64 new file mode 100644 index 000000000..126b01637 --- /dev/null +++ b/gcc/config/sh/t-linux64 @@ -0,0 +1 @@ +EXTRA_MULTILIB_PARTS= crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o diff --git a/gcc/config/sh/t-netbsd b/gcc/config/sh/t-netbsd new file mode 100644 index 000000000..b2794a006 --- /dev/null +++ b/gcc/config/sh/t-netbsd @@ -0,0 +1,31 @@ +# Copyright (C) 2002, 2004, 2009 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +TARGET_LIBGCC2_CFLAGS = -fpic -mieee +LIB1ASMFUNCS_CACHE = _ic_invalidate + +LIB2FUNCS_EXTRA= + +EXTRA_MULTILIB_PARTS= + +# NetBSD's C library includes a fast software FP library that +# has support for getting/setting the rounding mode, exception +# mask, etc. Therefore, we don't want to include software FP +# in libgcc.
+FPBIT = +DPBIT = diff --git a/gcc/config/sh/t-netbsd-sh5-64 b/gcc/config/sh/t-netbsd-sh5-64 new file mode 100644 index 000000000..8fc6bd1ea --- /dev/null +++ b/gcc/config/sh/t-netbsd-sh5-64 @@ -0,0 +1 @@ +MULTILIB_DIRNAMES= $(MULTILIB_RAW_DIRNAMES:/media64=) diff --git a/gcc/config/sh/t-rtems b/gcc/config/sh/t-rtems new file mode 100644 index 000000000..9fd262cf8 --- /dev/null +++ b/gcc/config/sh/t-rtems @@ -0,0 +1,7 @@ +# Custom multilibs for RTEMS + +MULTILIB_ENDIAN = ml +MULTILIB_OPTIONS= $(MULTILIB_ENDIAN) m2/m2e/m4-single-only/m4-single/m4 +MULTILIB_DIRNAMES= +MULTILIB_MATCHES = m2=m3 m2e=m3e m2=m4-nofpu +MULTILIB_EXCEPTIONS = ml diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh new file mode 100644 index 000000000..a897bfffb --- /dev/null +++ b/gcc/config/sh/t-sh @@ -0,0 +1,166 @@ +# Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2006, 2008, 2009 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>.
+ +sh-c.o: $(srcdir)/config/sh/sh-c.c \ + $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/sh/sh-c.c + +LIB1ASMSRC = sh/lib1funcs.asm +LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \ + _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ + _div_table _udiv_qrnnd_16 \ + $(LIB1ASMFUNCS_CACHE) +LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array + +TARGET_LIBGCC2_CFLAGS = -mieee + +# We want fine grained libraries, so use the new code to build the +# floating point emulation libraries. +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + echo '#ifdef __LITTLE_ENDIAN__' > dp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >>dp-bit.c + echo '#endif' >> dp-bit.c + cat $(srcdir)/config/fp-bit.c >> dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >>fp-bit.c + echo '#endif' >> fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG)) +OTHER_ENDIAN = $(word 2,$(TM_ENDIAN_CONFIG)) + +MULTILIB_OPTIONS= $(OTHER_ENDIAN) $(TM_MULTILIB_CONFIG) +MULTILIB_DIRNAMES= + +# The separate entries for m2a-nofpu and m2a-single-only with +# duplicate base libraries are there to make sure we don't ever use an +# m4* multilib for m2a or vice-versa; they are not compatible. This +# is why sh2a and sh2a-single need their own multilibs. 
+MULTILIB_MATCHES = $(shell \ + multilibs="$(MULTILIB_OPTIONS)" ; \ + for abi in m1,m2,m3,m4-nofpu,m4-100-nofpu,m4-200-nofpu,m4-400,m4-500,m4-340,m4-300-nofpu,m4al,m4a-nofpu \ + m1,m2,m2a-nofpu \ + m2e,m3e,m4-single-only,m4-100-single-only,m4-200-single-only,m4-300-single-only,m4a-single-only \ + m2e,m2a-single-only \ + m4-single,m4-100-single,m4-200-single,m4-300-single,m4a-single \ + m4,m4-100,m4-200,m4-300,m4a \ + m5-32media,m5-compact,m5-32media \ + m5-32media-nofpu,m5-compact-nofpu,m5-32media-nofpu; do \ + subst= ; \ + for lib in `echo $$abi|tr , ' '` ; do \ + if test "`echo $$multilibs|sed s/$$lib//`" != "$$multilibs"; then \ + subst=$$lib ; \ + elif test x$$subst != x ; then \ + echo $$subst=$$lib ; \ + fi \ + done \ + done) + +# SH1 only supports big endian. +MULTILIB_EXCEPTIONS = ml/m1 ml/m2a* $(TM_MULTILIB_EXCEPTIONS_CONFIG) + +MULTILIB_OSDIRNAMES = \ + $(OTHER_ENDIAN)=!$(OTHER_ENDIAN) \ + m1=!m1 $(OTHER_ENDIAN)/m1=!$(OTHER_ENDIAN)/m1 \ + m2a=!m2a $(OTHER_ENDIAN)/m2a=!$(OTHER_ENDIAN)/m2a \ + m2a-nofpu=!m2a-nofpu $(OTHER_ENDIAN)/m2a-nofpu=!$(OTHER_ENDIAN)/m2a-nofpu \ + m2a-single-only=!m2a-single-only $(OTHER_ENDIAN)/m2a-single-only=!$(OTHER_ENDIAN)/m2a-single-only \ + m2a-single=!m2a-single $(OTHER_ENDIAN)/m2a-single=!$(OTHER_ENDIAN)/m2a-single \ + m2e=!m2e $(OTHER_ENDIAN)/m2e=!$(OTHER_ENDIAN)/m2e \ + m2=!m2 $(OTHER_ENDIAN)/m2=!$(OTHER_ENDIAN)/m2 \ + m3e=!m3e $(OTHER_ENDIAN)/m3e=!$(OTHER_ENDIAN)/m3e \ + m3=!m3 $(OTHER_ENDIAN)/m3=!$(OTHER_ENDIAN)/m3 \ + m4-nofpu=!m4-nofpu $(OTHER_ENDIAN)/m4-nofpu=!$(OTHER_ENDIAN)/m4-nofpu \ + m4-single-only=!m4-single-only $(OTHER_ENDIAN)/m4-single-only=!$(OTHER_ENDIAN)/m4-single-only \ + m4-single=!m4-single $(OTHER_ENDIAN)/m4-single=!$(OTHER_ENDIAN)/m4-single \ + m4=!m4 $(OTHER_ENDIAN)/m4=!$(OTHER_ENDIAN)/m4 \ + m4a-nofpu=!m4a-nofpu $(OTHER_ENDIAN)/m4a-nofpu=!$(OTHER_ENDIAN)/m4a-nofpu \ + m4a-single-only=!m4a-single-only $(OTHER_ENDIAN)/m4a-single-only=!$(OTHER_ENDIAN)/m4a-single-only \ + m4a-single=!m4a-single 
$(OTHER_ENDIAN)/m4a-single=!$(OTHER_ENDIAN)/m4a-single \ + m4a=!m4a $(OTHER_ENDIAN)/m4a=!$(OTHER_ENDIAN)/m4a \ + m4al=!m4al $(OTHER_ENDIAN)/m4al=!$(OTHER_ENDIAN)/m4al \ + m5-32media=!m5-32media $(OTHER_ENDIAN)/m5-32media=!$(OTHER_ENDIAN)/m5-32media \ + m5-32media-nofpu=!m5-32media-nofpu $(OTHER_ENDIAN)/m5-32media-nofpu=!$(OTHER_ENDIAN)/m5-32media-nofpu \ + m5-compact=!m5-compact $(OTHER_ENDIAN)/m5-compact=!$(OTHER_ENDIAN)/m5-compact \ + m5-compact-nofpu=!m5-compact-nofpu $(OTHER_ENDIAN)/m5-compact-nofpu=!$(OTHER_ENDIAN)/m5-compact-nofpu \ + m5-64media=!m5-64media $(OTHER_ENDIAN)/m5-64media=!$(OTHER_ENDIAN)/m5-64media \ + m5-64media-nofpu=!m5-64media-nofpu $(OTHER_ENDIAN)/m5-64media-nofpu=!$(OTHER_ENDIAN)/m5-64media-nofpu + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib + +$(T)crt1.o: $(srcdir)/config/sh/crt1.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crt1.o -x assembler-with-cpp $(srcdir)/config/sh/crt1.asm +$(T)crti.o: $(srcdir)/config/sh/crti.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/sh/crti.asm +$(T)crtn.o: $(srcdir)/config/sh/crtn.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/sh/crtn.asm + +$(out_object_file): gt-sh.h +gt-sh.h : s-gtype ; @true + +# These are not suitable for COFF. 
+# EXTRA_MULTILIB_PARTS= crt1.o crti.o crtn.o crtbegin.o crtend.o + +IC_EXTRA_PARTS= libic_invalidate_array_4-100.a libic_invalidate_array_4-200.a \ +libic_invalidate_array_4a.a +OPT_EXTRA_PARTS= libgcc-Os-4-200.a libgcc-4-300.a +EXTRA_MULTILIB_PARTS= $(IC_EXTRA_PARTS) $(OPT_EXTRA_PARTS) + +$(T)ic_invalidate_array_4-100.o: $(srcdir)/config/sh/lib1funcs.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)ic_invalidate_array_4-100.o -DL_ic_invalidate_array -DWAYS=1 -DWAY_SIZE=0x2000 -x assembler-with-cpp $(srcdir)/config/sh/lib1funcs.asm +$(T)libic_invalidate_array_4-100.a: $(T)ic_invalidate_array_4-100.o $(GCC_PASSES) + $(AR_CREATE_FOR_TARGET) $(T)libic_invalidate_array_4-100.a $(T)ic_invalidate_array_4-100.o + +$(T)ic_invalidate_array_4-200.o: $(srcdir)/config/sh/lib1funcs.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)ic_invalidate_array_4-200.o -DL_ic_invalidate_array -DWAYS=2 -DWAY_SIZE=0x2000 -x assembler-with-cpp $(srcdir)/config/sh/lib1funcs.asm +$(T)libic_invalidate_array_4-200.a: $(T)ic_invalidate_array_4-200.o $(GCC_PASSES) + $(AR_CREATE_FOR_TARGET) $(T)libic_invalidate_array_4-200.a $(T)ic_invalidate_array_4-200.o + +$(T)ic_invalidate_array_4a.o: $(srcdir)/config/sh/lib1funcs.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)ic_invalidate_array_4a.o -DL_ic_invalidate_array -D__FORCE_SH4A__ -x assembler-with-cpp $(srcdir)/config/sh/lib1funcs.asm +$(T)libic_invalidate_array_4a.a: $(T)ic_invalidate_array_4a.o $(GCC_PASSES) + $(AR_CREATE_FOR_TARGET) $(T)libic_invalidate_array_4a.a $(T)ic_invalidate_array_4a.o + +$(T)sdivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $@ -DL_sdivsi3_i4i -x assembler-with-cpp $< +$(T)udivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $@ -DL_udivsi3_i4i -x assembler-with-cpp $< +$(T)unwind-dw2-Os-4-200.o: $(srcdir)/unwind-dw2.c 
$(srcdir)/unwind-generic.h unwind-pe.h unwind.inc unwind-dw2-fde.h unwind-dw2.h $(CONFIG_H) coretypes.h $(TM_H) $(MACHMODE_H) longlong.h config.status stmp-int-hdrs tsystem.h $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) $(LIBGCC2_CFLAGS) $(INCLUDES) $(vis_hide) -fexceptions -Os -c -o $@ $< +OBJS_Os_4_200=$(T)sdivsi3_i4i-Os-4-200.o $(T)udivsi3_i4i-Os-4-200.o $(T)unwind-dw2-Os-4-200.o +$(T)libgcc-Os-4-200.a: $(OBJS_Os_4_200) $(GCC_PASSES) + $(AR_CREATE_FOR_TARGET) $@ $(OBJS_Os_4_200) + +$(T)div_table-4-300.o: $(srcdir)/config/sh/lib1funcs-4-300.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $@ -DL_div_table -x assembler-with-cpp $< + +$(T)libgcc-4-300.a: $(T)div_table-4-300.o $(GCC_PASSES) + $(AR_CREATE_FOR_TARGET) $@ $(T)div_table-4-300.o + +# Local Variables: +# mode: Makefile +# End: diff --git a/gcc/config/sh/t-sh64 b/gcc/config/sh/t-sh64 new file mode 100644 index 000000000..d88f929fd --- /dev/null +++ b/gcc/config/sh/t-sh64 @@ -0,0 +1,29 @@ +# Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>.
+ +LIB1ASMFUNCS = \ + _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ + _shcompact_call_trampoline _shcompact_return_trampoline \ + _shcompact_incoming_args _ic_invalidate _nested_trampoline \ + _push_pop_shmedia_regs \ + _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table + +MULTILIB_CPU_DIRS= $(ML_sh1) $(ML_sh2e) $(ML_sh2) $(ML_sh3e) $(ML_sh3) $(ML_sh4_nofpu) $(ML_sh4_single_only) $(ML_sh4_single) $(ML_sh4) $(ML_sh5_32media:m5-32media/=media32) $(ML_sh5_32media_nofpu:m5-32media-nofpu/=nofpu/media32) $(ML_sh5_compact:m5-compact/=compact) $(ML_sh5_compact_nofpu:m5-compact-nofpu/=nofpu/compact) $(ML_sh5_64media:m5-64media/=media64) $(ML_sh5_64media_nofpu:m5-64media-nofpu/=nofpu/media64) + +MULTILIB_RAW_DIRNAMES= $(MULTILIB_ENDIAN:/mb= mb) $(MULTILIB_CPU_DIRS:/=) +MULTILIB_DIRNAMES= $(MULTILIB_RAW_DIRNAMES) diff --git a/gcc/config/sh/t-superh b/gcc/config/sh/t-superh new file mode 100644 index 000000000..4e2d83dcb --- /dev/null +++ b/gcc/config/sh/t-superh @@ -0,0 +1,33 @@ +# Copyright (C) 2005, 2006 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>.
+ +EXTRA_MULTILIB_PARTS= crt1.o crti.o crtn.o \ + crtbegin.o crtend.o crtbeginS.o crtendS.o \ + crt1-mmu.o gcrt1-mmu.o gcrt1.o $(IC_EXTRA_PARTS) $(OPT_EXTRA_PARTS) + +# Compile crt1-mmu.o as crt1.o with -DMMU_SUPPORT +$(T)crt1-mmu.o: $(srcdir)/config/sh/crt1.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crt1-mmu.o -DMMU_SUPPORT -x assembler-with-cpp $(srcdir)/config/sh/crt1.asm + +# Compile gcrt1-mmu.o as crt1-mmu.o with -DPROFILE +$(T)gcrt1-mmu.o: $(srcdir)/config/sh/crt1.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)gcrt1-mmu.o -DPROFILE -DMMU_SUPPORT -x assembler-with-cpp $(srcdir)/config/sh/crt1.asm + +# For sh4-400: Compile gcrt1.o as crt1.o with -DPROFILE +$(T)gcrt1.o: $(srcdir)/config/sh/crt1.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)gcrt1.o -DPROFILE -x assembler-with-cpp $(srcdir)/config/sh/crt1.asm diff --git a/gcc/config/sh/t-symbian b/gcc/config/sh/t-symbian new file mode 100644 index 000000000..f0b7dabd4 --- /dev/null +++ b/gcc/config/sh/t-symbian @@ -0,0 +1,81 @@ +# Copyright (C) 2004, 2006, 2008, 2009 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>.
+ +sh-c.o: $(srcdir)/config/sh/sh-c.c \ + $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/sh/sh-c.c + +symbian-cxx.o: \ + $(srcdir)/config/sh/symbian-cxx.c \ + $(srcdir)/config/sh/sh-symbian.h \ + $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(TREE_H) $(RTL_H) \ + toplev.h output.h coretypes.h flags.h expr.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +symbian-c.o: \ + $(srcdir)/config/sh/symbian-c.c \ + $(srcdir)/config/sh/sh-symbian.h \ + $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(TREE_H) $(RTL_H) \ + toplev.h output.h coretypes.h flags.h expr.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +symbian-base.o: \ + $(srcdir)/config/sh/symbian-base.c \ + $(srcdir)/config/sh/sh-symbian.h \ + $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(TREE_H) $(RTL_H) \ + toplev.h output.h coretypes.h flags.h expr.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + + +LIB1ASMSRC = sh/lib1funcs.asm +LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movstr \ + _movstr_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ + $(LIB1ASMFUNCS_CACHE) + +# We want fine grained libraries, so use the new code to build the +# floating point emulation libraries. 
+FPBIT = fp-bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + cat $(srcdir)/config/fp-bit.c > dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +$(T)crt1.o: $(srcdir)/config/sh/crt1.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crt1.o -x assembler-with-cpp $(srcdir)/config/sh/crt1.asm +$(T)crti.o: $(srcdir)/config/sh/crti.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/sh/crti.asm +$(T)crtn.o: $(srcdir)/config/sh/crtn.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/sh/crtn.asm + +$(out_object_file): gt-sh.h +gt-sh.h : s-gtype ; @true + +symbian.o: $(srcdir)/config/sh/symbian.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) output.h flags.h $(TREE_H) expr.h toplev.h $(TM_P_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/sh/symbian.c + + +# Local Variables: +# mode: Makefile +# End: diff --git a/gcc/config/sh/t-vxworks b/gcc/config/sh/t-vxworks new file mode 100644 index 000000000..66aa7091a --- /dev/null +++ b/gcc/config/sh/t-vxworks @@ -0,0 +1,9 @@ +# Multilibs for VxWorks. + +MULTILIB_OPTIONS = mrtp fPIC m2/m3/m4/m4a ml +# Don't build -fPIC without -mrtp, or -ml without -m3/-m4. +MULTILIB_EXCEPTIONS = fPIC* ml* mrtp/ml* mrtp/fPIC/ml* *m2/ml* +MULTILIB_MATCHES = m2=m4-nofpu fPIC=fpic + +# Restore a variable from t-vxworks clobbered by t-elf. +EXTRA_MULTILIB_PARTS = diff --git a/gcc/config/sh/ushmedia.h b/gcc/config/sh/ushmedia.h new file mode 100644 index 000000000..2f1f55583 --- /dev/null +++ b/gcc/config/sh/ushmedia.h @@ -0,0 +1,1087 @@ +/* Copyright (C) 2000, 2001, 2004, 2005, 2009 Free Software Foundation, Inc. + +This file is part of GCC.
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +/* ushmedia.h: Intrinsics corresponding to SHmedia instructions that + may be executed in both user and privileged mode. */ + +#ifndef _USHMEDIA_H +#define _USHMEDIA_H + +#if __SHMEDIA__ +#if !
__SH4_NOFPU__ +typedef float __GCC_FV __attribute__ ((vector_size (4 * sizeof (float)))); +typedef float __GCC_MTRX __attribute__ ((vector_size (16 * sizeof (float)))); +#endif + +static __inline unsigned long long +sh_media_MABS_L (unsigned long long mm) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_absv2si2 ((v2si) mm); +} + +static __inline unsigned long long +sh_media_MABS_W (unsigned long long mm) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_absv4hi2 ((v4hi) mm); +} + +static __inline unsigned long long +sh_media_MADD_L (unsigned long long mm, unsigned long long mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_addv2si3 ((v2si) mm, (v2si) mn); +} + +static __inline unsigned long long +sh_media_MADD_W (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_addv4hi3 ((v4hi) mm, (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MADDS_L (unsigned long long mm, unsigned long long mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_ssaddv2si3 ((v2si) mm, (v2si) mn); +} + +static __inline unsigned long long +sh_media_MADDS_UB (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_usaddv8qi3 ((v8qi) mm, (v8qi) mn); +} + +static __inline unsigned long long +sh_media_MADDS_W (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_ssaddv4hi3 ((v4hi) mm, (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MCMPEQ_B (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_sh_media_MCMPEQ_B ((v8qi) mm, + (v8qi) mn);
+} + +static __inline unsigned long long +sh_media_MCMPEQ_L (unsigned long long mm, unsigned long long mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_sh_media_MCMPEQ_L ((v2si) mm, + (v2si) mn); +} + +static __inline unsigned long long +sh_media_MCMPEQ_W (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MCMPEQ_W ((v4hi) mm, + (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MCMPGT_UB (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_sh_media_MCMPGT_UB ((v8qi) mm, + (v8qi) mn); +} + +static __inline unsigned long long +sh_media_MCMPGT_L (unsigned long long mm, unsigned long long mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_sh_media_MCMPGT_L ((v2si) mm, + (v2si) mn); +} + +static __inline unsigned long long +sh_media_MCMPGT_W (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MCMPGT_W ((v4hi) mm, + (v4hi) mn); +} + +#define sh_media_MCMV __builtin_sh_media_MCMV + +static __inline unsigned long long +sh_media_MCNVS_LW (unsigned long long mm, unsigned long long mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + typedef unsigned int uv2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_sh_media_MCNVS_LW ((v2si) mm, + (uv2si) mn); +} + +static __inline unsigned long long +sh_media_MCNVS_WB (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MCNVS_WB ((v4hi) mm, + (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MCNVS_WUB (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ 
((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MCNVS_WUB ((v4hi) mm, + (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MEXTR1 (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_sh_media_MEXTR1 ((v8qi) mm, + (v8qi) mn); +} + +static __inline unsigned long long +sh_media_MEXTR2 (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_sh_media_MEXTR2 ((v8qi) mm, + (v8qi) mn); +} + +static __inline unsigned long long +sh_media_MEXTR3 (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_sh_media_MEXTR3 ((v8qi) mm, + (v8qi) mn); +} + +static __inline unsigned long long +sh_media_MEXTR4 (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_sh_media_MEXTR4 ((v8qi) mm, + (v8qi) mn); +} + +static __inline unsigned long long +sh_media_MEXTR5 (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_sh_media_MEXTR5 ((v8qi) mm, + (v8qi) mn); +} + +static __inline unsigned long long +sh_media_MEXTR6 (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_sh_media_MEXTR6 ((v8qi) mm, + (v8qi) mn); +} + +static __inline unsigned long long +sh_media_MEXTR7 (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_sh_media_MEXTR7 ((v8qi) mm, + (v8qi) mn); +} + +static __inline unsigned long long +sh_media_MMACFX_WL (unsigned long long mm, unsigned long long mn, + unsigned long long mw) +{ + typedef float v2hi __attribute__ ((mode(V2HI))); + 
typedef float v2si __attribute__ ((mode(V2SI))); + typedef unsigned int uv2si __attribute__ ((mode(V2SI))); + + long mm_l = (long) mm; + long mn_l = (long) mn; + + return ((unsigned long long) + __builtin_sh_media_MMACFX_WL ((v2hi) mm_l, (v2hi) mn_l, + (uv2si) mw)); +} + +static __inline unsigned long long +sh_media_MMACNFX_WL (unsigned long long mm, unsigned long long mn, + unsigned long long mw) +{ + typedef float v2hi __attribute__ ((mode(V2HI))); + typedef float v2si __attribute__ ((mode(V2SI))); + typedef unsigned int uv2si __attribute__ ((mode(V2SI))); + + long mm_l = (long) mm; + long mn_l = (long) mn; + + return ((unsigned long long) + __builtin_sh_media_MMACNFX_WL ((v2hi) mm_l, (v2hi) mn_l, + (uv2si) mw)); +} + +static __inline unsigned long long +sh_media_MMUL_L (unsigned long long mm, unsigned long long mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_mulv2si3 ((v2si) mm, (v2si) mn); +} + +static __inline unsigned long long +sh_media_MMUL_W (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_mulv4hi3 ((v4hi) mm, (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MMULFX_L (unsigned long long mm, unsigned long long mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_sh_media_MMULFX_L ((v2si) mm, + (v2si) mn); +} + +static __inline unsigned long long +sh_media_MMULFX_W (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MMULFX_W ((v4hi) mm, + (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MMULFXRP_W (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MMULFXRP_W ((v4hi) mm, + (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MMULHI_WL 
(unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MMULHI_WL ((v4hi) mm, + (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MMULLO_WL (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MMULLO_WL ((v4hi) mm, + (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MMULSUM_WQ (unsigned long long mm, unsigned long long mn, + unsigned long long mw) +{ + typedef unsigned int uv4hi __attribute__ ((mode(V4HI))); + + return __builtin_sh_media_MMULSUM_WQ ((uv4hi) mm, (uv4hi) mn, mw); +} + +static __inline unsigned long long +sh_media_MPERM_W (unsigned long long mm, unsigned int mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MPERM_W ((v4hi) mm, mn); +} + +static __inline unsigned long long +sh_media_MSAD_UBQ (unsigned long long mm, unsigned long long mn, + unsigned long long mw) +{ + typedef unsigned int uv8qi __attribute__ ((mode(V8QI))); + + return __builtin_sh_media_MSAD_UBQ ((uv8qi) mm, (uv8qi) mn, mw); +} + +static __inline unsigned long long +sh_media_MSHALDS_L (unsigned long long mm, unsigned int mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_sh_media_MSHALDS_L ((v2si) mm, mn); +} + +static __inline unsigned long long +sh_media_MSHALDS_W (unsigned long long mm, unsigned int mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MSHALDS_W ((v4hi) mm, mn); +} + +static __inline unsigned long long +sh_media_MSHARD_L (unsigned long long mm, unsigned int mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_ashrv2si3 ((v2si) mm, mn); +} + +static __inline unsigned long long +sh_media_MSHARD_W (unsigned long long mm, unsigned int mn) +{ + 
typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_ashrv4hi3 ((v4hi) mm, mn); +} + +#define sh_media_MSHARDS_Q __builtin_sh_media_MSHARDS_Q + +static __inline unsigned long long +sh_media_MSHFHI_B (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_sh_media_MSHFHI_B ((v8qi) mm, + (v8qi) mn); +} + +static __inline unsigned long long +sh_media_MSHFHI_L (unsigned long long mm, unsigned long long mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_sh_media_MSHFHI_L ((v2si) mm, + (v2si) mn); +} + +static __inline unsigned long long +sh_media_MSHFHI_W (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MSHFHI_W ((v4hi) mm, + (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MSHFLO_B (unsigned long long mm, unsigned long long mn) +{ + typedef float v8qi __attribute__ ((mode(V8QI))); + + return (unsigned long long) __builtin_sh_media_MSHFLO_B ((v8qi) mm, + (v8qi) mn); +} + +static __inline unsigned long long +sh_media_MSHFLO_L (unsigned long long mm, unsigned long long mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_sh_media_MSHFLO_L ((v2si) mm, + (v2si) mn); +} + +static __inline unsigned long long +sh_media_MSHFLO_W (unsigned long long mm, unsigned long long mn) +{ + typedef float v4hi __attribute__ ((mode(V4HI))); + + return (unsigned long long) __builtin_sh_media_MSHFLO_W ((v4hi) mm, + (v4hi) mn); +} + +static __inline unsigned long long +sh_media_MSHLLD_L (unsigned long long mm, unsigned int mn) +{ + typedef float v2si __attribute__ ((mode(V2SI))); + + return (unsigned long long) __builtin_ashlv2si3 ((v2si) mm, mn); +} + +static __inline unsigned long long +sh_media_MSHLLD_W (unsigned long long mm, unsigned int mn) +{ + typedef 
float v4hi __attribute__ ((mode(V4HI)));
+
+  return (unsigned long long) __builtin_ashlv4hi3 ((v4hi) mm, mn);
+}
+/* Pass MM as a V2SI-mode vector and the scalar MN to
+   __builtin_lshrv2si3 (presumably an element-wise logical right
+   shift -- confirm against the builtin's definition); the result is
+   cast back to unsigned long long.  */
+static __inline unsigned long long
+sh_media_MSHLRD_L (unsigned long long mm, unsigned int mn)
+{
+  typedef float v2si __attribute__ ((mode(V2SI)));
+
+  return (unsigned long long) __builtin_lshrv2si3 ((v2si) mm, mn);
+}
+/* Same as sh_media_MSHLRD_L, but MM is viewed as a V4HI-mode vector
+   and the call goes to __builtin_lshrv4hi3.  */
+static __inline unsigned long long
+sh_media_MSHLRD_W (unsigned long long mm, unsigned int mn)
+{
+  typedef float v4hi __attribute__ ((mode(V4HI)));
+
+  return (unsigned long long) __builtin_lshrv4hi3 ((v4hi) mm, mn);
+}
+/* The subtraction wrappers below cast both 64-bit operands to the
+   vector mode named in the builtin (v2si, v4hi or v8qi), call the
+   builtin, and cast the result back to unsigned long long.  This one
+   uses __builtin_subv2si3.  */
+static __inline unsigned long long
+sh_media_MSUB_L (unsigned long long mm, unsigned long long mn)
+{
+  typedef float v2si __attribute__ ((mode(V2SI)));
+
+  return (unsigned long long) __builtin_subv2si3 ((v2si) mm, (v2si) mn);
+}
+/* V4HI-mode variant, using __builtin_subv4hi3.  */
+static __inline unsigned long long
+sh_media_MSUB_W (unsigned long long mm, unsigned long long mn)
+{
+  typedef float v4hi __attribute__ ((mode(V4HI)));
+
+  return (unsigned long long) __builtin_subv4hi3 ((v4hi) mm, (v4hi) mn);
+}
+/* V2SI-mode variant using __builtin_sssubv2si3 (the "ss" prefix
+   presumably denotes signed saturation -- confirm).  */
+static __inline unsigned long long
+sh_media_MSUBS_L (unsigned long long mm, unsigned long long mn)
+{
+  typedef float v2si __attribute__ ((mode(V2SI)));
+
+  return (unsigned long long) __builtin_sssubv2si3 ((v2si) mm, (v2si) mn);
+}
+/* V8QI-mode variant using __builtin_ussubv8qi3 (the "us" prefix
+   presumably denotes unsigned saturation -- confirm).  */
+static __inline unsigned long long
+sh_media_MSUBS_UB (unsigned long long mm, unsigned long long mn)
+{
+  typedef float v8qi __attribute__ ((mode(V8QI)));
+
+  return (unsigned long long) __builtin_ussubv8qi3 ((v8qi) mm, (v8qi) mn);
+}
+/* V4HI-mode variant using __builtin_sssubv4hi3.  */
+static __inline unsigned long long
+sh_media_MSUBS_W (unsigned long long mm, unsigned long long mn)
+{
+  typedef float v4hi __attribute__ ((mode(V4HI)));
+
+  return (unsigned long long) __builtin_sssubv4hi3 ((v4hi) mm, (v4hi) mn);
+}
+
+#if !
__SH4_NOFPU__
+/* Floating-point Intrinsics */
+/* These four need no operand adjustment, so they alias the generic
+   GCC builtins directly.  */
+#define sh_media_FABS_D __builtin_fabs
+#define sh_media_FABS_S __builtin_fabsf
+#define sh_media_FCMPUN_D __builtin_isunordered
+#define sh_media_FCMPUN_S __builtin_isunordered
+/* Hand the bit pattern of FG, read back as an int through a union,
+   to __builtin_sh_media_FCOSA_S and return the builtin's result.  */
+static __inline float sh_media_FCOSA_S (float fg)
+{
+  union { int i; float f; } u;
+
+  u.f = fg;
+  return __builtin_sh_media_FCOSA_S (u.i);
+}
+/* Issue an "fgetscr" instruction and return its float output
+   operand.  */
+static __inline float
+sh_media_FGETSCR (void)
+{
+  float f;
+
+  __asm volatile ("fgetscr %0" : "=f" (f));
+  return f;
+}
+/* Load one V4SF-mode vector from each of FVG and FVH and return the
+   result of __builtin_sh_media_FIPR_S on the two.  The loads go
+   through v4sf pointers, so the callers' buffers are presumably
+   expected to be suitably aligned -- confirm.  */
+static __inline float
+sh_media_FIPR_S (const void *fvg, const void *fvh)
+{
+  typedef float v4sf __attribute__ ((mode(V4SF)));
+  v4sf vg = *(v4sf*) fvg;
+  v4sf vh = *(v4sf*) fvh;
+
+  return __builtin_sh_media_FIPR_S (vg, vh);
+}
+/* FMAC_S: the plain C version is compiled out; the #else branch maps
+   the name onto the builtin instead.  */
+#if 0
+/* This gives different results for -O0 */
+static __inline float
+sh_media_FMAC_S (float fg, float fh, float fq)
+{
+  return fg * fh + fq;
+}
+#else
+
+#define sh_media_FMAC_S __builtin_sh_media_FMAC_S
+#endif
+/* Return the object representation of the double DG as a long long
+   (type pun through a union; no numeric conversion).  */
+static __inline long long
+sh_media_FMOV_DQ (double dg)
+{
+  union { long long l; double d; } u;
+
+  u.d = dg;
+  return u.l;
+}
+/* Return the float whose object representation is the int MM.  */
+static __inline float
+sh_media_FMOV_LS (int mm)
+{
+  union { int i; float f; } u;
+
+  u.i = mm;
+  return u.f;
+}
+/* Return the double whose object representation is the long long
+   MM.  */
+static __inline double
+sh_media_FMOV_QD (long long mm)
+{
+  union { long long l; double d; } u;
+
+  u.l = mm;
+  return u.d;
+}
+/* Return the object representation of the float FG as an int.  */
+static __inline int
+sh_media_FMOV_SL (float fg)
+{
+  union { int i; float f; } u;
+
+  u.f = fg;
+  return u.i;
+}
+/* Issue an "fputscr" instruction with FG as its float input
+   operand.  */
+static __inline void
+sh_media_FPUTSCR (float fg)
+{
+  __asm volatile ("fputscr %0" : : "f" (fg));
+}
+/* Hand the bit pattern of FG, read back as an int through a union,
+   to __builtin_sh_media_FSINA_S and return the builtin's result.  */
+static __inline float sh_media_FSINA_S (float fg)
+{
+  union { int i; float f; } u;
+
+  u.f = fg;
+  return __builtin_sh_media_FSINA_S (u.i);
+}
+
+/* Can't use __builtin_sqrt / __builtin_sqrtf because they still implement
+   error handling unless -ffast-math is used.
*/
+#define sh_media_FSQRT_D __builtin_sh_media_FSQRT_D
+#define sh_media_FSQRT_S __builtin_sh_media_FSQRT_S
+#define sh_media_FSRRA_S __builtin_sh_media_FSRRA_S
+/* Load a V16SF-mode matrix from MTRXG and a V4SF-mode vector from
+   FVH, pass both to __builtin_sh_media_FTRV_S, and store the V4SF
+   result at FVF.  Both inputs are read before FVF is written.  */
+static __inline void
+sh_media_FTRV_S (const void *mtrxg, const void *fvh, void *fvf)
+{
+  typedef float v16sf __attribute__ ((mode(V16SF)));
+  typedef float v4sf __attribute__ ((mode(V4SF)));
+  v16sf mtrx = *(v16sf*) mtrxg;
+  v4sf vh = *(v4sf*) fvh;
+
+  *(v4sf*) fvf = __builtin_sh_media_FTRV_S (mtrx, vh);
+}
+#endif /* ! __SH4_NOFPU__ */
+
+/* Not implemented here: Control and Configuration intrinsics. */
+/* Misaligned Access Support intrinsics */
+/* Each LDHI/LDLO wrapper passes the address S bytes past P to the
+   builtin of the same name and returns the builtin's result.  */
+static __inline unsigned long long
+sh_media_LDHI_L (void *p, int s)
+{
+  return __builtin_sh_media_LDHI_L ((char *)p + s);
+}
+
+static __inline unsigned long long
+sh_media_LDHI_Q (void *p, int s)
+{
+  return __builtin_sh_media_LDHI_Q ((char *)p + s);
+}
+
+static __inline unsigned long long
+sh_media_LDLO_L (void *p, int s)
+{
+  return __builtin_sh_media_LDLO_L ((char *)p + s);
+}
+
+static __inline unsigned long long
+sh_media_LDLO_Q (void *p, int s)
+{
+  return __builtin_sh_media_LDLO_Q ((char *)p + s);
+}
+/* Each STHI/STLO wrapper passes the address S bytes past P and the
+   value MW to the builtin of the same name.  The _L forms take a
+   32-bit MW, the _Q forms a 64-bit one.  */
+static __inline void
+sh_media_STHI_L (void *p, int s, unsigned int mw)
+{
+  __builtin_sh_media_STHI_L ((char*)p + s, mw);
+}
+
+static __inline void
+sh_media_STHI_Q (void *p, int s, unsigned long long mw)
+{
+  __builtin_sh_media_STHI_Q ((char*)p + s, mw);
+}
+
+static __inline void
+sh_media_STLO_L (void *p, int s, unsigned int mw)
+{
+  __builtin_sh_media_STLO_L ((char*)p + s, mw);
+}
+
+static __inline void
+sh_media_STLO_Q (void *p, int s, unsigned long long mw)
+{
+  __builtin_sh_media_STLO_Q ((char*)p + s, mw);
+}
+
+/* Miscellaneous intrinsics */
+
+#define sh_media_NSB __builtin_sh_media_NSB
+/* Pass MM as a V8QI-mode vector to __builtin_sh_media_BYTEREV and
+   return the result cast back to unsigned long long.  */
+static __inline unsigned long long
+sh_media_BYTEREV (unsigned long long mm)
+{
+  typedef float v8qi __attribute__ ((mode(V8QI)));
+
+  return (unsigned long long) __builtin_sh_media_BYTEREV ((v8qi) mm);
+}
+
+__inline__ static unsigned long long
+sh_media_CMVEQ (unsigned long long mm, unsigned long long mn, unsigned long long mw) __attribute__ ((always_inline));
+
+/* Conditional move: return MN when MM is zero, otherwise MW.  */
+__inline__ static unsigned long long
+sh_media_CMVEQ (unsigned long long mm, unsigned long long mn, unsigned long long mw)
+{
+  return mm == 0 ? mn : mw;
+}
+
+__inline__ static unsigned long long
+sh_media_CMVNE (unsigned long long mm, unsigned long long mn, unsigned long long mw) __attribute__ ((always_inline));
+
+/* Conditional move: return MN when MM is nonzero, otherwise MW.  */
+__inline__ static unsigned long long
+sh_media_CMVNE (unsigned long long mm, unsigned long long mn, unsigned long long mw)
+{
+  return mm != 0 ? mn : mw;
+}
+
+/* Add MM and MN in unsigned int arithmetic (any carry out of
+   unsigned int is discarded) and return the sum widened to
+   long long.  */
+static __inline long long
+sh_media_ADDZ_L (unsigned int mm, unsigned int mn)
+{
+  return mm + mn;
+}
+
+/* NOP and Synchronization intrinsics not implemented here. */
+
+/* Pass the address S bytes past MM to __builtin_sh_media_PREFO, with
+   zero for the builtin's two remaining arguments.  The byte offset is
+   applied through a char pointer, as the LDHI/LDLO/STHI/STLO wrappers
+   in this header do, instead of relying on GNU-only arithmetic on
+   void *.  */
+static __inline__ void sh_media_PREFO(void *mm, int s)
+{
+  __builtin_sh_media_PREFO ((char *) mm + s, 0, 0);
+}
+
+/* Event Handling intrinsics not implemented here. */
+
+/* Old asm stuff */
+
+/* Emit a single "nop" instruction.  */
+static __inline__
+void
+sh_media_NOP (void)
+{
+  __asm__ ("nop" : :);
+}
+
+/* Issue "swap.q" on the 64-bit object MN bytes past MM.  MW is tied
+   to the same register as the result (constraint "0"), and the memory
+   object is a read-write operand.  Returns the value the instruction
+   leaves in that register.  */
+__inline__ static
+unsigned long long
+sh_media_SWAP_Q (void *mm, long long mn, unsigned long long mw)
+{
+  unsigned long long res;
+  unsigned long long *addr = (unsigned long long *)((char *)mm + mn);
+  __asm__ ("swap.q %m1, %0" : "=r" (res), "+o" (*addr) : "0" (mw));
+  return res;
+}
+
+/* Emit a "synci" instruction.  */
+__inline__ static
+void
+sh_media_SYNCI (void)
+{
+  __asm__ __volatile__ ("synci");
+}
+
+/* Emit a "synco" instruction.  */
+__inline__ static
+void
+sh_media_SYNCO (void)
+{
+  __asm__ __volatile__ ("synco");
+}
+
+/* Pass the address S bytes past MM to __builtin_sh_media_ALLOCO.  The
+   offset is applied through a char pointer rather than by GNU-only
+   arithmetic on void *.  */
+__inline__ static
+void
+sh_media_ALLOCO (void *mm, int s)
+{
+  __builtin_sh_media_ALLOCO ((char *) mm + s);
+}
+
+/* The cache-control wrappers below each emit one instruction whose
+   memory operand is the byte S bytes past MM.  This one emits
+   "icbi".  */
+__inline__ static
+void
+sh_media_ICBI (void *mm, int s)
+{
+  __asm__ __volatile__ ("icbi %m0" : : "o" (((char*)mm)[s]));
+}
+
+/* Emit "ocbi" for the byte S bytes past MM.  */
+__inline__ static
+void
+sh_media_OCBI (void *mm, int s)
+{
+  __asm__ __volatile__ ("ocbi %m0" : : "o" (((char*)mm)[s]));
+}
+
+/* Emit "ocbp" for the byte S bytes past MM.  */
+__inline__ static
+void
+sh_media_OCBP (void *mm, int s)
+{
+  __asm__ __volatile__ ("ocbp %m0" : : "o" (((char*)mm)[s]));
+}
+
+__inline__ static +void +sh_media_OCBWB (void *mm, int s) +{ + __asm__ __volatile__ ("ocbwb %m0" : : "o" (((char*)mm)[s])); +} + +__inline__ static +void +sh_media_PREFI (void *mm, int s) +{ + __asm__ __volatile__ ("prefi %m0" : : "o" (((char*)mm)[s])); +} + +__inline__ static +void +sh_media_BRK (void) +{ + __asm__ __volatile__ ("brk"); +} + +__inline__ static +void +sh_media_TRAPA (unsigned long long mm) +{ + __asm__ __volatile__ ("trapa %%0" : : "r" (mm)); +} + +__inline__ static +short +sh_media_unaligned_LD_W (void *p) +{ +#if __LITTLE_ENDIAN__ + return (((unsigned char *)p)[0] + | (((short)((__signed__ char *)p)[1]) << 8)); +#else + return ((((short)((__signed__ char *)p)[0]) << 8) + | ((unsigned char *)p)[1]); +#endif +} + +__inline__ static +unsigned short +sh_media_unaligned_LD_UW (void *p) +{ + unsigned char *addr = p; +#if __LITTLE_ENDIAN__ + return sh_media_MSHFLO_B (addr[0], addr[1]); +#else + return sh_media_MSHFLO_B (addr[1], addr[0]); +#endif +} + +/* We don't use the sh_media_LD* functions here because that turned out + to impede constant propagation of the offsets into the ldhi / ldlo + instructions. 
*/
+/* Load a 32-bit value from the possibly unaligned address P by
+   OR-ing the results of the LDHI_L and LDLO_L builtins.  Which of
+   the two is applied at P and which at P + 3 depends on
+   __LITTLE_ENDIAN__.  */
+__inline__ static
+int
+sh_media_unaligned_LD_L (void *p)
+{
+#if __LITTLE_ENDIAN__
+  return (__builtin_sh_media_LDHI_L ((char *)p + 3)
+          | __builtin_sh_media_LDLO_L (p));
+#else
+  return (__builtin_sh_media_LDLO_L ((char *)p + 3)
+          | __builtin_sh_media_LDHI_L (p));
+#endif
+}
+
+/* 64-bit counterpart of sh_media_unaligned_LD_L: combines the LDHI_Q
+   and LDLO_Q builtins applied at P and P + 7.  */
+__inline__ static
+long long
+sh_media_unaligned_LD_Q (void *p)
+{
+#if __LITTLE_ENDIAN__
+  return (__builtin_sh_media_LDHI_Q ((char *)p + 7)
+          | __builtin_sh_media_LDLO_Q (p));
+#else
+  return (__builtin_sh_media_LDLO_Q ((char *)p + 7)
+          | __builtin_sh_media_LDHI_Q (p));
+#endif
+}
+
+/* Store the low 16 bits of K at the possibly unaligned address P,
+   one byte at a time, in the byte order selected by
+   __LITTLE_ENDIAN__.  */
+__inline__ static
+void
+sh_media_unaligned_ST_W (void *p, unsigned int k)
+{
+  char *addr = p;
+#if __LITTLE_ENDIAN__
+  addr[0] = k;
+  addr[1] = k >> 8;
+#else
+  addr[1] = k;
+  addr[0] = k >> 8;
+#endif
+}
+
+/* We don't use the sh_media_ST* functions here because that turned out
+   to impede constant propagation of the offsets into the sthi / stlo
+   instructions.  */
+/* Store the 32-bit value K at the possibly unaligned address P using
+   the STHI_L and STLO_L builtins, one applied at P and the other at
+   P + 3 as selected by __LITTLE_ENDIAN__.  The offset is formed
+   through a char pointer, matching sh_media_unaligned_LD_L, rather
+   than by GNU-only arithmetic on void *.  */
+__inline__ static
+void
+sh_media_unaligned_ST_L (void *p, unsigned int k)
+{
+#if __LITTLE_ENDIAN__
+  __builtin_sh_media_STHI_L ((char *) p + 3, k);
+  __builtin_sh_media_STLO_L (p, k);
+#else
+  __builtin_sh_media_STLO_L ((char *) p + 3, k);
+  __builtin_sh_media_STHI_L (p, k);
+#endif
+}
+
+/* 64-bit counterpart of sh_media_unaligned_ST_L: uses the STHI_Q and
+   STLO_Q builtins at P and P + 7, again forming the offset through a
+   char pointer.  */
+__inline__ static
+void
+sh_media_unaligned_ST_Q (void *p, unsigned long long k)
+{
+#if __LITTLE_ENDIAN__
+  __builtin_sh_media_STHI_Q ((char *) p + 7, k);
+  __builtin_sh_media_STLO_Q (p, k);
+#else
+  __builtin_sh_media_STLO_Q ((char *) p + 7, k);
+  __builtin_sh_media_STHI_Q (p, k);
+#endif
+}
+
+#if !
__SH4_NOFPU__
+/* Copy the object at FVG to FVF as one __GCC_FV value.  */
+__inline__ static
+void
+sh_media_FVCOPY_S (const void *fvg, void *fvf)
+{
+  *(__GCC_FV *) fvf = *(const __GCC_FV *) fvg;
+}
+
+/* Store in FVF the element-wise sum of the four floats at FVG and the
+   four floats at FVH.  Elements are processed in index order, and
+   FVF may be the same buffer as either input.  */
+__inline__ static
+void
+sh_media_FVADD_S (const void *fvg, const void *fvh, void *fvf)
+{
+  const float *lhs = fvg;
+  const float *rhs = fvh;
+  float *out = fvf;
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    out[lane] = lhs[lane] + rhs[lane];
+}
+
+/* Store in FVF the element-wise difference (FVG minus FVH) of two
+   groups of four floats, in index order.  */
+__inline__ static
+void
+sh_media_FVSUB_S (const void *fvg, const void *fvh, void *fvf)
+{
+  const float *lhs = fvg;
+  const float *rhs = fvh;
+  float *out = fvf;
+  int lane;
+
+  for (lane = 0; lane < 4; lane++)
+    out[lane] = lhs[lane] - rhs[lane];
+}
+
+/* Copy the object at MTRXG to MTRXF as one __GCC_MTRX value.  */
+__inline__ static
+void
+sh_media_FMTRXCOPY_S (const void *mtrxg, void *mtrxf)
+{
+  *(__GCC_MTRX *) mtrxf = *(const __GCC_MTRX *) mtrxg;
+}
+
+/* Treat each argument as four consecutive __GCC_FV vectors and add
+   them pairwise with sh_media_FVADD_S, writing the sums to MTRXF.  */
+__inline__ static
+void
+sh_media_FMTRXADD_S (const void *mtrxg, const void *mtrxh, void *mtrxf)
+{
+  const __GCC_FV *lhs = mtrxg;
+  const __GCC_FV *rhs = mtrxh;
+  __GCC_FV *out = mtrxf;
+  int vec;
+
+  for (vec = 0; vec < 4; vec++)
+    sh_media_FVADD_S (lhs + vec, rhs + vec, out + vec);
+}
+
+/* Treat each argument as four consecutive __GCC_FV vectors and
+   subtract them pairwise with sh_media_FVSUB_S, writing the
+   differences to MTRXF.  */
+__inline__ static
+void
+sh_media_FMTRXSUB_S (const void *mtrxg, const void *mtrxh, void *mtrxf)
+{
+  const __GCC_FV *lhs = mtrxg;
+  const __GCC_FV *rhs = mtrxh;
+  __GCC_FV *out = mtrxf;
+  int vec;
+
+  for (vec = 0; vec < 4; vec++)
+    sh_media_FVSUB_S (lhs + vec, rhs + vec, out + vec);
+}
+
+__inline__ static
+void
+sh_media_FTRVADD_S (const void *mtrxg, const void *fvh, const void *fvi, void *fvf)
+{
+  sh_media_FTRV_S (mtrxg, fvh, fvf);
+  sh_media_FVADD_S (fvf,
fvi, fvf); +} + +__inline__ static +void +sh_media_FTRVSUB_S (const void *mtrxg, const void *fvh, const void *fvi, void *fvf) +{ + sh_media_FTRV_S (mtrxg, fvh, fvf); + sh_media_FVSUB_S (fvf, fvi, fvf); +} + +__inline__ static +void +sh_media_FMTRXMUL_S (const void *mtrxg, const void *mtrxh, void *mtrxf) +{ + const __GCC_FV *g = mtrxg; + __GCC_FV *f = mtrxf; +#if 1 + int j; + + for (j = 0; j < 4; j++) + sh_media_FTRV_S (mtrxh, &g[j], &f[j]); +#else + sh_media_FTRV_S (mtrxh, &g[0], &f[0]); + sh_media_FTRV_S (mtrxh, &g[1], &f[1]); + sh_media_FTRV_S (mtrxh, &g[2], &f[2]); + sh_media_FTRV_S (mtrxh, &g[3], &f[3]); +#endif +} + +__inline__ static +void +sh_media_FMTRXMULADD_S (const void *mtrxg, const void *mtrxh, const void *mtrxi, void *mtrxf) +{ + const __GCC_FV *g = mtrxg, *i = mtrxi; + __GCC_FV *f = mtrxf; +#if 1 + int j; + + for (j = 0; j < 4; j++) + sh_media_FTRVADD_S (mtrxh, &g[j], &i[j], &f[j]); +#else + sh_media_FTRVADD_S (mtrxh, &g[0], &i[0], &f[0]); + sh_media_FTRVADD_S (mtrxh, &g[1], &i[1], &f[1]); + sh_media_FTRVADD_S (mtrxh, &g[2], &i[2], &f[2]); + sh_media_FTRVADD_S (mtrxh, &g[3], &i[3], &f[3]); +#endif +} + +__inline__ static +void +sh_media_FMTRXMULSUB_S (const void *mtrxg, const void *mtrxh, const void *mtrxi, void *mtrxf) +{ + const __GCC_FV *g = mtrxg, *i = mtrxi; + __GCC_FV *f = mtrxf; +#if 1 + int j; + + for (j = 0; j < 4; j++) + sh_media_FTRVSUB_S (mtrxh, &g[j], &i[j], &f[j]); +#else + sh_media_FTRVSUB_S (mtrxh, &g[0], &i[0], &f[0]); + sh_media_FTRVSUB_S (mtrxh, &g[1], &i[1], &f[1]); + sh_media_FTRVSUB_S (mtrxh, &g[2], &i[2], &f[2]); + sh_media_FTRVSUB_S (mtrxh, &g[3], &i[3], &f[3]); +#endif +} +#endif /* ! 
__SH4_NOFPU__ */ + +#endif /* __SHMEDIA__ */ + +#endif /* _USHMEDIA_H */ diff --git a/gcc/config/sh/vxworks.h b/gcc/config/sh/vxworks.h new file mode 100644 index 000000000..3276979e4 --- /dev/null +++ b/gcc/config/sh/vxworks.h @@ -0,0 +1,69 @@ +/* Definitions of target machine for GCC, + for SuperH targeting the VxWorks run time environment. + Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("CPU=SH7000"); \ + VXWORKS_OS_CPP_BUILTINS (); \ + } \ + while (0) + +#undef SUBTARGET_OVERRIDE_OPTIONS +#define SUBTARGET_OVERRIDE_OPTIONS \ + do \ + { \ + VXWORKS_OVERRIDE_OPTIONS; \ + /* The kernel loader cannot handle the relaxation \ + relocations, so it cannot load kernel modules \ + (which are ET_REL) or RTP executables (which are \ + linked with --emit-relocs). No relaxation relocations \ + appear in shared libraries, so relaxation is OK \ + for RTP PIC. 
*/ \
+      if (TARGET_RELAX && !(TARGET_VXWORKS_RTP && flag_pic)) \
+        error ("-mrelax is only supported for RTP PIC"); \
+    } \
+  while (0)
+/* Replace the subtarget preprocessor spec with the VxWorks one.  */
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC VXWORKS_ADDITIONAL_CPP_SPEC
+/* Suffix for the linker emulation name (presumably appended where the
+   SH link spec selects an emulation -- confirm in sh.h).  */
+#undef SUBTARGET_LINK_EMUL_SUFFIX
+#define SUBTARGET_LINK_EMUL_SUFFIX "_vxworks"
+/* Take the library, link, startfile and endfile specs from the
+   VXWORKS_* definitions; LINK_SPEC additionally appends
+   SH_LINK_SPEC after a separating space.  */
+#undef LIB_SPEC
+#define LIB_SPEC VXWORKS_LIB_SPEC
+#undef LINK_SPEC
+#define LINK_SPEC VXWORKS_LINK_SPEC " " SH_LINK_SPEC
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC
+/* Prints " (SH/VxWorks)" to stderr.  Note that the expansion already
+   ends in a semicolon.  */
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (SH/VxWorks)", stderr);
+
+/* There is no default multilib. */
+#undef MULTILIB_DEFAULTS
+/* Use the VxWorks definition of the function profiler hook.  */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER
--
cgit v1.2.3