From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001
From: upstream source tree
Date: Sun, 15 Mar 2015 20:14:05 -0400
Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified
 gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream
 tarball.

downloading a git-generated archive based on the 'upstream' tag should
provide you with a source tree that is binary identical to the one
extracted from the above tarball.

if you have obtained the source via the command 'git clone', however,
do note that line-endings of files in your working directory might
differ from line-endings of the respective files in the upstream
repository.
---
 gcc/config/sparc/biarch64.h             |   23 +
 gcc/config/sparc/constraints.md         |  148 +
 gcc/config/sparc/crtfastmath.c          |   44 +
 gcc/config/sparc/cypress.md             |   50 +
 gcc/config/sparc/freebsd.h              |  177 +
 gcc/config/sparc/gmon-sol2.c            |  420 ++
 gcc/config/sparc/hypersparc.md          |   82 +
 gcc/config/sparc/lb1spc.asm             |  784 +++
 gcc/config/sparc/lb1spl.asm             |  246 +
 gcc/config/sparc/leon.md                |   56 +
 gcc/config/sparc/libgcc-sparc-glibc.ver |   93 +
 gcc/config/sparc/linux-unwind.h         |  202 +
 gcc/config/sparc/linux.h                |  168 +
 gcc/config/sparc/linux64.h              |  289 +
 gcc/config/sparc/little-endian.opt      |   27 +
 gcc/config/sparc/long-double-switch.opt |   27 +
 gcc/config/sparc/netbsd-elf.h           |  246 +
 gcc/config/sparc/niagara.md             |  118 +
 gcc/config/sparc/niagara2.md            |   90 +
 gcc/config/sparc/openbsd1-64.h          |   23 +
 gcc/config/sparc/openbsd64.h            |   85 +
 gcc/config/sparc/predicates.md          |  475 ++
 gcc/config/sparc/rtemself.h             |   33 +
 gcc/config/sparc/sol2-64.h              |   22 +
 gcc/config/sparc/sol2-bi.h              |  271 +
 gcc/config/sparc/sol2-c1.asm            |  103 +
 gcc/config/sparc/sol2-ci.asm            |   55 +
 gcc/config/sparc/sol2-cn.asm            |   41 +
 gcc/config/sparc/sol2-gas-bi.h          |   23 +
 gcc/config/sparc/sol2-gas.h             |   47 +
 gcc/config/sparc/sol2-gld-bi.h          |   67 +
 gcc/config/sparc/sol2-unwind.h          |  480 ++
 gcc/config/sparc/sol2.h                 |  205 +
 gcc/config/sparc/sp-elf.h               |   69 +
 gcc/config/sparc/sp64-elf.h             |   93 +
 gcc/config/sparc/sparc-modes.def        |   47 +
 gcc/config/sparc/sparc-protos.h         |  108 +
 gcc/config/sparc/sparc.c                | 9873 +++++++++++++++++++++++++++++++
 gcc/config/sparc/sparc.h                | 2122 +++
 gcc/config/sparc/sparc.md               | 7828 ++++++++++++++++++++++++
 gcc/config/sparc/sparc.opt              |  126 +
 gcc/config/sparc/sparclet.md            |   43 +
 gcc/config/sparc/supersparc.md          |   92 +
 gcc/config/sparc/sync.md                |  199 +
 gcc/config/sparc/sysv4.h                |  125 +
 gcc/config/sparc/t-crtfm                |    4 +
 gcc/config/sparc/t-crtin                |    6 +
 gcc/config/sparc/t-elf                  |   40 +
 gcc/config/sparc/t-leon                 |   42 +
 gcc/config/sparc/t-leon3                |   37 +
 gcc/config/sparc/t-linux                |    7 +
 gcc/config/sparc/t-linux64              |   37 +
 gcc/config/sparc/t-netbsd64             |    8 +
 gcc/config/sparc/t-sol2                 |   39 +
 gcc/config/sparc/t-sol2-64              |    7 +
 gcc/config/sparc/t-vxworks              |    5 +
 gcc/config/sparc/ultra1_2.md            |  301 +
 gcc/config/sparc/ultra3.md              |  189 +
 gcc/config/sparc/vxworks.h              |   60 +
 59 files changed, 26727 insertions(+)
 create mode 100644 gcc/config/sparc/biarch64.h
 create mode 100644 gcc/config/sparc/constraints.md
 create mode 100644 gcc/config/sparc/crtfastmath.c
 create mode 100644 gcc/config/sparc/cypress.md
 create mode 100644 gcc/config/sparc/freebsd.h
 create mode 100644 gcc/config/sparc/gmon-sol2.c
 create mode 100644 gcc/config/sparc/hypersparc.md
 create mode 100644 gcc/config/sparc/lb1spc.asm
 create mode 100644 gcc/config/sparc/lb1spl.asm
 create mode 100644 gcc/config/sparc/leon.md
 create mode 100644 gcc/config/sparc/libgcc-sparc-glibc.ver
 create mode 100644 gcc/config/sparc/linux-unwind.h
 create mode 100644 gcc/config/sparc/linux.h
 create mode 100644 gcc/config/sparc/linux64.h
 create mode 100644 gcc/config/sparc/little-endian.opt
 create mode 100644 gcc/config/sparc/long-double-switch.opt
 create mode 100644 gcc/config/sparc/netbsd-elf.h
 create mode 100644 gcc/config/sparc/niagara.md
 create mode 100644 gcc/config/sparc/niagara2.md
 create mode 100644 gcc/config/sparc/openbsd1-64.h
 create mode 100644 gcc/config/sparc/openbsd64.h
 create mode 100644 gcc/config/sparc/predicates.md
 create mode 100644 gcc/config/sparc/rtemself.h
 create mode 100644 gcc/config/sparc/sol2-64.h
 create mode 100644 gcc/config/sparc/sol2-bi.h
 create mode 100644 gcc/config/sparc/sol2-c1.asm
 create mode 100644 gcc/config/sparc/sol2-ci.asm
 create mode 100644 gcc/config/sparc/sol2-cn.asm
 create mode 100644 gcc/config/sparc/sol2-gas-bi.h
 create mode 100644 gcc/config/sparc/sol2-gas.h
 create mode 100644 gcc/config/sparc/sol2-gld-bi.h
 create mode 100644 gcc/config/sparc/sol2-unwind.h
 create mode 100644 gcc/config/sparc/sol2.h
 create mode 100644 gcc/config/sparc/sp-elf.h
 create mode 100644 gcc/config/sparc/sp64-elf.h
 create mode 100644 gcc/config/sparc/sparc-modes.def
 create mode 100644 gcc/config/sparc/sparc-protos.h
 create mode 100644 gcc/config/sparc/sparc.c
 create mode 100644 gcc/config/sparc/sparc.h
 create mode 100644 gcc/config/sparc/sparc.md
 create mode 100644 gcc/config/sparc/sparc.opt
 create mode 100644 gcc/config/sparc/sparclet.md
 create mode 100644 gcc/config/sparc/supersparc.md
 create mode 100644 gcc/config/sparc/sync.md
 create mode 100644 gcc/config/sparc/sysv4.h
 create mode 100644 gcc/config/sparc/t-crtfm
 create mode 100644 gcc/config/sparc/t-crtin
 create mode 100644 gcc/config/sparc/t-elf
 create mode 100644 gcc/config/sparc/t-leon
 create mode 100644 gcc/config/sparc/t-leon3
 create mode 100644 gcc/config/sparc/t-linux
 create mode 100644 gcc/config/sparc/t-linux64
 create mode 100644 gcc/config/sparc/t-netbsd64
 create mode 100644 gcc/config/sparc/t-sol2
 create mode 100644 gcc/config/sparc/t-sol2-64
 create mode 100644 gcc/config/sparc/t-vxworks
 create mode 100644 gcc/config/sparc/ultra1_2.md
 create mode 100644 gcc/config/sparc/ultra3.md
 create mode 100644 gcc/config/sparc/vxworks.h

(limited to 'gcc/config/sparc')

diff --git a/gcc/config/sparc/biarch64.h b/gcc/config/sparc/biarch64.h
new file mode 100644
index 000000000..6328a3e0a
--- /dev/null
+++ b/gcc/config/sparc/biarch64.h
@@ -0,0 +1,23 @@
+/* Definitions of target machine for GCC, for Sun SPARC.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   Contributed by David E. O'Brien.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Specify this in a cover file to provide bi-architecture (32/64) support. */
+
+#define SPARC_BI_ARCH
diff --git a/gcc/config/sparc/constraints.md b/gcc/config/sparc/constraints.md
new file mode 100644
index 000000000..cca34ede5
--- /dev/null
+++ b/gcc/config/sparc/constraints.md
@@ -0,0 +1,148 @@
+;; Constraint definitions for SPARC.
+;; Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;; Unused letters: +;;; ABCD P Z +;;; a jkl q tuvwxyz + + +;; Register constraints + +(define_register_constraint "b" "(TARGET_V9 && TARGET_VIS ? EXTRA_FP_REGS : NO_REGS)" + "Any floating-point register in VIS mode") + +(define_register_constraint "c" "FPCC_REGS" + "Floating-point condition code register") + +(define_register_constraint "d" "(TARGET_V9 && TARGET_VIS ? FP_REGS : NO_REGS)" + "Lower floating-point register in VIS mode") + +;; In the non-V9 case, coerce V9 'e' class to 'f', so we can use 'e' in the +;; MD file for V8 and V9. +(define_register_constraint "e" "(TARGET_FPU ? (TARGET_V9 ? EXTRA_FP_REGS : FP_REGS) : NO_REGS)" + "Any floating-point register") + +(define_register_constraint "f" "(TARGET_FPU ? FP_REGS : NO_REGS)" + "Lower floating-point register") + +(define_register_constraint "h" "(TARGET_V9 && TARGET_V8PLUS ? I64_REGS : NO_REGS)" + "64-bit global or out register in V8+ mode") + + +;; Floating-point constant constraints + +(define_constraint "G" + "The floating-point zero constant" + (and (match_code "const_double") + (match_test "const_zero_operand (op, mode)"))) + + +;; Integer constant constraints + +(define_constraint "H" + "Valid operand of double arithmetic operation" + (and (match_code "const_double") + (match_test "arith_double_operand (op, DImode)"))) + +(define_constraint "I" + "Signed 13-bit integer constant" + (and (match_code "const_int") + (match_test "SPARC_SIMM13_P (ival)"))) + +(define_constraint "J" + "The integer zero constant" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "K" + "Signed 32-bit constant that can be loaded with a sethi instruction" + (and (match_code "const_int") + (match_test "SPARC_SETHI32_P (ival)"))) + +(define_constraint "L" + "Signed 11-bit integer constant" + (and (match_code "const_int") + (match_test "SPARC_SIMM11_P (ival)"))) + +(define_constraint "M" + "Signed 10-bit integer constant" + (and (match_code "const_int") + (match_test "SPARC_SIMM10_P (ival)"))) + +(define_constraint "N" + "Signed constant that can be loaded with a sethi instruction" + (and (match_code "const_int") + (match_test "SPARC_SETHI_P (ival)"))) + +(define_constraint "O" + "The 4096 constant" + (and (match_code "const_int") + (match_test "ival == 4096"))) + + +;; Extra constraints +;; Our memory extra constraints have to emulate the behavior of 'm' and 'o', +;; i.e. accept pseudo-registers during reload. 
+ +(define_constraint "D" + "const_vector" + (and (match_code "const_vector") + (match_test "GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_INT"))) + +(define_constraint "Q" + "Floating-point constant that can be loaded with a sethi instruction" + (and (match_code "const_double") + (match_test "fp_sethi_p (op)"))) + +(define_constraint "R" + "Floating-point constant that can be loaded with a move instruction" + (and (match_code "const_double") + (match_test "fp_mov_p (op)"))) + +(define_constraint "S" + "Floating-point constant that can be loaded with a high/lo_sum sequence" + (and (match_code "const_double") + (match_test "fp_high_losum_p (op)"))) + +;; Not needed in 64-bit mode +(define_constraint "T" + "Memory reference whose address is aligned to 8-byte boundary" + (and (match_test "TARGET_ARCH32") + (match_code "mem,reg") + (match_test "memory_ok_for_ldd (op)"))) + +;; Not needed in 64-bit mode +(define_constraint "U" + "Pseudo-register or hard even-numbered integer register" + (and (match_test "TARGET_ARCH32") + (match_code "reg") + (ior (match_test "REGNO (op) < FIRST_PSEUDO_REGISTER") + (not (match_test "reload_in_progress && reg_renumber [REGNO (op)] < 0"))) + (match_test "register_ok_for_ldd (op)"))) + +;; Equivalent to 'T' but available in 64-bit mode +(define_constraint "W" + "Memory reference for 'e' constraint floating-point register" + (and (match_code "mem,reg") + (match_test "memory_ok_for_ldd (op)"))) + +(define_constraint "Y" + "The vector zero constant" + (and (match_code "const_vector") + (match_test "const_zero_operand (op, mode)"))) diff --git a/gcc/config/sparc/crtfastmath.c b/gcc/config/sparc/crtfastmath.c new file mode 100644 index 000000000..04727ec94 --- /dev/null +++ b/gcc/config/sparc/crtfastmath.c @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2001, 2009 Free Software Foundation, Inc. + * Contributed by David S. Miller (davem@redhat.com) + * + * This file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) any + * later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * . + */ + +#define FPRS_NS (1 << 22) /* Non-Standard fpu results */ + +static void __attribute__((constructor)) +set_fast_math (void) +{ + unsigned int fsr; + + /* This works for the 64-bit case because, even if 32-bit ld/st of + the fsr register modified the upper 32-bit, the only thing up there + are the 3 other condition codes which are "do not care" at the time + that this runs. */ + + __asm__("st %%fsr, %0" + : "=m" (fsr)); + + fsr |= FPRS_NS; + + __asm__("ld %0, %%fsr" + : : "m" (fsr)); +} diff --git a/gcc/config/sparc/cypress.md b/gcc/config/sparc/cypress.md new file mode 100644 index 000000000..633c0fd77 --- /dev/null +++ b/gcc/config/sparc/cypress.md @@ -0,0 +1,50 @@ +;; Scheduling description for SPARC Cypress. 
+;; Copyright (C) 2002, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The Cypress is a pretty simple single-issue processor. + +(define_automaton "cypress_0,cypress_1") + +(define_cpu_unit "cyp_memory, cyp_fpalu" "cypress_0") +(define_cpu_unit "cyp_fpmds" "cypress_1") + +(define_insn_reservation "cyp_load" 2 + (and (eq_attr "cpu" "cypress") + (eq_attr "type" "load,sload,fpload")) + "cyp_memory, nothing") + +(define_insn_reservation "cyp_fp_alu" 5 + (and (eq_attr "cpu" "cypress") + (eq_attr "type" "fp,fpmove")) + "cyp_fpalu, nothing*3") + +(define_insn_reservation "cyp_fp_mult" 7 + (and (eq_attr "cpu" "cypress") + (eq_attr "type" "fpmul")) + "cyp_fpmds, nothing*5") + +(define_insn_reservation "cyp_fp_div" 37 + (and (eq_attr "cpu" "cypress") + (eq_attr "type" "fpdivs,fpdivd")) + "cyp_fpmds, nothing*35") + +(define_insn_reservation "cyp_fp_sqrt" 63 + (and (eq_attr "cpu" "cypress") + (eq_attr "type" "fpsqrts,fpsqrtd")) + "cyp_fpmds, nothing*61") diff --git a/gcc/config/sparc/freebsd.h b/gcc/config/sparc/freebsd.h new file mode 100644 index 000000000..76c27d39b --- /dev/null +++ b/gcc/config/sparc/freebsd.h @@ -0,0 +1,177 @@ +/* Definitions for Sun SPARC64 running FreeBSD using the ELF format + Copyright (C) 2001, 2002, 2004, 2005, 2006, 2007, 2010, 2011 + Free Software Foundation, Inc. + Contributed by David E. O'Brien and BSDi. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER } + +/* FreeBSD needs the platform name (sparc64) defined. + Emacs etc needs to know if the arch is 64 or 32-bits. + This also selects which targets are available via -mcpu. 
*/ + +#undef FBSD_TARGET_CPU_CPP_BUILTINS +#define FBSD_TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__sparc64__"); \ + builtin_define ("__sparc__"); \ + builtin_define ("__sparc_v9__"); \ + builtin_define ("__sparcv9"); \ + } \ + while (0) + +#undef ASM_SPEC +#define ASM_SPEC "%{fpic|fPIC|fpie|fPIE:-K PIC} %(asm_cpu)" + +#define LINK_SPEC "%(link_arch) \ + %{!mno-relax:%{!r:-relax}} \ + %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \ + %{assert*} %{R*} %{rpath*} %{defsym*} \ + %{shared:-Bshareable %{h*} %{soname*}} \ + %{symbolic:-Bsymbolic} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(fbsd_dynamic_linker) } \ + %{static:-Bstatic}}" + + +/************************[ Target stuff ]***********************************/ + +/* Define the actual types of some ANSI-mandated types. + Needs to agree with . GCC defaults come from c-decl.c, + c-common.c, and config//.h. */ + +/* Earlier headers may get this wrong for FreeBSD. + We use the GCC defaults instead. */ +#undef WCHAR_TYPE + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +/* Define for support of TFmode long double. + SPARC ABI says that long double is 4 words. */ +#undef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64) + +/* Define this to set long double type size to use in libgcc2.c, which can + not depend on target_flags. */ +#if defined(__arch64__) || defined(__LONG_DOUBLE_128__) +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128 +#else +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#endif + +/* Definitions for 64-bit SPARC running systems with ELF. */ + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (FreeBSD/sparc64 ELF)"); + +#define TARGET_ELF 1 + +/* XXX */ +/* A 64 bit v9 compiler with stack-bias, + in a Medium/mid code model environment. */ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT \ + (MASK_V9 + MASK_64BIT + MASK_PTR64 /* + MASK_FASTER_STRUCTS */ \ + + MASK_STACK_BIAS + MASK_APP_REGS + MASK_FPU \ + + MASK_LONG_DOUBLE_128 /* + MASK_HARD_QUAD */) + +/* The default code model. */ +#undef SPARC_DEFAULT_CMODEL +#define SPARC_DEFAULT_CMODEL CM_MEDLOW + +#define ENABLE_EXECUTE_STACK \ + static int need_enable_exec_stack; \ + static void check_enabling(void) __attribute__ ((constructor)); \ + static void check_enabling(void) \ + { \ + extern int sysctlbyname(const char *, void *, size_t *, void *, size_t);\ + int prot = 0; \ + size_t len = sizeof(prot); \ + \ + sysctlbyname ("kern.stackprot", &prot, &len, NULL, 0); \ + if (prot != 7) \ + need_enable_exec_stack = 1; \ + } \ + extern void __enable_execute_stack (void *); \ + void __enable_execute_stack (void *addr) \ + { \ + if (!need_enable_exec_stack) \ + return; \ + else { \ + /* 7 is PROT_READ | PROT_WRITE | PROT_EXEC */ \ + if (mprotect (addr, TRAMPOLINE_SIZE, 7) < 0) \ + perror ("mprotect of trampoline code"); \ + } \ + } + + +/************************[ Assembler stuff ]********************************/ + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* XXX2 */ +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. 
*/ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf (LABEL, "*.L%s%lu", PREFIX, (unsigned long)(NUM)) + + +/************************[ Debugger stuff ]*********************************/ + +/* This is the char to use for continuation (in case we need to turn + continuation back on). */ + +#undef DBX_CONTIN_CHAR +#define DBX_CONTIN_CHAR '?' + +/* DWARF bits. */ + +/* Follow Irix 6 and not the Dwarf2 draft in using 64-bit offsets. + Obviously the Dwarf2 folks havn't tried to actually build systems + with their spec. On a 64-bit system, only 64-bit relocs become + RELATIVE relocations. */ + +/* #define DWARF_OFFSET_SIZE PTR_SIZE */ + +#ifdef HAVE_AS_TLS +#undef TARGET_SUN_TLS +#undef TARGET_GNU_TLS +#define TARGET_SUN_TLS 0 +#define TARGET_GNU_TLS 1 +#endif + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \ + FBSD_ENDFILE_SPEC + +/* We use GNU ld so undefine this so that attribute((init_priority)) works. */ +#undef CTORS_SECTION_ASM_OP +#undef DTORS_SECTION_ASM_OP diff --git a/gcc/config/sparc/gmon-sol2.c b/gcc/config/sparc/gmon-sol2.c new file mode 100644 index 000000000..452d98d7d --- /dev/null +++ b/gcc/config/sparc/gmon-sol2.c @@ -0,0 +1,420 @@ +/*- + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. [rescinded 22 July 1999] + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Mangled into a form that works on SPARC Solaris 2 by Mark Eichin + * for Cygnus Support, July 1992. 
+ */ + +#include "tconfig.h" +#include "tsystem.h" +#include /* for creat() */ + +#if 0 +#include "sparc/gmon.h" +#else +struct phdr { + char *lpc; + char *hpc; + int ncnt; +}; +#define HISTFRACTION 2 +#define HISTCOUNTER unsigned short +#define HASHFRACTION 1 +#define ARCDENSITY 2 +#define MINARCS 50 +struct tostruct { + char *selfpc; + long count; + unsigned short link; +}; +struct rawarc { + unsigned long raw_frompc; + unsigned long raw_selfpc; + long raw_count; +}; +#define ROUNDDOWN(x,y) (((x)/(y))*(y)) +#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) + +#endif + +/* extern mcount() asm ("mcount"); */ +/*extern*/ char *minbrk /* asm ("minbrk") */; + + /* + * froms is actually a bunch of unsigned shorts indexing tos + */ +static int profiling = 3; +static unsigned short *froms; +static struct tostruct *tos = 0; +static long tolimit = 0; +static char *s_lowpc = 0; +static char *s_highpc = 0; +static unsigned long s_textsize = 0; + +static int ssiz; +static char *sbuf; +static int s_scale; + /* see profil(2) where this is describe (incorrectly) */ +#define SCALE_1_TO_1 0x10000L + +#define MSG "No space for profiling buffer(s)\n" + +static void moncontrol (int); +extern void monstartup (char *, char *); +extern void _mcleanup (void); + +void monstartup(char *lowpc, char *highpc) +{ + int monsize; + char *buffer; + register int o; + + /* + * round lowpc and highpc to multiples of the density we're using + * so the rest of the scaling (here and in gprof) stays in ints. + */ + lowpc = (char *) + ROUNDDOWN((unsigned long)lowpc, HISTFRACTION*sizeof(HISTCOUNTER)); + s_lowpc = lowpc; + highpc = (char *) + ROUNDUP((unsigned long)highpc, HISTFRACTION*sizeof(HISTCOUNTER)); + s_highpc = highpc; + s_textsize = highpc - lowpc; + monsize = (s_textsize / HISTFRACTION) + sizeof(struct phdr); + buffer = sbrk( monsize ); + if ( buffer == (char *) -1 ) { + write( 2 , MSG , sizeof(MSG) ); + return; + } + froms = (unsigned short *) sbrk( s_textsize / HASHFRACTION ); + if ( froms == (unsigned short *) -1 ) { + write( 2 , MSG , sizeof(MSG) ); + froms = 0; + return; + } + tolimit = s_textsize * ARCDENSITY / 100; + if ( tolimit < MINARCS ) { + tolimit = MINARCS; + } else if ( tolimit > 65534 ) { + tolimit = 65534; + } + tos = (struct tostruct *) sbrk( tolimit * sizeof( struct tostruct ) ); + if ( tos == (struct tostruct *) -1 ) { + write( 2 , MSG , sizeof(MSG) ); + froms = 0; + tos = 0; + return; + } + minbrk = sbrk(0); + tos[0].link = 0; + sbuf = buffer; + ssiz = monsize; + ( (struct phdr *) buffer ) -> lpc = lowpc; + ( (struct phdr *) buffer ) -> hpc = highpc; + ( (struct phdr *) buffer ) -> ncnt = ssiz; + monsize -= sizeof(struct phdr); + if ( monsize <= 0 ) + return; + o = highpc - lowpc; + if( monsize < o ) +#ifndef hp300 + s_scale = ( (float) monsize / o ) * SCALE_1_TO_1; +#else /* avoid floating point */ + { + int quot = o / monsize; + + if (quot >= 0x10000) + s_scale = 1; + else if (quot >= 0x100) + s_scale = 0x10000 / quot; + else if (o >= 0x800000) + s_scale = 0x1000000 / (o / (monsize >> 8)); + else + s_scale = 0x1000000 / ((o << 8) / monsize); + } +#endif + else + s_scale = SCALE_1_TO_1; + moncontrol(1); +} + +void +_mcleanup(void) +{ + int fd; + int fromindex; + int endfrom; + char *frompc; + int toindex; + struct rawarc rawarc; + char *profdir; + const char *proffile; + char *progname; + char buf[PATH_MAX]; + extern char **___Argv; + + moncontrol(0); + + if ((profdir = getenv("PROFDIR")) != NULL) { + /* If PROFDIR contains a null value, no profiling output is produced */ + if (*profdir == '\0') { + 
return; + } + + progname=strrchr(___Argv[0], '/'); + if (progname == NULL) + progname=___Argv[0]; + else + progname++; + + sprintf(buf, "%s/%ld.%s", profdir, (long) getpid(), progname); + proffile = buf; + } else { + proffile = "gmon.out"; + } + + fd = creat( proffile, 0666 ); + if ( fd < 0 ) { + perror( proffile ); + return; + } +# ifdef DEBUG + fprintf( stderr , "[mcleanup] sbuf 0x%x ssiz %d\n" , sbuf , ssiz ); +# endif /* DEBUG */ + write( fd , sbuf , ssiz ); + endfrom = s_textsize / (HASHFRACTION * sizeof(*froms)); + for ( fromindex = 0 ; fromindex < endfrom ; fromindex++ ) { + if ( froms[fromindex] == 0 ) { + continue; + } + frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof(*froms)); + for (toindex=froms[fromindex]; toindex!=0; toindex=tos[toindex].link) { +# ifdef DEBUG + fprintf( stderr , + "[mcleanup] frompc 0x%x selfpc 0x%x count %d\n" , + frompc , tos[toindex].selfpc , tos[toindex].count ); +# endif /* DEBUG */ + rawarc.raw_frompc = (unsigned long) frompc; + rawarc.raw_selfpc = (unsigned long) tos[toindex].selfpc; + rawarc.raw_count = tos[toindex].count; + write( fd , &rawarc , sizeof rawarc ); + } + } + close( fd ); +} + +/* + * The SPARC stack frame is only held together by the frame pointers + * in the register windows. According to the SVR4 SPARC ABI + * Supplement, Low Level System Information/Operating System + * Interface/Software Trap Types, a type 3 trap will flush all of the + * register windows to the stack, which will make it possible to walk + * the frames and find the return addresses. + * However, it seems awfully expensive to incur a trap (system + * call) for every function call. It turns out that "call" simply puts + * the return address in %o7 expecting the "save" in the procedure to + * shift it into %i7; this means that before the "save" occurs, %o7 + * contains the address of the call to mcount, and %i7 still contains + * the caller above that. The asm mcount here simply saves those + * registers in argument registers and branches to internal_mcount, + * simulating a call with arguments. + * Kludges: + * 1) the branch to internal_mcount is hard coded; it should be + * possible to tell asm to use the assembler-name of a symbol. + * 2) in theory, the function calling mcount could have saved %i7 + * somewhere and reused the register; in practice, I *think* this will + * break longjmp (and maybe the debugger) but I'm not certain. (I take + * some comfort in the knowledge that it will break the native mcount + * as well.) + * 3) if builtin_return_address worked, this could be portable. + * However, it would really have to be optimized for arguments of 0 + * and 1 and do something like what we have here in order to avoid the + * trap per function call performance hit. + * 4) the atexit and monsetup calls prevent this from simply + * being a leaf routine that doesn't do a "save" (and would thus have + * access to %o7 and %i7 directly) but the call to write() at the end + * would have also prevented this. + * + * -- [eichin:19920702.1107EST] + */ + +static void internal_mcount (char *, unsigned short *) __attribute__ ((used)); + +/* i7 == last ret, -> frompcindex */ +/* o7 == current ret, -> selfpc */ +/* Solaris 2 libraries use _mcount. */ +asm(".global _mcount; _mcount: mov %i7,%o1; mov %o7,%o0;b,a internal_mcount"); +/* This is for compatibility with old versions of gcc which used mcount. 
*/ +asm(".global mcount; mcount: mov %i7,%o1; mov %o7,%o0;b,a internal_mcount"); + +static void internal_mcount(char *selfpc, unsigned short *frompcindex) +{ + register struct tostruct *top; + register struct tostruct *prevtop; + register long toindex; + static char already_setup; + + /* + * find the return address for mcount, + * and the return address for mcount's caller. + */ + + if(!already_setup) { + extern char etext[]; + extern char _start[]; + extern char _init[]; + already_setup = 1; + monstartup(_start < _init ? _start : _init, etext); +#ifdef USE_ONEXIT + on_exit(_mcleanup, 0); +#else + atexit(_mcleanup); +#endif + } + /* + * check that we are profiling + * and that we aren't recursively invoked. + */ + if (profiling) { + goto out; + } + profiling++; + /* + * check that frompcindex is a reasonable pc value. + * for example: signal catchers get called from the stack, + * not from text space. too bad. + */ + frompcindex = (unsigned short *)((long)frompcindex - (long)s_lowpc); + if ((unsigned long)frompcindex > s_textsize) { + goto done; + } + frompcindex = + &froms[((long)frompcindex) / (HASHFRACTION * sizeof(*froms))]; + toindex = *frompcindex; + if (toindex == 0) { + /* + * first time traversing this arc + */ + toindex = ++tos[0].link; + if (toindex >= tolimit) { + goto overflow; + } + *frompcindex = toindex; + top = &tos[toindex]; + top->selfpc = selfpc; + top->count = 1; + top->link = 0; + goto done; + } + top = &tos[toindex]; + if (top->selfpc == selfpc) { + /* + * arc at front of chain; usual case. + */ + top->count++; + goto done; + } + /* + * have to go looking down chain for it. + * top points to what we are looking at, + * prevtop points to previous top. + * we know it is not at the head of the chain. + */ + for (; /* goto done */; ) { + if (top->link == 0) { + /* + * top is end of the chain and none of the chain + * had top->selfpc == selfpc. + * so we allocate a new tostruct + * and link it to the head of the chain. + */ + toindex = ++tos[0].link; + if (toindex >= tolimit) { + goto overflow; + } + top = &tos[toindex]; + top->selfpc = selfpc; + top->count = 1; + top->link = *frompcindex; + *frompcindex = toindex; + goto done; + } + /* + * otherwise, check the next arc on the chain. + */ + prevtop = top; + top = &tos[top->link]; + if (top->selfpc == selfpc) { + /* + * there it is. + * increment its count + * move it to the head of the chain. + */ + top->count++; + toindex = prevtop->link; + prevtop->link = top->link; + top->link = *frompcindex; + *frompcindex = toindex; + goto done; + } + + } +done: + profiling--; + /* and fall through */ +out: + return; /* normal return restores saved registers */ + +overflow: + profiling++; /* halt further profiling */ +# define TOLIMIT "mcount: tos overflow\n" + write(2, TOLIMIT, sizeof(TOLIMIT)); + goto out; +} + +/* + * Control profiling + * profiling is what mcount checks to see if + * all the data structures are ready. + */ +static void moncontrol(int mode) +{ + if (mode) { + /* start */ + profil((unsigned short *)(sbuf + sizeof(struct phdr)), + ssiz - sizeof(struct phdr), + (long)s_lowpc, s_scale); + profiling = 0; + } else { + /* stop */ + profil((unsigned short *)0, 0, 0, 0); + profiling = 3; + } +} diff --git a/gcc/config/sparc/hypersparc.md b/gcc/config/sparc/hypersparc.md new file mode 100644 index 000000000..0d35b15e3 --- /dev/null +++ b/gcc/config/sparc/hypersparc.md @@ -0,0 +1,82 @@ +;; Scheduling description for HyperSPARC. +;; Copyright (C) 2002, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The HyperSPARC is a dual-issue processor. It is not all that fancy. + +;; ??? There are some things not modelled. For example, sethi+or +;; ??? coming right after each other are specifically identified and +;; ??? dual-issued by the processor. Similarly for sethi+ld[reg+lo]. +;; ??? Actually, to be more precise that rule is sort of modelled now. + +(define_automaton "hypersparc_0,hypersparc_1") + +;; HyperSPARC/sparclite86x scheduling + +(define_cpu_unit "hs_memory,hs_branch,hs_shift,hs_fpalu" "hypersparc_0") +(define_cpu_unit "hs_fpmds" "hypersparc_1") + +(define_insn_reservation "hs_load" 1 + (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x")) + (eq_attr "type" "load,sload,fpload")) + "hs_memory") + +(define_insn_reservation "hs_store" 2 + (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x")) + (eq_attr "type" "store,fpstore")) + "hs_memory, nothing") + +(define_insn_reservation "hs_slbranch" 1 + (and (eq_attr "cpu" "sparclite86x") + (eq_attr "type" "branch")) + "hs_branch") + +(define_insn_reservation "hs_slshift" 1 + (and (eq_attr "cpu" "sparclite86x") + (eq_attr "type" "shift")) + "hs_shift") + +(define_insn_reservation "hs_fp_alu" 1 + (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x")) + (eq_attr "type" "fp,fpmove,fpcmp")) + "hs_fpalu") + +(define_insn_reservation "hs_fp_mult" 1 + (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x")) + (eq_attr "type" "fpmul")) + "hs_fpmds") + +(define_insn_reservation "hs_fp_divs" 8 + (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x")) + (eq_attr "type" "fpdivs")) + "hs_fpmds*6, nothing*2") + +(define_insn_reservation "hs_fp_divd" 12 + (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x")) + (eq_attr "type" "fpdivd")) + "hs_fpmds*10, nothing*2") + +(define_insn_reservation "hs_fp_sqrt" 17 + (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x")) + (eq_attr "type" "fpsqrts,fpsqrtd")) + "hs_fpmds*15, nothing*2") + +(define_insn_reservation "hs_imul" 17 + (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x")) + (eq_attr "type" "imul")) + "hs_fpmds*15, nothing*2") diff --git a/gcc/config/sparc/lb1spc.asm b/gcc/config/sparc/lb1spc.asm new file mode 100644 index 000000000..b60bd5740 --- /dev/null +++ b/gcc/config/sparc/lb1spc.asm @@ -0,0 +1,784 @@ +/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 + for the sparc processor. + + These routines are derived from the SPARC Architecture Manual, version 8, + slightly edited to match the desired calling convention, and also to + optimize them for our purposes. */ + +#ifdef L_mulsi3 +.text + .align 4 + .global .umul + .proc 4 +.umul: + or %o0, %o1, %o4 ! logical or of multiplier and multiplicand + mov %o0, %y ! multiplier to Y register + andncc %o4, 0xfff, %o5 ! mask out lower 12 bits + be mul_shortway ! 
can do it the short way + andcc %g0, %g0, %o4 ! zero the partial product and clear NV cc + ! + ! long multiply + ! + mulscc %o4, %o1, %o4 ! first iteration of 33 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 ! 32nd iteration + mulscc %o4, %g0, %o4 ! last iteration only shifts + ! the upper 32 bits of product are wrong, but we do not care + retl + rd %y, %o0 + ! + ! short multiply + ! +mul_shortway: + mulscc %o4, %o1, %o4 ! first iteration of 13 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 + mulscc %o4, %o1, %o4 ! 12th iteration + mulscc %o4, %g0, %o4 ! last iteration only shifts + rd %y, %o5 + sll %o4, 12, %o4 ! left shift partial product by 12 bits + srl %o5, 20, %o5 ! right shift partial product by 20 bits + retl + or %o5, %o4, %o0 ! merge for true product +#endif + +#ifdef L_divsi3 +/* + * Division and remainder, from Appendix E of the SPARC Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * .div name of function to generate + * div div=div => %o0 / %o1; div=rem => %o0 % %o1 + * true true=true => signed; true=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + .global .udiv + .align 4 + .proc 4 + .text +.udiv: + b ready_to_divide + mov 0, %g3 ! result is always positive + + .global .div + .align 4 + .proc 4 + .text +.div: + ! compute sign of result; if neither is negative, no problem + orcc %o1, %o0, %g0 ! either negative? + bge ready_to_divide ! no, go do the divide + xor %o1, %o0, %g3 ! compute sign in any case + tst %o1 + bge 1f + tst %o0 + ! %o1 is definitely negative; %o0 might also be negative + bge ready_to_divide ! if %o0 not negative... + sub %g0, %o1, %o1 ! in any case, make %o1 nonneg +1: ! %o0 is negative, %o1 is nonnegative + sub %g0, %o0, %o0 ! make %o0 nonnegative + + +ready_to_divide: + + ! Ready to divide. 
Compute size of quotient; scale comparand. + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta 0x2 ! ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu got_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu not_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g2 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g2. + 2: addcc %o5, %o5, %o5 + bcc not_too_big + add %g2, 1, %g2 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b do_single_div + sub %g2, 1, %g2 + + not_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be do_single_div + nop + /* NB: these are commented out in the V8-SPARC manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g2 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + do_single_div: + subcc %g2, 1, %g2 + bl end_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b end_single_divloop + nop + single_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + end_single_divloop: + subcc %g2, 1, %g2 + bge single_divloop + tst %o3 + b,a end_regular_divide + +not_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be got_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +divloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 + +L4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + + +L3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + +L2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L4.19 + srl %o5,1,%o5 + ! 
remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + +L3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + +L1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L3.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + +L3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + +L2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + +L3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + 9: +end_regular_divide: + subcc %o4, 1, %o4 + bge divloop + tst %o3 + bl,a got_result + ! non-restoring fixup here (one instruction only!) + sub %o2, 1, %o2 + + +got_result: + ! check to see if answer should be < 0 + tst %g3 + bl,a 1f + sub %g0, %o2, %o2 +1: + retl + mov %o2, %o0 +#endif + +#ifdef L_modsi3 +/* This implementation was taken from glibc: + * + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ +.text + .align 4 + .global .urem + .proc 4 +.urem: + b divide + mov 0, %g3 ! result always positive + + .align 4 + .global .rem + .proc 4 +.rem: + ! compute sign of result; if neither is negative, no problem + orcc %o1, %o0, %g0 ! either negative? + bge 2f ! no, go do the divide + mov %o0, %g3 ! sign of remainder matches %o0 + tst %o1 + bge 1f + tst %o0 + ! 
%o1 is definitely negative; %o0 might also be negative + bge 2f ! if %o0 not negative... + sub %g0, %o1, %o1 ! in any case, make %o1 nonneg +1: ! %o0 is negative, %o1 is nonnegative + sub %g0, %o0, %o0 ! make %o0 nonnegative +2: + + ! Ready to divide. Compute size of quotient; scale comparand. +divide: + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta 0x2 !ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu got_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu not_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g2 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g2. + 2: addcc %o5, %o5, %o5 + bcc not_too_big + add %g2, 1, %g2 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b do_single_div + sub %g2, 1, %g2 + + not_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be do_single_div + nop + /* NB: these are commented out in the V8-SPARC manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g2 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + do_single_div: + subcc %g2, 1, %g2 + bl end_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b end_single_divloop + nop + single_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + end_single_divloop: + subcc %g2, 1, %g2 + bge single_divloop + tst %o3 + b,a end_regular_divide + +not_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be got_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +divloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 +L4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + +L3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L4.21: + ! 
remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + +L2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + +L3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + +L1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L3.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + +L3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + +L2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + +L3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + 9: +end_regular_divide: + subcc %o4, 1, %o4 + bge divloop + tst %o3 + bl,a got_result + ! non-restoring fixup here (one instruction only!) + add %o3, %o1, %o3 + +got_result: + ! check to see if answer should be < 0 + tst %g3 + bl,a 1f + sub %g0, %o3, %o3 +1: + retl + mov %o3, %o0 + +#endif + diff --git a/gcc/config/sparc/lb1spl.asm b/gcc/config/sparc/lb1spl.asm new file mode 100644 index 000000000..973401f80 --- /dev/null +++ b/gcc/config/sparc/lb1spl.asm @@ -0,0 +1,246 @@ +/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 + for the sparclite processor. + + These routines are all from the SPARClite User's Guide, slightly edited + to match the desired calling convention, and also to optimize them. */ + +#ifdef L_udivsi3 +.text + .align 4 + .global .udiv + .proc 04 +.udiv: + wr %g0,%g0,%y ! 
Not a delayed write for sparclite + tst %g0 + divscc %o0,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + retl + divscc %g1,%o1,%o0 +#endif + +#ifdef L_umodsi3 +.text + .align 4 + .global .urem + .proc 04 +.urem: + wr %g0,%g0,%y ! Not a delayed write for sparclite + tst %g0 + divscc %o0,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + divscc %g1,%o1,%g1 + bl 1f + rd %y,%o0 + retl + nop +1: retl + add %o0,%o1,%o0 +#endif + +#ifdef L_divsi3 +.text + .align 4 + .global .div + .proc 04 +! ??? This routine could be made faster if was optimized, and if it was +! rewritten to only calculate the quotient. +.div: + wr %g0,%g0,%y ! Not a delayed write for sparclite + mov %o1,%o4 + tst %o1 + bl,a 1f + sub %g0,%o4,%o4 +1: tst %o0 + bl,a 2f + mov -1,%y +2: divscc %o0,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + be 6f + mov %y,%o3 + bg 4f + addcc %o3,%o4,%g0 + be,a 6f + mov %g0,%o3 + tst %o0 + bl 5f + tst %g1 + ba 5f + add %o3,%o4,%o3 +4: subcc %o3,%o4,%g0 + be,a 6f + mov %g0,%o3 + tst %o0 + bge 5f + tst %g1 + sub %o3,%o4,%o3 +5: bl,a 6f + add %g1,1,%g1 +6: tst %o1 + bl,a 7f + sub %g0,%g1,%g1 +7: retl + mov %g1,%o0 ! Quotient is in %g1. +#endif + +#ifdef L_modsi3 +.text + .align 4 + .global .rem + .proc 04 +! ??? This routine could be made faster if was optimized, and if it was +! rewritten to only calculate the remainder. +.rem: + wr %g0,%g0,%y ! 
Not a delayed write for sparclite + mov %o1,%o4 + tst %o1 + bl,a 1f + sub %g0,%o4,%o4 +1: tst %o0 + bl,a 2f + mov -1,%y +2: divscc %o0,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + divscc %g1,%o4,%g1 + be 6f + mov %y,%o3 + bg 4f + addcc %o3,%o4,%g0 + be,a 6f + mov %g0,%o3 + tst %o0 + bl 5f + tst %g1 + ba 5f + add %o3,%o4,%o3 +4: subcc %o3,%o4,%g0 + be,a 6f + mov %g0,%o3 + tst %o0 + bge 5f + tst %g1 + sub %o3,%o4,%o3 +5: bl,a 6f + add %g1,1,%g1 +6: tst %o1 + bl,a 7f + sub %g0,%g1,%g1 +7: retl + mov %o3,%o0 ! Remainder is in %o3. +#endif diff --git a/gcc/config/sparc/leon.md b/gcc/config/sparc/leon.md new file mode 100644 index 000000000..bc77c6ab9 --- /dev/null +++ b/gcc/config/sparc/leon.md @@ -0,0 +1,56 @@ +;; Scheduling description for LEON. +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +(define_automaton "leon") + +(define_cpu_unit "leon_memory, leon_fpalu" "leon") +(define_cpu_unit "leon_fpmds" "leon") +(define_cpu_unit "write_buf" "leon") + +(define_insn_reservation "leon_load" 1 + (and (eq_attr "cpu" "leon") + (eq_attr "type" "load,sload,fpload")) + "leon_memory") + +(define_insn_reservation "leon_store" 1 + (and (eq_attr "cpu" "leon") + (eq_attr "type" "store,fpstore")) + "leon_memory+write_buf") + +(define_insn_reservation "leon_fp_alu" 1 + (and (eq_attr "cpu" "leon") + (eq_attr "type" "fp,fpmove")) + "leon_fpalu, nothing") + +(define_insn_reservation "leon_fp_mult" 1 + (and (eq_attr "cpu" "leon") + (eq_attr "type" "fpmul")) + "leon_fpmds, nothing") + +(define_insn_reservation "leon_fp_div" 16 + (and (eq_attr "cpu" "leon") + (eq_attr "type" "fpdivs,fpdivd")) + "leon_fpmds, nothing*15") + +(define_insn_reservation "leon_fp_sqrt" 23 + (and (eq_attr "cpu" "leon") + (eq_attr "type" "fpsqrts,fpsqrtd")) + "leon_fpmds, nothing*21") + diff --git a/gcc/config/sparc/libgcc-sparc-glibc.ver b/gcc/config/sparc/libgcc-sparc-glibc.ver new file mode 100644 index 000000000..91138d379 --- /dev/null +++ b/gcc/config/sparc/libgcc-sparc-glibc.ver @@ -0,0 +1,93 @@ +# Copyright (C) 2002, 2006, 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. 
+# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# In order to work around the very problems that force us to now generally +# create a libgcc.so, glibc reexported a number of routines from libgcc.a. +# By now choosing the same version tags for these specific routines, we +# maintain enough binary compatibility to allow future versions of glibc +# to defer implementation of these routines to libgcc.so via DT_AUXILIARY. + +%exclude { + __divdi3 + __moddi3 + __udivdi3 + __umoddi3 + __register_frame + __register_frame_table + __deregister_frame + __register_frame_info + __deregister_frame_info + __frame_state_for + __register_frame_info_table +} + +%ifdef __arch64__ +%define GLIBC_VER GLIBC_2.2 +%else +%define GLIBC_VER GLIBC_2.0 +%endif +%inherit GCC_3.0 GLIBC_VER +GLIBC_VER { + # Sampling of DImode arithmetic used by (at least) i386 and m68k. + __divdi3 + __moddi3 + __udivdi3 + __umoddi3 + + # Exception handling support functions used by most everyone. + __register_frame + __register_frame_table + __deregister_frame + __register_frame_info + __deregister_frame_info + __frame_state_for + __register_frame_info_table +} + +%if !defined (__arch64__) && defined (__LONG_DOUBLE_128__) + +# long double 128 bit support from 32-bit libgcc_s.so.1 is only available +# when configured with --with-long-double-128. Make sure all the +# symbols are available at @@GCC_LDBL_* versions to make it clear +# there is a configurable symbol set. + +%exclude { + __fixtfdi + __fixunstfdi + __floatditf + + __divtc3 + __multc3 + __powitf2 +} + +%inherit GCC_LDBL_3.0 GCC_3.0 +GCC_LDBL_3.0 { + __fixtfdi + __fixunstfdi + __floatditf +} + +%inherit GCC_LDBL_4.0.0 GCC_4.0.0 +GCC_LDBL_4.0.0 { + __divtc3 + __multc3 + __powitf2 +} + +%endif diff --git a/gcc/config/sparc/linux-unwind.h b/gcc/config/sparc/linux-unwind.h new file mode 100644 index 000000000..adfef6ec2 --- /dev/null +++ b/gcc/config/sparc/linux-unwind.h @@ -0,0 +1,202 @@ +/* DWARF2 EH unwinding support for SPARC Linux. + Copyright 2004, 2005, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. */ + +#if defined(__arch64__) + +/* 64-bit SPARC version */ +#define MD_FALLBACK_FRAME_STATE_FOR sparc64_fallback_frame_state + +static _Unwind_Reason_Code +sparc64_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned int *pc = context->ra; + long this_cfa = (long) context->cfa; + long new_cfa, ra_location, shifted_ra_location; + long regs_off, fpu_save_off; + long fpu_save; + int i; + + if (pc[0] != 0x82102065 /* mov NR_rt_sigreturn, %g1 */ + || pc[1] != 0x91d0206d) /* ta 0x6d */ + return _URC_END_OF_STACK; + + regs_off = 192 + 128; + fpu_save_off = regs_off + (16 * 8) + (3 * 8) + (2 * 4); + + new_cfa = *(long *)(this_cfa + regs_off + (14 * 8)); + new_cfa += 2047; /* Stack bias */ + fpu_save = *(long *)(this_cfa + fpu_save_off); + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = __builtin_dwarf_sp_column (); + fs->regs.cfa_offset = new_cfa - this_cfa; + + for (i = 1; i < 16; i++) + { + /* We never restore %sp as everything is purely CFA-based. */ + if ((unsigned int) i == __builtin_dwarf_sp_column ()) + continue; + + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset + = this_cfa + regs_off + (i * 8) - new_cfa; + } + for (i = 0; i < 16; i++) + { + fs->regs.reg[i + 16].how = REG_SAVED_OFFSET; + fs->regs.reg[i + 16].loc.offset + = this_cfa + (i * 8) - new_cfa; + } + if (fpu_save) + { + for (i = 0; i < 64; i++) + { + if (i > 32 && (i & 0x1)) + continue; + fs->regs.reg[i + 32].how = REG_SAVED_OFFSET; + fs->regs.reg[i + 32].loc.offset + = fpu_save + (i * 4) - new_cfa; + } + } + + /* State the rules to find the kernel's code "return address", which is + the address of the active instruction when the signal was caught. + On the SPARC, since RETURN_ADDR_OFFSET (essentially 8) is defined, we + need to preventively subtract it from the purported return address. */ + ra_location = this_cfa + regs_off + 17 * 8; + shifted_ra_location = this_cfa + regs_off + 19 * 8; /* Y register */ + *(long *)shifted_ra_location = *(long *)ra_location - 8; + fs->retaddr_column = 0; + fs->regs.reg[0].how = REG_SAVED_OFFSET; + fs->regs.reg[0].loc.offset = shifted_ra_location - new_cfa; + fs->signal_frame = 1; + + return _URC_NO_REASON; +} + +#define MD_FROB_UPDATE_CONTEXT sparc64_frob_update_context + +static void +sparc64_frob_update_context (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + /* The column of %sp contains the old CFA, not the old value of %sp. + The CFA offset already comprises the stack bias so, when %sp is the + CFA register, we must avoid counting the stack bias twice. Do not + do that for signal frames as the offset is artificial for them. 
*/ + if (fs->regs.cfa_reg == __builtin_dwarf_sp_column () + && fs->regs.cfa_how == CFA_REG_OFFSET + && fs->regs.cfa_offset != 0 + && !fs->signal_frame) + context->cfa -= 2047; +} + +#else + +/* 32-bit SPARC version */ +#define MD_FALLBACK_FRAME_STATE_FOR sparc_fallback_frame_state + +static _Unwind_Reason_Code +sparc_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned int *pc = context->ra; + int this_cfa = (int) context->cfa; + int new_cfa, ra_location, shifted_ra_location; + int regs_off, fpu_save_off; + int fpu_save; + int old_style, i; + + if (pc[1] != 0x91d02010) /* ta 0x10 */ + return _URC_END_OF_STACK; + + if (pc[0] == 0x821020d8) /* mov NR_sigreturn, %g1 */ + old_style = 1; + else if (pc[0] == 0x82102065) /* mov NR_rt_sigreturn, %g1 */ + old_style = 0; + else + return _URC_END_OF_STACK; + + if (old_style) + { + regs_off = 96; + fpu_save_off = regs_off + (4 * 4) + (16 * 4); + } + else + { + regs_off = 96 + 128; + fpu_save_off = regs_off + (4 * 4) + (16 * 4) + (2 * 4); + } + + new_cfa = *(int *)(this_cfa + regs_off + (4 * 4) + (14 * 4)); + fpu_save = *(int *)(this_cfa + fpu_save_off); + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = __builtin_dwarf_sp_column (); + fs->regs.cfa_offset = new_cfa - this_cfa; + + for (i = 1; i < 16; i++) + { + /* We never restore %sp as everything is purely CFA-based. */ + if ((unsigned int) i == __builtin_dwarf_sp_column ()) + continue; + + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset + = this_cfa + regs_off + (4 * 4) + (i * 4) - new_cfa; + } + for (i = 0; i < 16; i++) + { + fs->regs.reg[i + 16].how = REG_SAVED_OFFSET; + fs->regs.reg[i + 16].loc.offset + = this_cfa + (i * 4) - new_cfa; + } + if (fpu_save) + { + for (i = 0; i < 32; i++) + { + fs->regs.reg[i + 32].how = REG_SAVED_OFFSET; + fs->regs.reg[i + 32].loc.offset + = fpu_save + (i * 4) - new_cfa; + } + } + + /* State the rules to find the kernel's code "return address", which is + the address of the active instruction when the signal was caught. + On the SPARC, since RETURN_ADDR_OFFSET (essentially 8) is defined, we + need to preventively subtract it from the purported return address. */ + ra_location = this_cfa + regs_off + 4; + shifted_ra_location = this_cfa + regs_off + 3 * 4; /* Y register */ + *(int *)shifted_ra_location = *(int *)ra_location - 8; + fs->retaddr_column = 0; + fs->regs.reg[0].how = REG_SAVED_OFFSET; + fs->regs.reg[0].loc.offset = shifted_ra_location - new_cfa; + fs->signal_frame = 1; + + return _URC_NO_REASON; +} + +#endif diff --git a/gcc/config/sparc/linux.h b/gcc/config/sparc/linux.h new file mode 100644 index 000000000..acdbcb928 --- /dev/null +++ b/gcc/config/sparc/linux.h @@ -0,0 +1,168 @@ +/* Definitions for SPARC running Linux-based GNU systems with ELF. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. + Contributed by Eddie C. Dost (ecd@skynet.be) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + LINUX_TARGET_OS_CPP_BUILTINS(); \ + if (TARGET_LONG_DOUBLE_128) \ + builtin_define ("__LONG_DOUBLE_128__"); \ + } \ + while (0) + +/* Provide a ENDFILE_SPEC appropriate for GNU/Linux. Here we tack on + the GNU/Linux magical crtend.o file (see crtstuff.c) which + provides part of the support for getting C++ file-scope static + object constructed before entering `main', followed by a normal + GNU/Linux "finalizer" file, `crtn.o'. */ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s\ + %{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" + +/* This is for -profile to use -lc_p instead of -lc. */ +#undef CC1_SPEC +#define CC1_SPEC "%{profile:-p} \ +" + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (sparc GNU/Linux with ELF)"); + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef CPP_SUBTARGET_SPEC +#define CPP_SUBTARGET_SPEC \ +"%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +/* Provide a LINK_SPEC appropriate for GNU/Linux. Here we provide support + for the special GCC options -static and -shared, which allow us to + link things in one of these three modes by applying the appropriate + combinations of options at link-time. + + When the -shared link option is used a final link is not being + done. */ + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" + +#undef LINK_SPEC +#define LINK_SPEC "-m elf32_sparc -Y P,/usr/lib %{shared:-shared} \ + %{!mno-relax:%{!r:-relax}} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " LINUX_DYNAMIC_LINKER "} \ + %{static:-static}}" + +/* It's safe to pass -s always, even if -g is not used. */ +#undef ASM_SPEC +#define ASM_SPEC "\ +-s \ +%{fpic|fPIC|fpie|fPIE:-K PIC} \ +%{!.c:%{findirect-dispatch:-K PIC}} \ +%(asm_cpu) %(asm_relax)" + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ +do { \ + fputs ("\t.local\t", (FILE)); \ + assemble_name ((FILE), (NAME)); \ + putc ('\n', (FILE)); \ + ASM_OUTPUT_ALIGNED_COMMON (FILE, NAME, SIZE, ALIGN); \ +} while (0) + +#undef COMMON_ASM_OP +#define COMMON_ASM_OP "\t.common\t" + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf (LABEL, "*.L%s%ld", PREFIX, (long)(NUM)) + + +/* Define for support of TFmode long double. + SPARC ABI says that long double is 4 words. */ +#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64) + +/* Define this to set long double type size to use in libgcc2.c, which can + not depend on target_flags. 
*/ +#ifdef __LONG_DOUBLE_128__ +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128 +#else +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#endif + +#undef DITF_CONVERSION_LIBFUNCS +#define DITF_CONVERSION_LIBFUNCS 1 + +#ifdef HAVE_AS_TLS +#undef TARGET_SUN_TLS +#undef TARGET_GNU_TLS +#define TARGET_SUN_TLS 0 +#define TARGET_GNU_TLS 1 +#endif + +/* We use GNU ld so undefine this so that attribute((init_priority)) works. */ +#undef CTORS_SECTION_ASM_OP +#undef DTORS_SECTION_ASM_OP + +/* Static stack checking is supported by means of probes. */ +#define STACK_CHECK_STATIC_BUILTIN 1 + +#define MD_UNWIND_SUPPORT "config/sparc/linux-unwind.h" + +/* Linux currently uses RMO in uniprocessor mode, which is equivalent to + TMO, and TMO in multiprocessor mode. But they reserve the right to + change their minds. */ +#undef SPARC_RELAXED_ORDERING +#define SPARC_RELAXED_ORDERING true + +#undef NEED_INDICATE_EXEC_STACK +#define NEED_INDICATE_EXEC_STACK 1 + +#ifdef TARGET_LIBC_PROVIDES_SSP +/* sparc glibc provides __stack_chk_guard in [%g7 + 0x14]. */ +#define TARGET_THREAD_SSP_OFFSET 0x14 +#endif + +/* Define if long doubles should be mangled as 'g'. */ +#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING + +/* We use glibc _mcount for profiling. */ +#undef NO_PROFILE_COUNTERS +#define NO_PROFILE_COUNTERS 1 diff --git a/gcc/config/sparc/linux64.h b/gcc/config/sparc/linux64.h new file mode 100644 index 000000000..38863588a --- /dev/null +++ b/gcc/config/sparc/linux64.h @@ -0,0 +1,289 @@ +/* Definitions for 64-bit SPARC running Linux-based GNU systems with ELF. + Copyright 1996, 1997, 1998, 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2008, + 2009, 2010, 2011 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@caip.rutgers.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + LINUX_TARGET_OS_CPP_BUILTINS(); \ + if (TARGET_ARCH64) \ + builtin_define ("_LONGLONG"); \ + if (TARGET_ARCH32 \ + && TARGET_LONG_DOUBLE_128) \ + builtin_define ("__LONG_DOUBLE_128__"); \ + } \ + while (0) + +#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \ + || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \ + || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3 \ + || TARGET_CPU_DEFAULT == TARGET_CPU_niagara \ + || TARGET_CPU_DEFAULT == TARGET_CPU_niagara2 +/* A 64 bit v9 compiler with stack-bias, + in a Medium/Low code model environment. */ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT \ + (MASK_V9 + MASK_PTR64 + MASK_64BIT /* + MASK_HARD_QUAD */ \ + + MASK_STACK_BIAS + MASK_APP_REGS + MASK_FPU + MASK_LONG_DOUBLE_128) +#endif + +/* This must be v9a not just v9 because by default we enable + -mvis. */ +#undef ASM_CPU64_DEFAULT_SPEC +#define ASM_CPU64_DEFAULT_SPEC "-Av9a" + +/* Provide a ENDFILE_SPEC appropriate for GNU/Linux. 
Here we tack on + the GNU/Linux magical crtend.o file (see crtstuff.c) which + provides part of the support for getting C++ file-scope static + object constructed before entering `main', followed by a normal + GNU/Linux "finalizer" file, `crtn.o'. */ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s\ + %{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (sparc64 GNU/Linux with ELF)"); + +/* The default code model. */ +#undef SPARC_DEFAULT_CMODEL +#define SPARC_DEFAULT_CMODEL CM_MEDLOW + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +/* Define for support of TFmode long double. + SPARC ABI says that long double is 4 words. */ +#undef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64) + +/* Define this to set long double type size to use in libgcc2.c, which can + not depend on target_flags. */ +#if defined(__arch64__) || defined(__LONG_DOUBLE_128__) +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128 +#else +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#endif + +#undef CPP_SUBTARGET_SPEC +#define CPP_SUBTARGET_SPEC "\ +%{posix:-D_POSIX_SOURCE} \ +%{pthread:-D_REENTRANT} \ +" + +/* Provide a LINK_SPEC appropriate for GNU/Linux. Here we provide support + for the special GCC options -static and -shared, which allow us to + link things in one of these three modes by applying the appropriate + combinations of options at link-time. + + When the -shared link option is used a final link is not being + done. */ + +#define GLIBC_DYNAMIC_LINKER32 "/lib/ld-linux.so.2" +#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld-linux.so.2" + +#ifdef SPARC_BI_ARCH + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "link_arch32", LINK_ARCH32_SPEC }, \ + { "link_arch64", LINK_ARCH64_SPEC }, \ + { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \ + { "link_arch", LINK_ARCH_SPEC }, + +#define LINK_ARCH32_SPEC "-m elf32_sparc -Y P,%R/usr/lib %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " LINUX_DYNAMIC_LINKER32 "} \ + %{static:-static}} \ +" + +#define LINK_ARCH64_SPEC "-m elf64_sparc -Y P,%R/usr/lib64 %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " LINUX_DYNAMIC_LINKER64 "} \ + %{static:-static}} \ +" + +#define LINK_ARCH_SPEC "\ +%{m32:%(link_arch32)} \ +%{m64:%(link_arch64)} \ +%{!m32:%{!m64:%(link_arch_default)}} \ +" + +#define LINK_ARCH_DEFAULT_SPEC \ +(DEFAULT_ARCH32_P ? LINK_ARCH32_SPEC : LINK_ARCH64_SPEC) + +#undef LINK_SPEC +#define LINK_SPEC "\ +%(link_arch) \ +%{mlittle-endian:-EL} \ +%{!mno-relax:%{!r:-relax}} \ +" + +#undef CC1_SPEC +#if DEFAULT_ARCH32_P +#define CC1_SPEC "%{profile:-p} \ +%{m32:%{m64:%emay not use both -m32 and -m64}} \ +%{m64:-mptr64 -mstack-bias -mlong-double-128 \ + %{!mcpu*:-mcpu=ultrasparc} \ + %{!mno-vis:%{!mcpu=v9:-mvis}}} \ +" +#else +#define CC1_SPEC "%{profile:-p} \ +%{m32:%{m64:%emay not use both -m32 and -m64}} \ +%{m32:-mptr32 -mno-stack-bias %{!mlong-double-128:-mlong-double-64} \ + %{!mcpu*:-mcpu=cypress}} \ +%{!m32:%{!mcpu*:-mcpu=ultrasparc}} \ +%{!mno-vis:%{!m32:%{!mcpu=v9:-mvis}}} \ +" +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-cpu is ignored if -mcpu is specified. + --with-tune is ignored if -mtune is specified. + --with-float is ignored if -mhard-float, -msoft-float, -mfpu, or -mno-fpu + are specified. 
+ In the SPARC_BI_ARCH compiler we cannot pass %{!mcpu=*:-mcpu=%(VALUE)} + here, otherwise say -mcpu=v7 would be passed even when -m64. + CC1_SPEC above takes care of this instead. */ +#undef OPTION_DEFAULT_SPECS +#if DEFAULT_ARCH32_P +#define OPTION_DEFAULT_SPECS \ + {"cpu", "%{!m64:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \ + {"float", "%{!msoft-float:%{!mhard-float:%{!mfpu:%{!mno-fpu:-m%(VALUE)-float}}}}" } +#else +#define OPTION_DEFAULT_SPECS \ + {"cpu", "%{!m32:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \ + {"float", "%{!msoft-float:%{!mhard-float:%{!mfpu:%{!mno-fpu:-m%(VALUE)-float}}}}" } +#endif + +#if DEFAULT_ARCH32_P +#define MULTILIB_DEFAULTS { "m32" } +#else +#define MULTILIB_DEFAULTS { "m64" } +#endif + +#else /* !SPARC_BI_ARCH */ + +#undef LINK_SPEC +#define LINK_SPEC "-m elf64_sparc -Y P,%R/usr/lib64 %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " LINUX_DYNAMIC_LINKER64 "} \ + %{static:-static}} \ +%{mlittle-endian:-EL} \ +%{!mno-relax:%{!r:-relax}} \ +" + +#endif /* !SPARC_BI_ARCH */ + +/* It's safe to pass -s always, even if -g is not used. */ +#undef ASM_SPEC +#define ASM_SPEC "\ +-s \ +%{fpic|fPIC|fpie|fPIE:-K PIC} \ +%{!.c:%{findirect-dispatch:-K PIC}} \ +%{mlittle-endian:-EL} \ +%(asm_cpu) %(asm_arch) %(asm_relax)" + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ +do { \ + fputs ("\t.local\t", (FILE)); \ + assemble_name ((FILE), (NAME)); \ + putc ('\n', (FILE)); \ + ASM_OUTPUT_ALIGNED_COMMON (FILE, NAME, SIZE, ALIGN); \ +} while (0) + +#undef COMMON_ASM_OP +#define COMMON_ASM_OP "\t.common\t" + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf (LABEL, "*.L%s%ld", PREFIX, (long)(NUM)) + +/* DWARF bits. */ + +/* Follow Irix 6 and not the Dwarf2 draft in using 64-bit offsets. + Obviously the Dwarf2 folks haven't tried to actually build systems + with their spec. On a 64-bit system, only 64-bit relocs become + RELATIVE relocations. */ + +/* #define DWARF_OFFSET_SIZE PTR_SIZE */ + +#undef DITF_CONVERSION_LIBFUNCS +#define DITF_CONVERSION_LIBFUNCS 1 + +#ifdef HAVE_AS_TLS +#undef TARGET_SUN_TLS +#undef TARGET_GNU_TLS +#define TARGET_SUN_TLS 0 +#define TARGET_GNU_TLS 1 +#endif + +/* We use GNU ld so undefine this so that attribute((init_priority)) works. */ +#undef CTORS_SECTION_ASM_OP +#undef DTORS_SECTION_ASM_OP + +/* Static stack checking is supported by means of probes. */ +#define STACK_CHECK_STATIC_BUILTIN 1 + +#define MD_UNWIND_SUPPORT "config/sparc/linux-unwind.h" + +/* Linux currently uses RMO in uniprocessor mode, which is equivalent to + TMO, and TMO in multiprocessor mode. But they reserve the right to + change their minds. */ +#undef SPARC_RELAXED_ORDERING +#define SPARC_RELAXED_ORDERING true + +#undef NEED_INDICATE_EXEC_STACK +#define NEED_INDICATE_EXEC_STACK 1 + +#ifdef TARGET_LIBC_PROVIDES_SSP +/* sparc glibc provides __stack_chk_guard in [%g7 + 0x14], + sparc64 glibc provides it at [%g7 + 0x28]. */ +#define TARGET_THREAD_SSP_OFFSET (TARGET_ARCH64 ? 0x28 : 0x14) +#endif + +/* Define if long doubles should be mangled as 'g'. 
*/ +#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING + +/* We use glibc _mcount for profiling. */ +#undef NO_PROFILE_COUNTERS +#define NO_PROFILE_COUNTERS 1 diff --git a/gcc/config/sparc/little-endian.opt b/gcc/config/sparc/little-endian.opt new file mode 100644 index 000000000..52db029c0 --- /dev/null +++ b/gcc/config/sparc/little-endian.opt @@ -0,0 +1,27 @@ +; Options for the SPARC port of the compiler +; +; Copyright (C) 2005, 2007 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +mlittle-endian +Target Report RejectNegative Mask(LITTLE_ENDIAN) MaskExists +Generate code for little-endian + +mbig-endian +Target Report RejectNegative InverseMask(LITTLE_ENDIAN) +Generate code for big-endian diff --git a/gcc/config/sparc/long-double-switch.opt b/gcc/config/sparc/long-double-switch.opt new file mode 100644 index 000000000..eb3c1a00f --- /dev/null +++ b/gcc/config/sparc/long-double-switch.opt @@ -0,0 +1,27 @@ +; Options for the SPARC port of the compiler +; +; Copyright (C) 2005, 2007 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +mlong-double-128 +Target Report RejectNegative Mask(LONG_DOUBLE_128) MaskExists +Use 128-bit long double + +mlong-double-64 +Target Report RejectNegative InverseMask(LONG_DOUBLE_128) +Use 64-bit long double diff --git a/gcc/config/sparc/netbsd-elf.h b/gcc/config/sparc/netbsd-elf.h new file mode 100644 index 000000000..ed9cabe1e --- /dev/null +++ b/gcc/config/sparc/netbsd-elf.h @@ -0,0 +1,246 @@ +/* Definitions of target machine for GCC, for ELF on NetBSD/sparc + and NetBSD/sparc64. + Copyright (C) 2002, 2003, 2004, 2005, 2007, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Matthew Green (mrg@eterna.com.au). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + if (TARGET_ARCH64) \ + { \ + builtin_define ("__sparc64__"); \ + builtin_define ("__sparc_v9__"); \ + builtin_define ("__sparcv9"); \ + } \ + else \ + builtin_define ("__sparc"); \ + builtin_define ("__sparc__"); \ + } \ + while (0) + +/* CPP defines used by all NetBSD targets. */ +#undef CPP_SUBTARGET_SPEC +#define CPP_SUBTARGET_SPEC "%(netbsd_cpp_spec)" + +/* SIZE_TYPE and PTRDIFF_TYPE are wrong from sparc/sparc.h. */ +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +/* This is the char to use for continuation (in case we need to turn + continuation back on). */ +#undef DBX_CONTIN_CHAR +#define DBX_CONTIN_CHAR '?' + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf ((LABEL), "*.L%s%ld", (PREFIX), (long)(NUM)) + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "" + +#undef ASM_SPEC +#define ASM_SPEC "%{fpic|fPIC|fpie|fPIE:-K PIC} \ +%{mlittle-endian:-EL} \ +%(asm_cpu) %(asm_arch) %(asm_relax)" + +#undef STDC_0_IN_SYSTEM_HEADERS + +/* Attempt to enable execute permissions on the stack. */ +#define ENABLE_EXECUTE_STACK NETBSD_ENABLE_EXECUTE_STACK + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (%s)", TARGET_NAME); + +/* Below here exists the merged NetBSD/sparc & NetBSD/sparc64 compiler + description, allowing one to build 32-bit or 64-bit applications + on either. We define the sparc & sparc64 versions of things, + occasionally a neutral version (should be the same as "netbsd-elf.h") + and then based on SPARC_BI_ARCH, DEFAULT_ARCH32_P, and TARGET_CPU_DEFAULT, + we choose the correct version. */ + +/* We use the default NetBSD ELF STARTFILE_SPEC and ENDFILE_SPEC + definitions, even for the SPARC_BI_ARCH compiler, because NetBSD does + not have a default place to find these libraries.. */ + +/* Name the port(s). */ +#define TARGET_NAME64 "NetBSD/sparc64 ELF" +#define TARGET_NAME32 "NetBSD/sparc ELF" + +/* TARGET_CPU_DEFAULT is set in Makefile.in. We test for 64-bit default + platform here. */ + +#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \ + || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc +/* A 64 bit v9 compiler with stack-bias, + in a Medium/Low code model environment. */ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT \ + (MASK_V9 + MASK_PTR64 + MASK_64BIT /* + MASK_HARD_QUAD */ \ + + MASK_STACK_BIAS + MASK_APP_REGS + MASK_FPU + MASK_LONG_DOUBLE_128) + +#undef SPARC_DEFAULT_CMODEL +#define SPARC_DEFAULT_CMODEL CM_MEDANY + +#endif + +/* CC1_SPEC for NetBSD/sparc. */ +#define CC1_SPEC32 \ + "%{m32:%{m64:%emay not use both -m32 and -m64}} \ + %{m64: \ + -mptr64 -mstack-bias -mno-v8plus -mlong-double-128 \ + %{!mcpu*:%{!mv8plus:-mcpu=ultrasparc}} \ + %{!mno-vis:%{!mcpu=v9:-mvis}} \ + %{p:-mcmodel=medlow} \ + %{pg:-mcmodel=medlow}}" + +#define CC1_SPEC64 \ + "%{m32:%{m64:%emay not use both -m32 and -m64}} \ + %{m32: \ + -mptr32 -mno-stack-bias \ + %{!mlong-double-128:-mlong-double-64} \ + %{!mcpu*:%{!mv8plus:-mcpu=cypress}}} \ + %{!m32: \ + %{p:-mcmodel=medlow} \ + %{pg:-mcmodel=medlow}}" + +/* Make sure we use the right output format. 
Pick a default and then + make sure -m32/-m64 switch to the right one. */ + +#define LINK_ARCH32_SPEC "-m elf32_sparc" + +#define LINK_ARCH64_SPEC "-m elf64_sparc" + +#define LINK_ARCH_SPEC \ + "%{m32:%(link_arch32)} \ + %{m64:%(link_arch64)} \ + %{!m32:%{!m64:%(link_arch_default)}}" + +#undef LINK_SPEC +#define LINK_SPEC \ + "%(link_arch) \ + %{!mno-relax:%{!r:-relax}} \ + %(netbsd_link_spec)" + +#define NETBSD_ENTRY_POINT "__start" + +#if DEFAULT_ARCH32_P +#define LINK_ARCH_DEFAULT_SPEC LINK_ARCH32_SPEC +#else +#define LINK_ARCH_DEFAULT_SPEC LINK_ARCH64_SPEC +#endif + +/* What extra spec entries do we need? */ +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "link_arch32", LINK_ARCH32_SPEC }, \ + { "link_arch64", LINK_ARCH64_SPEC }, \ + { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \ + { "link_arch", LINK_ARCH_SPEC }, \ + { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, \ + { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \ + { "netbsd_entry_point", NETBSD_ENTRY_POINT }, + + +/* Build a compiler that supports -m32 and -m64? */ + +#ifdef SPARC_BI_ARCH + +#undef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64) + +#if defined(__arch64__) || defined(__LONG_DOUBLE_128__) +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128 +#else +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#endif + +#undef CC1_SPEC +#if DEFAULT_ARCH32_P +#define CC1_SPEC CC1_SPEC32 +#else +#define CC1_SPEC CC1_SPEC64 +#endif + +#if DEFAULT_ARCH32_P +#define MULTILIB_DEFAULTS { "m32" } +#else +#define MULTILIB_DEFAULTS { "m64" } +#endif + +/* Name the port. */ +#undef TARGET_NAME +#define TARGET_NAME (DEFAULT_ARCH32_P ? TARGET_NAME32 : TARGET_NAME64) + +#else /* SPARC_BI_ARCH */ + +#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \ + || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc + +#undef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE 128 + +#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128 + +#undef CC1_SPEC +#define CC1_SPEC CC1_SPEC64 + +#undef TARGET_NAME +#define TARGET_NAME TARGET_NAME64 + +#else /* TARGET_CPU_DEFAULT == TARGET_CPU_v9 \ + || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc */ + +/* A 32-bit only compiler. NetBSD don't support 128 bit `long double' + for 32-bit code, unlike Solaris. */ + +#undef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE 64 + +#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 + +#undef CC1_SPEC +#define CC1_SPEC CC1_SPEC32 + +#undef TARGET_NAME +#define TARGET_NAME TARGET_NAME32 + +#endif /* TARGET_CPU_DEFAULT == TARGET_CPU_v9 \ + || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc */ + +#endif /* SPARC_BI_ARCH */ + +/* We use GNU ld so undefine this so that attribute((init_priority)) works. */ +#undef CTORS_SECTION_ASM_OP +#undef DTORS_SECTION_ASM_OP diff --git a/gcc/config/sparc/niagara.md b/gcc/config/sparc/niagara.md new file mode 100644 index 000000000..e73c65b80 --- /dev/null +++ b/gcc/config/sparc/niagara.md @@ -0,0 +1,118 @@ +;; Scheduling description for Niagara. +;; Copyright (C) 2006, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Niagara is a single-issue processor. + +(define_automaton "niagara_0") + +(define_cpu_unit "niag_pipe" "niagara_0") + +(define_insn_reservation "niag_5cycle" 5 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "multi,flushw,iflush,trap")) + "niag_pipe*5") + +(define_insn_reservation "niag_4cycle" 4 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "savew")) + "niag_pipe*4") + +/* Most basic operations are single-cycle. */ +(define_insn_reservation "niag_ialu" 1 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "ialu,shift,compare,cmove")) + "niag_pipe") + +(define_insn_reservation "niag_imul" 11 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "imul")) + "niag_pipe*11") + +(define_insn_reservation "niag_idiv" 72 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "idiv")) + "niag_pipe*72") + +(define_insn_reservation "niag_branch" 3 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch,branch")) + "niag_pipe*3") + +(define_insn_reservation "niag_3cycle_load" 3 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "load")) + "niag_pipe*3") + +(define_insn_reservation "niag_9cycle_load" 9 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "fpload")) + "niag_pipe*9") + +(define_insn_reservation "niag_1cycle_store" 1 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "store")) + "niag_pipe") + +(define_insn_reservation "niag_8cycle_store" 8 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "fpstore")) + "niag_pipe*8") + +/* Things incorrectly modelled here: + * FPADD{s,d}: 26 cycles + * FPSUB{s,d}: 26 cycles + * FABSD: 26 cycles + * F{s,d}TO{s,d}: 26 cycles + * F{s,d}TO{i,x}: 26 cycles + * FSMULD: 29 cycles + */ +(define_insn_reservation "niag_fmov" 8 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "fpmove,fpcmove,fpcrmove")) + "niag_pipe*8") + +(define_insn_reservation "niag_fpcmp" 26 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "fpcmp")) + "niag_pipe*26") + +(define_insn_reservation "niag_fmult" 29 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "fpmul")) + "niag_pipe*29") + +(define_insn_reservation "niag_fdivs" 54 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "fpdivs")) + "niag_pipe*54") + +(define_insn_reservation "niag_fdivd" 83 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "fpdivd")) + "niag_pipe*83") + +/* Things incorrectly modelled here: + * FPADD{16,32}: 10 cycles + * FPSUB{16,32}: 10 cycles + * FALIGNDATA: 10 cycles + */ +(define_insn_reservation "niag_vis" 8 + (and (eq_attr "cpu" "niagara") + (eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_cmp,fgm_pdist")) + "niag_pipe*8") diff --git a/gcc/config/sparc/niagara2.md b/gcc/config/sparc/niagara2.md new file mode 100644 index 000000000..298ebe013 --- /dev/null +++ b/gcc/config/sparc/niagara2.md @@ -0,0 +1,90 @@ +;; Scheduling description for Niagara-2. +;; Copyright (C) 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Niagara-2 is a single-issue processor. + +(define_automaton "niagara2_0") + +(define_cpu_unit "niag2_pipe" "niagara2_0") + +(define_insn_reservation "niag2_25cycle" 25 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "flushw")) + "niag2_pipe*25") + +(define_insn_reservation "niag2_5cycle" 5 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "multi,flushw,iflush,trap")) + "niag2_pipe*5") + +(define_insn_reservation "niag2_6cycle" 4 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "savew")) + "niag2_pipe*4") + +/* Most basic operations are single-cycle. */ +(define_insn_reservation "niag2_ialu" 1 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "ialu,shift,compare,cmove")) + "niag2_pipe") + +(define_insn_reservation "niag2_imul" 5 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "imul")) + "niag2_pipe*5") + +(define_insn_reservation "niag2_idiv" 31 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "idiv")) + "niag2_pipe*31") + +(define_insn_reservation "niag2_branch" 5 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch,branch")) + "niag2_pipe*5") + +(define_insn_reservation "niag2_3cycle_load" 3 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "load,fpload")) + "niag2_pipe*3") + +(define_insn_reservation "niag2_1cycle_store" 1 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "store,fpstore")) + "niag2_pipe") + +(define_insn_reservation "niag2_fp" 3 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "fpmove,fpcmove,fpcrmove,fpcmp,fpmul")) + "niag2_pipe*3") + +(define_insn_reservation "niag2_fdivs" 19 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "fpdivs")) + "niag2_pipe*19") + +(define_insn_reservation "niag2_fdivd" 33 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "fpdivd")) + "niag2_pipe*33") + +(define_insn_reservation "niag2_vis" 6 + (and (eq_attr "cpu" "niagara2") + (eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_cmp,fgm_pdist")) + "niag2_pipe*6") diff --git a/gcc/config/sparc/openbsd1-64.h b/gcc/config/sparc/openbsd1-64.h new file mode 100644 index 000000000..77ca79fe5 --- /dev/null +++ b/gcc/config/sparc/openbsd1-64.h @@ -0,0 +1,23 @@ +/* Configuration file for sparc64 OpenBSD target. + Copyright (C) 1999, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define OBSD_HAS_DECLARE_FUNCTION_NAME +#define OBSD_HAS_DECLARE_FUNCTION_SIZE +#define OBSD_HAS_DECLARE_OBJECT + diff --git a/gcc/config/sparc/openbsd64.h b/gcc/config/sparc/openbsd64.h new file mode 100644 index 000000000..5d87f72e5 --- /dev/null +++ b/gcc/config/sparc/openbsd64.h @@ -0,0 +1,85 @@ +/* Configuration file for sparc64 OpenBSD target. + Copyright (C) 1999, 2005, 2007, 2010 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (sparc64 OpenBSD ELF)") + +/* XXX - do we really want HARD_QUAD? */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT \ +(MASK_V9 + MASK_PTR64 + MASK_64BIT + MASK_HARD_QUAD \ + + MASK_APP_REGS + MASK_FPU + MASK_STACK_BIAS + MASK_LONG_DOUBLE_128) + +#undef SPARC_DEFAULT_CMODEL +#define SPARC_DEFAULT_CMODEL CM_MEDMID + +/* Target OS builtins. */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__unix__"); \ + builtin_define ("__OpenBSD__"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=OpenBSD"); \ + builtin_define ("__sparc64__"); \ + builtin_define ("__sparcv9__"); \ + builtin_define ("__sparc_v9__"); \ + builtin_define ("__arch64__"); \ + } \ + while (0) + +#undef CPP_SUBTARGET_SPEC +#define CPP_SUBTARGET_SPEC "" + +/* Inherited from sp64-elf. */ +#undef NO_IMPLICIT_EXTERN_C + +#undef ASM_SPEC +#define ASM_SPEC "\ +-s %{fpic|fPIC|fpie|fPIE:-K PIC} \ +%{mlittle-endian:-EL} \ +%(asm_cpu) %(asm_arch) \ +" + +/* Layout of source language data types. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE 128 + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{!shared:%{!nostdlib:%{!r:%{!e*:-e __start}}}} \ + %{shared:-shared} %{R*} \ + %{static:-Bstatic} \ + %{!static:-Bdynamic} \ + %{assert*} \ + -dynamic-linker /usr/libexec/ld.so" + +/* As an elf system, we need crtbegin/crtend stuff. */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "\ + %{!shared: %{pg:gcrt0%O%s} %{!pg:%{p:gcrt0%O%s} %{!p:crt0%O%s}} \ + crtbegin%O%s} %{shared:crtbeginS%O%s}" +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}" diff --git a/gcc/config/sparc/predicates.md b/gcc/config/sparc/predicates.md new file mode 100644 index 000000000..4af960a88 --- /dev/null +++ b/gcc/config/sparc/predicates.md @@ -0,0 +1,475 @@ +;; Predicate definitions for SPARC. +;; Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Predicates for numerical constants. + +;; Return true if OP is the zero constant for MODE. 
+(define_predicate "const_zero_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +;; Return true if OP is the one constant for MODE. +(define_predicate "const_one_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST1_RTX (mode)"))) + +;; Return true if OP is the integer constant 4096. +(define_predicate "const_4096_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 4096"))) + +;; Return true if OP is a constant that is representable by a 13-bit +;; signed field. This is an acceptable immediate operand for most +;; 3-address instructions. +(define_predicate "small_int_operand" + (and (match_code "const_int") + (match_test "SPARC_SIMM13_P (INTVAL (op))"))) + +;; Return true if OP is a constant operand for the umul instruction. That +;; instruction sign-extends immediate values just like all other SPARC +;; instructions, but interprets the extended result as an unsigned number. +(define_predicate "uns_small_int_operand" + (match_code "const_int,const_double") +{ +#if HOST_BITS_PER_WIDE_INT == 32 + return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000) + || (GET_CODE (op) == CONST_DOUBLE + && CONST_DOUBLE_HIGH (op) == 0 + && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000)); +#else + return (GET_CODE (op) == CONST_INT + && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000) + || (INTVAL (op) >= 0xFFFFF000 + && INTVAL (op) <= 0xFFFFFFFF))); +#endif +}) + +;; Return true if OP is a constant that can be loaded by the sethi instruction. +;; The first test avoids emitting sethi to load zero for example. +(define_predicate "const_high_operand" + (and (match_code "const_int") + (and (not (match_operand 0 "small_int_operand")) + (match_test "SPARC_SETHI_P (INTVAL (op) & GET_MODE_MASK (mode))")))) + +;; Return true if OP is a constant whose 1's complement can be loaded by the +;; sethi instruction. +(define_predicate "const_compl_high_operand" + (and (match_code "const_int") + (and (not (match_operand 0 "small_int_operand")) + (match_test "SPARC_SETHI_P (~INTVAL (op) & GET_MODE_MASK (mode))")))) + +;; Return true if OP is a FP constant that needs to be loaded by the sethi/losum +;; pair of instructions. +(define_predicate "fp_const_high_losum_operand" + (match_operand 0 "const_double_operand") +{ + gcc_assert (mode == SFmode); + return fp_high_losum_p (op); +}) + +;; Return true if OP is a const_double or const_vector. +(define_predicate "const_double_or_vector_operand" + (match_code "const_double,const_vector")) + + +;; Predicates for symbolic constants. + +;; Return true if OP is either a symbol reference or a sum of a symbol +;; reference and a constant. +(define_predicate "symbolic_operand" + (match_code "symbol_ref,label_ref,const") +{ + enum machine_mode omode = GET_MODE (op); + + if (omode != mode && omode != VOIDmode && mode != VOIDmode) + return false; + + switch (GET_CODE (op)) + { + case SYMBOL_REF: + return !SYMBOL_REF_TLS_MODEL (op); + + case LABEL_REF: + return true; + + case CONST: + op = XEXP (op, 0); + return (((GET_CODE (XEXP (op, 0)) == SYMBOL_REF + && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0))) + || GET_CODE (XEXP (op, 0)) == LABEL_REF) + && GET_CODE (XEXP (op, 1)) == CONST_INT); + + default: + gcc_unreachable (); + } +}) + +;; Return true if OP is a symbolic operand for the TLS Global Dynamic model. 
+(define_predicate "tgd_symbolic_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_GLOBAL_DYNAMIC"))) + +;; Return true if OP is a symbolic operand for the TLS Local Dynamic model. +(define_predicate "tld_symbolic_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_DYNAMIC"))) + +;; Return true if OP is a symbolic operand for the TLS Initial Exec model. +(define_predicate "tie_symbolic_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_INITIAL_EXEC"))) + +;; Return true if OP is a symbolic operand for the TLS Local Exec model. +(define_predicate "tle_symbolic_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_EXEC"))) + +;; Return true if the operand is an argument used in generating PIC references +;; in either the medium/low or embedded medium/anywhere code models on V9. +;; Check for (const (minus (symbol_ref:GOT) +;; (const (minus (label) (pc))))) +(define_predicate "medium_pic_operand" + (match_code "const") +{ + /* Check for (const (minus (symbol_ref:GOT) + (const (minus (label) (pc))))). */ + op = XEXP (op, 0); + return GET_CODE (op) == MINUS + && GET_CODE (XEXP (op, 0)) == SYMBOL_REF + && GET_CODE (XEXP (op, 1)) == CONST + && GET_CODE (XEXP (XEXP (op, 1), 0)) == MINUS; +}) + +;; Return true if OP is a LABEL_REF of mode MODE. +(define_predicate "label_ref_operand" + (and (match_code "label_ref") + (match_test "GET_MODE (op) == mode"))) + +;; Return true if OP is a data segment reference. This includes the readonly +;; data segment or, in other words, anything but the text segment. +;; This is needed in the embedded medium/anywhere code model on V9. These +;; values are accessed with EMBMEDANY_BASE_REG. */ +(define_predicate "data_segment_operand" + (match_code "symbol_ref,plus,const") +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF : + return ! SYMBOL_REF_FUNCTION_P (op); + case PLUS : + /* Assume canonical format of symbol + constant. + Fall through. */ + case CONST : + return data_segment_operand (XEXP (op, 0), VOIDmode); + default : + gcc_unreachable (); + } +}) + +;; Return true if OP is a text segment reference. +;; This is needed in the embedded medium/anywhere code model on V9. +(define_predicate "text_segment_operand" + (match_code "label_ref,symbol_ref,plus,const") +{ + switch (GET_CODE (op)) + { + case LABEL_REF : + return true; + case SYMBOL_REF : + return SYMBOL_REF_FUNCTION_P (op); + case PLUS : + /* Assume canonical format of symbol + constant. + Fall through. */ + case CONST : + return text_segment_operand (XEXP (op, 0), VOIDmode); + default : + gcc_unreachable (); + } +}) + + +;; Predicates for registers. + +;; Return true if OP is either the zero constant or a register. +(define_predicate "register_or_zero_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_zero_operand"))) + +;; Return true if OP is a register operand in a floating point register. +(define_predicate "fp_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); /* Possibly a MEM */ + return REG_P (op) && SPARC_FP_REG_P (REGNO (op)); +}) + +;; Return true if OP is an integer register. +(define_special_predicate "int_register_operand" + (ior (match_test "register_operand (op, SImode)") + (match_test "TARGET_ARCH64 && register_operand (op, DImode)"))) + +;; Return true if OP is a floating point condition code register. 
+(define_predicate "fcc_register_operand" + (match_code "reg") +{ + if (mode != VOIDmode && mode != GET_MODE (op)) + return false; + if (mode == VOIDmode + && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode)) + return false; + +#if 0 /* ??? 1 when %fcc0-3 are pseudos first. See gen_compare_reg(). */ + if (reg_renumber == 0) + return REGNO (op) >= FIRST_PSEUDO_REGISTER; + return REGNO_OK_FOR_CCFP_P (REGNO (op)); +#else + return ((unsigned) REGNO (op) - SPARC_FIRST_V9_FCC_REG) < 4; +#endif +}) + +;; Return true if OP is the floating point condition code register fcc0. +(define_predicate "fcc0_register_operand" + (match_code "reg") +{ + if (mode != VOIDmode && mode != GET_MODE (op)) + return false; + if (mode == VOIDmode + && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode)) + return false; + + return REGNO (op) == SPARC_FCC_REG; +}) + +;; Return true if OP is an integer or floating point condition code register. +(define_predicate "icc_or_fcc_register_operand" + (match_code "reg") +{ + if (REGNO (op) == SPARC_ICC_REG) + { + if (mode != VOIDmode && mode != GET_MODE (op)) + return false; + if (mode == VOIDmode + && GET_MODE (op) != CCmode && GET_MODE (op) != CCXmode) + return false; + + return true; + } + + return fcc_register_operand (op, mode); +}) + + +;; Predicates for arithmetic instructions. + +;; Return true if OP is a register, or is a constant that is representable +;; by a 13-bit signed field. This is an acceptable operand for most +;; 3-address instructions. +(define_predicate "arith_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "small_int_operand"))) + +;; 64-bit: Same as above. +;; 32-bit: Return true if OP is a register, or is a constant that is +;; representable by a couple of 13-bit signed fields. This is an +;; acceptable operand for most 3-address splitters. +(define_predicate "arith_double_operand" + (match_code "const_int,const_double,reg,subreg") +{ + bool arith_simple_operand = arith_operand (op, mode); + HOST_WIDE_INT m1, m2; + + if (TARGET_ARCH64 || arith_simple_operand) + return arith_simple_operand; + +#if HOST_BITS_PER_WIDE_INT == 32 + if (GET_CODE (op) != CONST_DOUBLE) + return false; + m1 = CONST_DOUBLE_LOW (op); + m2 = CONST_DOUBLE_HIGH (op); +#else + if (GET_CODE (op) != CONST_INT) + return false; + m1 = trunc_int_for_mode (INTVAL (op), SImode); + m2 = trunc_int_for_mode (INTVAL (op) >> 32, SImode); +#endif + + return SPARC_SIMM13_P (m1) && SPARC_SIMM13_P (m2); +}) + +;; Return true if OP is suitable as second operand for add/sub. +(define_predicate "arith_add_operand" + (ior (match_operand 0 "arith_operand") + (match_operand 0 "const_4096_operand"))) + +;; Return true if OP is suitable as second double operand for add/sub. +(define_predicate "arith_double_add_operand" + (match_code "const_int,const_double,reg,subreg") +{ + bool _arith_double_operand = arith_double_operand (op, mode); + + if (_arith_double_operand) + return true; + + return TARGET_ARCH64 && const_4096_operand (op, mode); +}) + +;; Return true if OP is a register, or is a CONST_INT that can fit in a +;; signed 10-bit immediate field. This is an acceptable SImode operand for +;; the movrcc instructions. +(define_predicate "arith10_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "SPARC_SIMM10_P (INTVAL (op))")))) + +;; Return true if OP is a register, or is a CONST_INT that can fit in a +;; signed 11-bit immediate field. This is an acceptable SImode operand for +;; the movcc instructions. 
+(define_predicate "arith11_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "SPARC_SIMM11_P (INTVAL (op))")))) + +;; Return true if OP is a register or a constant for the umul instruction. +(define_predicate "uns_arith_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "uns_small_int_operand"))) + + +;; Predicates for miscellaneous instructions. + +;; Return true if OP is valid for the lhs of a comparison insn. +(define_predicate "compare_operand" + (match_code "reg,subreg,zero_extract") +{ + if (GET_CODE (op) == ZERO_EXTRACT) + return (register_operand (XEXP (op, 0), mode) + && small_int_operand (XEXP (op, 1), mode) + && small_int_operand (XEXP (op, 2), mode) + /* This matches cmp_zero_extract. */ + && ((mode == SImode + && INTVAL (XEXP (op, 2)) > 19) + /* This matches cmp_zero_extract_sp64. */ + || (TARGET_ARCH64 + && mode == DImode + && INTVAL (XEXP (op, 2)) > 51))); + else + return register_operand (op, mode); +}) + +;; Return true if OP is a valid operand for the source of a move insn. +(define_predicate "input_operand" + (match_code "const_int,const_double,const_vector,reg,subreg,mem") +{ + enum mode_class mclass; + + /* If both modes are non-void they must be the same. */ + if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op)) + return false; + + mclass = GET_MODE_CLASS (mode); + + /* Allow any 1-instruction integer constant. */ + if (mclass == MODE_INT + && (small_int_operand (op, mode) || const_high_operand (op, mode))) + return true; + + /* If 32-bit mode and this is a DImode constant, allow it + so that the splits can be generated. */ + if (TARGET_ARCH32 + && mode == DImode + && (GET_CODE (op) == CONST_DOUBLE || GET_CODE (op) == CONST_INT)) + return true; + + if ((mclass == MODE_FLOAT && GET_CODE (op) == CONST_DOUBLE) + || (mclass == MODE_VECTOR_INT && GET_CODE (op) == CONST_VECTOR)) + return true; + + if (register_operand (op, mode)) + return true; + + /* If this is a SUBREG, look inside so that we handle paradoxical ones. */ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + /* Check for valid MEM forms. */ + if (GET_CODE (op) == MEM) + return memory_address_p (mode, XEXP (op, 0)); + + return false; +}) + +;; Return true if OP is an address suitable for a call insn. +;; Call insn on SPARC can take a PC-relative constant address +;; or any regular memory address. +(define_predicate "call_address_operand" + (ior (match_operand 0 "symbolic_operand") + (match_test "memory_address_p (Pmode, op)"))) + +;; Return true if OP is an operand suitable for a call insn. +(define_predicate "call_operand" + (and (match_code "mem") + (match_test "call_address_operand (XEXP (op, 0), mode)"))) + + +;; Predicates for operators. + +;; Return true if OP is a comparison operator. This allows the use of +;; MATCH_OPERATOR to recognize all the branch insns. +(define_predicate "noov_compare_operator" + (match_code "ne,eq,ge,gt,le,lt,geu,gtu,leu,ltu") +{ + enum rtx_code code = GET_CODE (op); + if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode + || GET_MODE (XEXP (op, 0)) == CCX_NOOVmode) + /* These are the only branches which work with CC_NOOVmode. */ + return (code == EQ || code == NE || code == GE || code == LT); + return true; +}) + +;; Return true if OP is a 64-bit comparison operator. This allows the use of +;; MATCH_OPERATOR to recognize all the branch insns. 
+(define_predicate "noov_compare64_operator" + (and (match_code "ne,eq,ge,gt,le,lt,geu,gtu,leu,ltu") + (match_test "TARGET_V9")) +{ + enum rtx_code code = GET_CODE (op); + if (GET_MODE (XEXP (op, 0)) == CCX_NOOVmode) + /* These are the only branches which work with CCX_NOOVmode. */ + return (code == EQ || code == NE || code == GE || code == LT); + return (GET_MODE (XEXP (op, 0)) == CCXmode); +}) + +;; Return true if OP is a comparison operator suitable for use in V9 +;; conditional move or branch on register contents instructions. +(define_predicate "v9_register_compare_operator" + (match_code "eq,ne,ge,lt,le,gt")) + +;; Return true if OP is an operator which can set the condition codes +;; explicitly. We do not include PLUS and MINUS because these +;; require CC_NOOVmode, which we handle explicitly. +(define_predicate "cc_arith_operator" + (match_code "and,ior,xor")) + +;; Return true if OP is an operator which can bitwise complement its +;; second operand and set the condition codes explicitly. +;; XOR is not here because combine canonicalizes (xor (not ...) ...) +;; and (xor ... (not ...)) to (not (xor ...)). */ +(define_predicate "cc_arith_not_operator" + (match_code "and,ior")) diff --git a/gcc/config/sparc/rtemself.h b/gcc/config/sparc/rtemself.h new file mode 100644 index 000000000..f0b8202ad --- /dev/null +++ b/gcc/config/sparc/rtemself.h @@ -0,0 +1,33 @@ +/* Definitions for rtems targeting a SPARC using ELF. + Copyright (C) 1996, 1997, 2000, 2002, 2005, 2007 Free Software Foundation, Inc. + Contributed by Joel Sherrill (joel@OARcorp.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Target OS builtins. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__rtems__"); \ + builtin_define ("__USE_INIT_FINI__"); \ + builtin_assert ("system=rtems"); \ + } \ + while (0) + +/* Use the default */ +#undef LINK_GCC_C_SEQUENCE_SPEC diff --git a/gcc/config/sparc/sol2-64.h b/gcc/config/sparc/sol2-64.h new file mode 100644 index 000000000..41e228114 --- /dev/null +++ b/gcc/config/sparc/sol2-64.h @@ -0,0 +1,22 @@ +/* Definitions of target machine for GCC, for bi-arch SPARC + running Solaris 2, defaulting to 64-bit code generation. + + Copyright (C) 1999, 2010, 2011 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#define TARGET_64BIT_DEFAULT 1 diff --git a/gcc/config/sparc/sol2-bi.h b/gcc/config/sparc/sol2-bi.h new file mode 100644 index 000000000..356e8256f --- /dev/null +++ b/gcc/config/sparc/sol2-bi.h @@ -0,0 +1,271 @@ +/* Definitions of target machine for GCC, for bi-arch SPARC + running Solaris 2 using the system assembler and linker. + Copyright (C) 2002, 2003, 2004, 2006, 2007, 2009, 2010, 2011, 2012 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* The default code model used to be CM_MEDANY on Solaris + but even Sun eventually found it to be quite wasteful + and changed it to CM_MEDMID in the Studio 9 compiler. */ +#undef SPARC_DEFAULT_CMODEL +#define SPARC_DEFAULT_CMODEL CM_MEDMID + +#define AS_SPARC64_FLAG "-xarch=v9" + +#undef ASM_CPU32_DEFAULT_SPEC +#define ASM_CPU32_DEFAULT_SPEC "" +#undef ASM_CPU64_DEFAULT_SPEC +#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG + +#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 +#undef CPP_CPU64_DEFAULT_SPEC +#define CPP_CPU64_DEFAULT_SPEC "" +#undef ASM_CPU32_DEFAULT_SPEC +#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plus" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc +#undef CPP_CPU64_DEFAULT_SPEC +#define CPP_CPU64_DEFAULT_SPEC "" +#undef ASM_CPU32_DEFAULT_SPEC +#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusa" +#undef ASM_CPU64_DEFAULT_SPEC +#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "a" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3 +#undef CPP_CPU64_DEFAULT_SPEC +#define CPP_CPU64_DEFAULT_SPEC "" +#undef ASM_CPU32_DEFAULT_SPEC +#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb" +#undef ASM_CPU64_DEFAULT_SPEC +#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "b" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara +#undef CPP_CPU64_DEFAULT_SPEC +#define CPP_CPU64_DEFAULT_SPEC "" +#undef ASM_CPU32_DEFAULT_SPEC +#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb" +#undef ASM_CPU64_DEFAULT_SPEC +#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "b" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara2 +#undef CPP_CPU64_DEFAULT_SPEC +#define CPP_CPU64_DEFAULT_SPEC "" +#undef ASM_CPU32_DEFAULT_SPEC +#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb" +#undef ASM_CPU64_DEFAULT_SPEC +#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "b" +#endif + +#if DEFAULT_ARCH32_P +#define DEF_ARCH32_SPEC(__str) "%{!m64:" __str "}" +#define DEF_ARCH64_SPEC(__str) "%{m64:" __str "}" +#else +#define DEF_ARCH32_SPEC(__str) "%{m32:" __str "}" +#define DEF_ARCH64_SPEC(__str) "%{!m32:" __str "}" +#endif + +#undef CPP_CPU_SPEC +#define CPP_CPU_SPEC "\ +%{mcpu=sparclet|mcpu=tsc701:-D__sparclet__} \ +%{mcpu=sparclite|mcpu-f930|mcpu=f934:-D__sparclite__} \ +%{mcpu=v8:" DEF_ARCH32_SPEC("-D__sparcv8") "} \ 
+%{mcpu=supersparc:-D__supersparc__ " DEF_ARCH32_SPEC("-D__sparcv8") "} \ +%{mcpu=v9|mcpu=ultrasparc|mcpu=ultrasparc3|mcpu=niagara|mcpu=niagara2:" DEF_ARCH32_SPEC("-D__sparcv8") "} \ +%{!mcpu*:%(cpp_cpu_default)} \ +" + +#undef ASM_CPU_SPEC +#define ASM_CPU_SPEC "\ +%{mcpu=v9:" DEF_ARCH32_SPEC("-xarch=v8plus") DEF_ARCH64_SPEC(AS_SPARC64_FLAG) "} \ +%{mcpu=ultrasparc:" DEF_ARCH32_SPEC("-xarch=v8plusa") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "a") "} \ +%{mcpu=ultrasparc3:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "b") "} \ +%{mcpu=niagara:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "b") "} \ +%{mcpu=niagara2:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "b") "} \ +%{!mcpu=niagara2:%{!mcpu=niagara:%{!mcpu=ultrasparc3:%{!mcpu=ultrasparc:%{!mcpu=v9:%{mcpu*:" DEF_ARCH32_SPEC("-xarch=v8") DEF_ARCH64_SPEC(AS_SPARC64_FLAG) "}}}}}} \ +%{!mcpu*:%(asm_cpu_default)} \ +" + +#undef CPP_CPU_DEFAULT_SPEC +#define CPP_CPU_DEFAULT_SPEC \ +(DEFAULT_ARCH32_P ? "\ +%{m64:" CPP_CPU64_DEFAULT_SPEC "} \ +%{!m64:" CPP_CPU32_DEFAULT_SPEC "} \ +" : "\ +%{m32:" CPP_CPU32_DEFAULT_SPEC "} \ +%{!m32:" CPP_CPU64_DEFAULT_SPEC "} \ +") + +#undef ASM_CPU_DEFAULT_SPEC +#define ASM_CPU_DEFAULT_SPEC \ +(DEFAULT_ARCH32_P ? "\ +%{m64:" ASM_CPU64_DEFAULT_SPEC "} \ +%{!m64:" ASM_CPU32_DEFAULT_SPEC "} \ +" : "\ +%{m32:" ASM_CPU32_DEFAULT_SPEC "} \ +%{!m32:" ASM_CPU64_DEFAULT_SPEC "} \ +") + +/* wchar_t is called differently in for 32 and 64-bit + compilations. This is called for by SCD 2.4.1, p. 6-83, Figure 6-65 + (32-bit) and p. 6P-10, Figure 6.38 (64-bit). */ + +#undef WCHAR_TYPE +#define WCHAR_TYPE (TARGET_ARCH64 ? "int" : "long int") + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +/* Same for wint_t. See SCD 2.4.1, p. 6-83, Figure 6-66 (32-bit). There's + no corresponding 64-bit definition, but this is what Solaris 8 + uses. */ + +#undef WINT_TYPE +#define WINT_TYPE (TARGET_ARCH64 ? 
"int" : "long int") + +#undef WINT_TYPE_SIZE +#define WINT_TYPE_SIZE 32 + +#undef CPP_ARCH32_SPEC +#define CPP_ARCH32_SPEC "" +#undef CPP_ARCH64_SPEC +#define CPP_ARCH64_SPEC "-D__arch64__ -D__sparcv9" + +#undef CPP_ARCH_SPEC +#define CPP_ARCH_SPEC "\ +%{m32:%(cpp_arch32)} \ +%{m64:%(cpp_arch64)} \ +%{!m32:%{!m64:%(cpp_arch_default)}} \ +" + +#undef ASM_ARCH_SPEC +#define ASM_ARCH_SPEC "" + +#undef ASM_ARCH32_SPEC +#define ASM_ARCH32_SPEC "" + +#undef ASM_ARCH64_SPEC +#define ASM_ARCH64_SPEC "" + +#undef ASM_ARCH_DEFAULT_SPEC +#define ASM_ARCH_DEFAULT_SPEC "" + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "startfile_arch", STARTFILE_ARCH_SPEC }, \ + { "link_arch32", LINK_ARCH32_SPEC }, \ + { "link_arch64", LINK_ARCH64_SPEC }, \ + { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \ + { "link_arch", LINK_ARCH_SPEC }, + +/* + * This should be the same as in sol2.h, except with "/sparcv9" + * appended to the paths and /usr/ccs/lib is no longer necessary + */ +#define LINK_ARCH64_SPEC_BASE \ + "%{mcmodel=medlow:-M /usr/lib/ld/sparcv9/map.below4G} \ + %{G:-G} \ + %{YP,*} \ + %{R*} \ + %{compat-bsd: \ + %{!YP,*:%{p|pg:-Y P,%R/usr/ucblib/sparcv9:%R/usr/lib/libp/sparcv9:%R/usr/lib/sparcv9:%R/lib/sparcv9} \ + %{!p:%{!pg:-Y P,%R/usr/ucblib/sparcv9:%R/usr/lib/sparcv9:%R/lib/sparcv9}}} \ + -R %R/usr/ucblib/sparcv9} \ + %{!compat-bsd: \ + %{!YP,*:%{p|pg:-Y P,%R/usr/lib/libp/sparcv9:%R/usr/lib/sparcv9:%R/lib/sparcv9} \ + %{!p:%{!pg:-Y P,%R/usr/lib/sparcv9:%R/lib/sparcv9}}}}" + +#define LINK_ARCH64_SPEC LINK_ARCH64_SPEC_BASE + +#undef LINK_ARCH_SPEC +#if DISABLE_MULTILIB +#if DEFAULT_ARCH32_P +#define LINK_ARCH_SPEC "\ +%{m32:%(link_arch32)} \ +%{m64:%edoes not support multilib} \ +%{!m32:%{!m64:%(link_arch_default)}} \ +" +#else +#define LINK_ARCH_SPEC "\ +%{m32:%edoes not support multilib} \ +%{m64:%(link_arch64)} \ +%{!m32:%{!m64:%(link_arch_default)}} \ +" +#endif +#else +#define LINK_ARCH_SPEC "\ +%{m32:%(link_arch32)} \ +%{m64:%(link_arch64)} \ +%{!m32:%{!m64:%(link_arch_default)}} \ +" +#endif + +#define LINK_ARCH_DEFAULT_SPEC \ +(DEFAULT_ARCH32_P ? LINK_ARCH32_SPEC : LINK_ARCH64_SPEC) + +#undef CC1_SPEC +#if DEFAULT_ARCH32_P +#define CC1_SPEC "\ +%{m64:%{m32:%emay not use both -m32 and -m64}} \ +%{m64:-mptr64 -mstack-bias -mno-v8plus \ + %{!mcpu*:-%{!mv8plus:mcpu=v9}}} \ +" +#else +#define CC1_SPEC "\ +%{m32:%{m64:%emay not use both -m32 and -m64}} \ +%{m32:-mptr32 -mno-stack-bias \ + %{!mcpu*:%{!mv8plus:-mcpu=v9}}} \ +%{mv8plus:-m32 -mptr32 -mno-stack-bias \ + %{!mcpu*:-mcpu=v9}} \ +" +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-cpu is ignored if -mcpu is specified. + --with-tune is ignored if -mtune is specified. + --with-float is ignored if -mhard-float, -msoft-float, -mfpu, or -mno-fpu + are specified. + In the SPARC_BI_ARCH compiler we cannot pass %{!mcpu=*:-mcpu=%(VALUE)} + here, otherwise say -mcpu=v7 would be passed even when -m64. + CC1_SPEC above takes care of this instead. 
*/ +#undef OPTION_DEFAULT_SPECS +#if DEFAULT_ARCH32_P +#define OPTION_DEFAULT_SPECS \ + {"cpu", "%{!m64:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \ + {"float", "%{!msoft-float:%{!mhard-float:%{!mfpu:%{!mno-fpu:-m%(VALUE)-float}}}}" } +#else +#define OPTION_DEFAULT_SPECS \ + {"cpu", "%{!m32:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \ + {"float", "%{!msoft-float:%{!mhard-float:%{!mfpu:%{!mno-fpu:-m%(VALUE)-float}}}}" } +#endif + +#if DEFAULT_ARCH32_P +#define MULTILIB_DEFAULTS { "m32" } +#else +#define MULTILIB_DEFAULTS { "m64" } +#endif diff --git a/gcc/config/sparc/sol2-c1.asm b/gcc/config/sparc/sol2-c1.asm new file mode 100644 index 000000000..63aa748e8 --- /dev/null +++ b/gcc/config/sparc/sol2-c1.asm @@ -0,0 +1,103 @@ +! crt1.s for sparc & sparcv9 (SunOS 5) + +! Copyright (C) 1992, 2009 Free Software Foundation, Inc. +! Written By David Vinayak Henkel-Wallace, June 1992 +! +! This file is free software; you can redistribute it and/or modify it +! under the terms of the GNU General Public License as published by the +! Free Software Foundation; either version 3, or (at your option) any +! later version. +! +! This file is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! General Public License for more details. +! +! Under Section 7 of GPL version 3, you are granted additional +! permissions described in the GCC Runtime Library Exception, version +! 3.1, as published by the Free Software Foundation. +! +! You should have received a copy of the GNU General Public License and +! a copy of the GCC Runtime Library Exception along with this program; +! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +! . + +! This file takes control of the process from the kernel, as specified +! in section 3 of the SVr4 ABI. +! This file is the first thing linked into any executable. + +#ifdef __sparcv9 +#define CPTRSIZE 8 +#define CPTRSHIFT 3 +#define STACK_BIAS 2047 +#define ldn ldx +#define stn stx +#define setn(s, scratch, dst) setx s, scratch, dst +#else +#define CPTRSIZE 4 +#define CPTRSHIFT 2 +#define STACK_BIAS 0 +#define ldn ld +#define stn st +#define setn(s, scratch, dst) set s, dst +#endif + + .section ".text" + .proc 022 + .global _start + +_start: + mov 0, %fp ! Mark bottom frame pointer + ldn [%sp + (16 * CPTRSIZE) + STACK_BIAS], %l0 ! argc + add %sp, (17 * CPTRSIZE) + STACK_BIAS, %l1 ! argv + + ! Leave some room for a call. Sun leaves 32 octets (to sit on + ! a cache line?) so we do too. +#ifdef __sparcv9 + sub %sp, 48, %sp +#else + sub %sp, 32, %sp +#endif + + ! %g1 may contain a function to be registered w/atexit + orcc %g0, %g1, %g0 +#ifdef __sparcv9 + be %xcc, .nope +#else + be .nope +#endif + mov %g1, %o0 + call atexit + nop +.nope: + ! Now make sure constructors and destructors are handled. + setn(_fini, %o1, %o0) + call atexit, 1 + nop + call _init, 0 + nop + + ! We ignore the auxiliary vector; there is no defined way to + ! access those data anyway. Instead, go straight to main: + mov %l0, %o0 ! argc + mov %l1, %o1 ! argv +#ifdef GCRT1 + setn(___Argv, %o4, %o3) + stn %o1, [%o3] ! *___Argv +#endif + ! Skip argc words past argv, to env: + sll %l0, CPTRSHIFT, %o2 + add %o2, CPTRSIZE, %o2 + add %l1, %o2, %o2 ! env + setn(_environ, %o4, %o3) + stn %o2, [%o3] ! *_environ + call main, 4 + nop + call exit, 0 + nop + call _exit, 0 + nop + ! We should never get here. 
+ + .type _start,#function + .size _start,.-_start diff --git a/gcc/config/sparc/sol2-ci.asm b/gcc/config/sparc/sol2-ci.asm new file mode 100644 index 000000000..8825f7958 --- /dev/null +++ b/gcc/config/sparc/sol2-ci.asm @@ -0,0 +1,55 @@ +! crti.s for solaris 2.0. + +! Copyright (C) 1992, 2008, 2009 Free Software Foundation, Inc. +! Written By David Vinayak Henkel-Wallace, June 1992 +! +! This file is free software; you can redistribute it and/or modify it +! under the terms of the GNU General Public License as published by the +! Free Software Foundation; either version 3, or (at your option) any +! later version. +! +! This file is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! General Public License for more details. +! +! Under Section 7 of GPL version 3, you are granted additional +! permissions described in the GCC Runtime Library Exception, version +! 3.1, as published by the Free Software Foundation. +! +! You should have received a copy of the GNU General Public License and +! a copy of the GCC Runtime Library Exception along with this program; +! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +! . + +! This file just make a stack frame for the contents of the .fini and +! .init sections. Users may put any desired instructions in those +! sections. + +! This file is linked in before the Values-Xx.o files and also before +! crtbegin, with which perhaps it should be merged. + + .section ".init" + .proc 022 + .global _init + .type _init,#function + .align 4 +_init: +#ifdef __sparcv9 + save %sp, -176, %sp +#else + save %sp, -96, %sp +#endif + + + .section ".fini" + .proc 022 + .global _fini + .type _fini,#function + .align 4 +_fini: +#ifdef __sparcv9 + save %sp, -176, %sp +#else + save %sp, -96, %sp +#endif diff --git a/gcc/config/sparc/sol2-cn.asm b/gcc/config/sparc/sol2-cn.asm new file mode 100644 index 000000000..b92f3cf08 --- /dev/null +++ b/gcc/config/sparc/sol2-cn.asm @@ -0,0 +1,41 @@ +! crtn.s for solaris 2.0. + +! Copyright (C) 1992, 2008, 2009 Free Software Foundation, Inc. +! Written By David Vinayak Henkel-Wallace, June 1992 +! +! This file is free software; you can redistribute it and/or modify it +! under the terms of the GNU General Public License as published by the +! Free Software Foundation; either version 3, or (at your option) any +! later version. +! +! This file is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! General Public License for more details. +! +! Under Section 7 of GPL version 3, you are granted additional +! permissions described in the GCC Runtime Library Exception, version +! 3.1, as published by the Free Software Foundation. +! +! You should have received a copy of the GNU General Public License and +! a copy of the GCC Runtime Library Exception along with this program; +! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +! . + +! This file just makes sure that the .fini and .init sections do in +! fact return. Users may put any desired instructions in those sections. +! This file is the last thing linked into any executable. + + .section ".init" + .align 4 + + ret + restore + + .section ".fini" + .align 4 + + ret + restore + +! Th-th-th-that is all folks! 
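Note on the three startup files above (sol2-c1.asm, sol2-ci.asm, sol2-cn.asm): _start registers the dynamic linker's cleanup hook from %g1 and then _fini with atexit, runs _init, locates argv and the environment on the stack, and finally calls main and exit. The C sketch below is only an editor's illustration and is not part of the imported sources; the wrapper name start_sketch and the parameter rtld_cleanup are invented for the example, while _init, _fini and _environ correspond to the symbols the assembly actually references.

/* Illustrative sketch, not part of the patch: approximate C view of the
   startup sequence implemented in sparc/sol2-c1.asm.  */
extern void _init (void);
extern void _fini (void);
extern int atexit (void (*) (void));
extern void exit (int);
extern int main (int, char **, char **);
extern char **_environ;

static void
start_sketch (int argc, char **argv, void (*rtld_cleanup) (void))
{
  if (rtld_cleanup)             /* %g1 may carry a handler to register.  */
    atexit (rtld_cleanup);

  atexit (_fini);               /* Destructors run at exit ...           */
  _init ();                     /* ... constructors run now.             */

  _environ = argv + argc + 1;   /* env follows argv and its trailing NULL.  */

  exit (main (argc, argv, _environ));
  /* The real code falls back to _exit should exit ever return.          */
}
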
diff --git a/gcc/config/sparc/sol2-gas-bi.h b/gcc/config/sparc/sol2-gas-bi.h new file mode 100644 index 000000000..001f978b8 --- /dev/null +++ b/gcc/config/sparc/sol2-gas-bi.h @@ -0,0 +1,23 @@ +/* Definitions of target machine for GCC, for bi-arch SPARC + running Solaris 2 using the GNU assembler. + + Copyright (C) 2002, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef AS_SPARC64_FLAG +#define AS_SPARC64_FLAG "-TSO -64 -Av9" diff --git a/gcc/config/sparc/sol2-gas.h b/gcc/config/sparc/sol2-gas.h new file mode 100644 index 000000000..d83e7b917 --- /dev/null +++ b/gcc/config/sparc/sol2-gas.h @@ -0,0 +1,47 @@ +/* Definitions of target machine for GCC, for SPARC running Solaris 2 + using the GNU assembler. + Copyright (C) 2004, 2005, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Undefine this as the filler pattern doesn't work with GNU as. */ +#undef ASM_OUTPUT_ALIGN_WITH_NOP + +/* Undefine this so that BNSYM/ENSYM pairs are emitted by STABS+. */ +#undef NO_DBX_BNSYM_ENSYM + +/* Use GNU extensions to TLS support. */ +#ifdef HAVE_AS_TLS +#undef TARGET_SUN_TLS +#undef TARGET_GNU_TLS +#define TARGET_SUN_TLS 0 +#define TARGET_GNU_TLS 1 +#endif + +/* Use default ELF section syntax. */ +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +/* And standard pushsection syntax. While GNU as supports the non-standard + variant too, we prefer the former. */ +#undef PUSHSECTION_FORMAT +#define PUSHSECTION_FORMAT "\t.pushsection\t%s\n" diff --git a/gcc/config/sparc/sol2-gld-bi.h b/gcc/config/sparc/sol2-gld-bi.h new file mode 100644 index 000000000..3be20b2cd --- /dev/null +++ b/gcc/config/sparc/sol2-gld-bi.h @@ -0,0 +1,67 @@ +/* Definitions of target machine for GCC, for bi-arch SPARC + running Solaris 2 using the GNU linker. + +Copyright (C) 2002, 2003, 2010 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#undef LINK_ARCH32_SPEC +#define LINK_ARCH32_SPEC \ + LINK_ARCH32_SPEC_BASE "%{!static: -rpath-link %R/usr/lib}" + +#undef LINK_ARCH64_SPEC +#define LINK_ARCH64_SPEC \ + LINK_ARCH64_SPEC_BASE "%{!static: -rpath-link %R/usr/lib/sparcv9}" + +/* Since binutils 2.21, GNU ld supports new *_sol2 emulations to strictly + follow the Solaris 2 ABI. Prefer them if present. */ +#ifdef HAVE_LD_SOL2_EMULATION +#define SPARC32_EMULATION "elf32_sparc_sol2" +#define SPARC64_EMULATION "elf64_sparc_sol2" +#else +#define SPARC32_EMULATION "elf32_sparc" +#define SPARC64_EMULATION "elf64_sparc" +#endif + +#undef LINK_ARCH_SPEC +#if DISABLE_MULTILIB +#if DEFAULT_ARCH32_P +#define LINK_ARCH_SPEC "\ +%{m32:-m " SPARC32_EMULATION " %(link_arch32)} \ +%{m64:%edoes not support multilib} \ +%{!m32:%{!m64:%(link_arch_default)}} \ +" +#else +#define LINK_ARCH_SPEC "\ +%{m32:%edoes not support multilib} \ +%{m64:-m " SPARC64_EMULATION " %(link_arch64)} \ +%{!m32:%{!m64:%(link_arch_default)}} \ +" +#endif +#else +#define LINK_ARCH_SPEC "\ +%{m32:-m " SPARC32_EMULATION " %(link_arch32)} \ +%{m64:-m " SPARC64_EMULATION " %(link_arch64)} \ +%{!m32:%{!m64:%(link_arch_default)}} \ +" +#endif + diff --git a/gcc/config/sparc/sol2-unwind.h b/gcc/config/sparc/sol2-unwind.h new file mode 100644 index 000000000..d6c4f6c1f --- /dev/null +++ b/gcc/config/sparc/sol2-unwind.h @@ -0,0 +1,480 @@ +/* DWARF2 EH unwinding support for SPARC Solaris. + Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. 
*/ + +#include +#include +#include + +#if defined(__arch64__) + +#define IS_SIGHANDLER sparc64_is_sighandler + +static int +sparc64_is_sighandler (unsigned int *pc, void *cfa, int *nframes) +{ + if (/* Solaris 8 - single-threaded + ---------------------------- + : add %g5, %o7, %o2 + : ldx [ %o2 + 0xfa0 ], %g5 + : sra %i0, 0, %o0 + : sllx %o0, 3, %g4 + : ldx [ %g4 + %g5 ], %l0 + : call %l0 + : mov %i2, %o2 + : cmp %i3, 8 <--- PC */ + ( pc[-7] == 0x9401400f + && pc[-6] == 0xca5aafa0 + && pc[-5] == 0x913e2000 + && pc[-4] == 0x892a3003 + && pc[-3] == 0xe0590005 + && pc[-2] == 0x9fc40000 + && pc[-1] == 0x9410001a + && pc[ 0] == 0x80a6e008) + + || /* Solaris 9 - single-threaded + ---------------------------- + The pattern changes slightly in different versions of the + operating system, so we skip the comparison against pc[-6] for + Solaris 9. + + : sra %i0, 0, %l1 + + Solaris 9 5/02: + : ldx [ %o2 + 0xf68 ], %g5 + Solaris 9 9/05: + : ldx [ %o2 + 0xe50 ], %g5 + + : sllx %l1, 3, %g4 + : mov %l1, %o0 + : ldx [ %g4 + %g5 ], %l0 + : call %l0 + : mov %i2, %o2 + : cmp %l1, 8 <--- PC */ + ( pc[-7] == 0xa33e2000 + /* skip pc[-6] */ + && pc[-5] == 0x892c7003 + && pc[-4] == 0x90100011 + && pc[-3] == 0xe0590005 + && pc[-2] == 0x9fc40000 + && pc[-1] == 0x9410001a + && pc[ 0] == 0x80a46008)) + { + /* We need to move up one frame: + + <-- context->cfa + sigacthandler + + */ + *nframes = 1; + return 1; + } + + if (/* Solaris 8+ - multi-threaded + ---------------------------- + <__sighndlr>: save %sp, -176, %sp + <__sighndlr+4>: mov %i0, %o0 + <__sighndlr+8>: mov %i1, %o1 + <__sighndlr+12>: call %i3 + <__sighndlr+16>: mov %i2, %o2 + <__sighndlr+20>: ret <--- PC + <__sighndlr+24>: restore */ + pc[-5] == 0x9de3bf50 + && pc[-4] == 0x90100018 + && pc[-3] == 0x92100019 + && pc[-2] == 0x9fc6c000 + && pc[-1] == 0x9410001a + && pc[ 0] == 0x81c7e008 + && pc[ 1] == 0x81e80000) + { + /* We have observed different calling frames among different + versions of the operating system, so that we need to + discriminate using the upper frame. We look for the return + address of the caller frame (there is an offset of 15 double + words between the frame address and the place where this return + address is stored) in order to do some more pattern matching. */ + unsigned int cuh_pattern + = *(unsigned int *)(*(unsigned long *)(cfa + 15*8) - 4); + + if (cuh_pattern == 0x92100019) + /* This matches the call_user_handler pattern for Solaris 11. + This is the same setup as for Solaris 9, see below. */ + *nframes = 3; + + else if (cuh_pattern == 0xd25fa7ef) + { + /* This matches the call_user_handler pattern for Solaris 10. + There are 2 cases so we look for the return address of the + caller's caller frame in order to do more pattern matching. */ + unsigned long sah_address = *(unsigned long *)(cfa + 176 + 15*8); + + if (sah_address && *(unsigned int *)(sah_address - 4) == 0x92100019) + /* This is the same setup as for Solaris 9, see below. */ + *nframes = 3; + else + /* The sigacthandler frame isn't present in the chain. 
+ We need to move up two frames: + + <-- context->cfa + __sighndlr + call_user_handler frame + + */ + *nframes = 2; + } + + else if (cuh_pattern == 0x9410001a || cuh_pattern == 0x94100013) + /* This matches the call_user_handler pattern for Solaris 9 and + for Solaris 8 running inside Solaris Containers respectively + We need to move up three frames: + + <-- context->cfa + __sighndlr + call_user_handler + sigacthandler + + */ + *nframes = 3; + + else /* cuh_pattern == 0xe0272010 */ + /* This is the default Solaris 8 case. + We need to move up two frames: + + <-- context->cfa + __sighndlr + sigacthandler + + */ + *nframes = 2; + + return 1; + } + + return 0; +} + +#define MD_FALLBACK_FRAME_STATE_FOR sparc64_fallback_frame_state + +#define MD_FROB_UPDATE_CONTEXT sparc64_frob_update_context + +static void +sparc64_frob_update_context (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + /* The column of %sp contains the old CFA, not the old value of %sp. + The CFA offset already comprises the stack bias so, when %sp is the + CFA register, we must avoid counting the stack bias twice. Do not + do that for signal frames as the offset is artificial for them. */ + if (fs->regs.cfa_reg == __builtin_dwarf_sp_column () + && fs->regs.cfa_how == CFA_REG_OFFSET + && fs->regs.cfa_offset != 0 + && !fs->signal_frame) + context->cfa -= STACK_BIAS; +} + +#else + +#define IS_SIGHANDLER sparc_is_sighandler + +static int +sparc_is_sighandler (unsigned int *pc, void *cfa, int *nframes) +{ + if (/* Solaris 8, 9 - single-threaded + ------------------------------- + The pattern changes slightly in different versions of the operating + system, so we skip the comparison against pc[-6]. + + : add %o1, %o7, %o3 + : mov %i1, %o1 + + : ld [ %o3 + ], %o2 + + : sll %i0, 2, %o0 + : ld [ %o0 + %o2 ], %l0 + : mov %i0, %o0 + : call %l0 + : mov %i2, %o2 + : cmp %i0, 8 <--- PC */ + pc[-8] == 0x9602400f + && pc[-7] == 0x92100019 + /* skip pc[-6] */ + && pc[-5] == 0x912e2002 + && pc[-4] == 0xe002000a + && pc[-3] == 0x90100018 + && pc[-2] == 0x9fc40000 + && pc[-1] == 0x9410001a + && pc[ 0] == 0x80a62008) + { + /* We need to move up one frame: + + <-- context->cfa + sigacthandler + + */ + *nframes = 1; + return 1; + } + + if (/* Solaris 8 - multi-threaded + --------------------------- + <__libthread_segvhdlr+212>: clr %o2 + <__libthread_segvhdlr+216>: ld [ %fp + -28 ], %l0 + <__libthread_segvhdlr+220>: mov %i4, %o0 + <__libthread_segvhdlr+224>: mov %i1, %o1 + <__libthread_segvhdlr+228>: call %l0 + <__libthread_segvhdlr+232>: mov %i2, %o2 + <__libthread_segvhdlr+236>: ret <--- PC + <__libthread_segvhdlr+240>: restore + <__libthread_segvhdlr+244>: cmp %o1, 0 */ + pc[-6] == 0x94102000 + && pc[-5] == 0xe007bfe4 + && pc[-4] == 0x9010001c + && pc[-3] == 0x92100019 + && pc[-2] == 0x9fc40000 + && pc[-1] == 0x9410001a + && pc[ 0] == 0x81c7e008 + && pc[ 1] == 0x81e80000 + && pc[ 2] == 0x80a26000) + { + /* We need to move up one frame: + + <-- context->cfa + __libthread_segvhdlr + + */ + *nframes = 1; + return 1; + } + + if(/* Solaris 8+ - multi-threaded + ---------------------------- + <__sighndlr>: save %sp, -96, %sp + <__sighndlr+4>: mov %i0, %o0 + <__sighndlr+8>: mov %i1, %o1 + <__sighndlr+12>: call %i3 + <__sighndlr+16>: mov %i2, %o2 + <__sighndlr+20>: ret <--- PC + <__sighndlr+24>: restore */ + pc[-5] == 0x9de3bfa0 + && pc[-4] == 0x90100018 + && pc[-3] == 0x92100019 + && pc[-2] == 0x9fc6c000 + && pc[-1] == 0x9410001a + && pc[ 0] == 0x81c7e008 + && pc[ 1] == 0x81e80000) + { + /* We have observed different calling frames among 
different + versions of the operating system, so that we need to + discriminate using the upper frame. We look for the return + address of the caller frame (there is an offset of 15 words + between the frame address and the place where this return + address is stored) in order to do some more pattern matching. */ + unsigned int cuh_pattern + = *(unsigned int *)(*(unsigned int *)(cfa + 15*4) - 4); + + if (cuh_pattern == 0x92100019) + /* This matches the call_user_handler pattern for Solaris 11. + This is the same setup as for Solaris 9, see below. */ + *nframes = 3; + + else if (cuh_pattern == 0xd407a04c) + { + /* This matches the call_user_handler pattern for Solaris 10. + There are 2 cases so we look for the return address of the + caller's caller frame in order to do more pattern matching. */ + unsigned int sah_address = *(unsigned int *)(cfa + 96 + 15*4); + + if (sah_address && *(unsigned int *)(sah_address - 4) == 0x92100019) + /* This is the same setup as for Solaris 9, see below. */ + *nframes = 3; + else + /* The sigacthandler frame isn't present in the chain. + We need to move up two frames: + + <-- context->cfa + __sighndlr + call_user_handler frame + + */ + *nframes = 2; + } + + else if (cuh_pattern == 0x9410001a || cuh_pattern == 0x9410001b) + /* This matches the call_user_handler pattern for Solaris 9 and + for Solaris 8 running inside Solaris Containers respectively. + We need to move up three frames: + + <-- context->cfa + __sighndlr + call_user_handler + sigacthandler + + */ + *nframes = 3; + + else /* cuh_pattern == 0x90100018 */ + /* This is the default Solaris 8 case. + We need to move up two frames: + + <-- context->cfa + __sighndlr + sigacthandler + + */ + *nframes = 2; + + return 1; + } + + return 0; +} + +#define MD_FALLBACK_FRAME_STATE_FOR sparc_fallback_frame_state + +#endif + +static _Unwind_Reason_Code +MD_FALLBACK_FRAME_STATE_FOR (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + void *pc = context->ra; + struct frame *fp = (struct frame *) context->cfa; + int nframes; + void *this_cfa = context->cfa; + long new_cfa; + void *ra_location, *shifted_ra_location; + mcontext_t *mctx; + int i; + + /* Deal with frame-less function from which a signal was raised. */ + if (_Unwind_IsSignalFrame (context)) + { + /* The CFA is by definition unmodified in this case. */ + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = __builtin_dwarf_sp_column (); + fs->regs.cfa_offset = 0; + + /* This is the canonical RA column. */ + fs->retaddr_column = 15; + + return _URC_NO_REASON; + } + + if (IS_SIGHANDLER (pc, this_cfa, &nframes)) + { + struct handler_args { + struct frame frwin; + ucontext_t ucontext; + } *handler_args; + ucontext_t *ucp; + + /* context->cfa points into the frame after the saved frame pointer and + saved pc (struct frame). + + The ucontext_t structure is in the kernel frame after a struct + frame. Since the frame sizes vary even within OS releases, we + need to walk the stack to get there. */ + + for (i = 0; i < nframes; i++) + fp = (struct frame *) ((char *)fp->fr_savfp + STACK_BIAS); + + handler_args = (struct handler_args *) fp; + ucp = &handler_args->ucontext; + mctx = &ucp->uc_mcontext; + } + + /* Exit if the pattern at the return address does not match the + previous three patterns. */ + else + return _URC_END_OF_STACK; + + new_cfa = mctx->gregs[REG_SP]; + /* The frame address is %sp + STACK_BIAS in 64-bit mode. 
*/ + new_cfa += STACK_BIAS; + + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = __builtin_dwarf_sp_column (); + fs->regs.cfa_offset = new_cfa - (long) this_cfa; + + /* Restore global and out registers (in this order) from the + ucontext_t structure, uc_mcontext.gregs field. */ + for (i = 1; i < 16; i++) + { + /* We never restore %sp as everything is purely CFA-based. */ + if ((unsigned int) i == __builtin_dwarf_sp_column ()) + continue; + + /* First the global registers and then the out registers. */ + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset = (long)&mctx->gregs[REG_Y + i] - new_cfa; + } + + /* Just above the stack pointer there are 16 extended words in which + the register window (in and local registers) was saved. */ + for (i = 0; i < 16; i++) + { + fs->regs.reg[i + 16].how = REG_SAVED_OFFSET; + fs->regs.reg[i + 16].loc.offset = i*sizeof(long); + } + + /* Check whether we need to restore FPU registers. */ + if (mctx->fpregs.fpu_qcnt) + { + for (i = 0; i < 32; i++) + { + fs->regs.reg[i + 32].how = REG_SAVED_OFFSET; + fs->regs.reg[i + 32].loc.offset + = (long)&mctx->fpregs.fpu_fr.fpu_regs[i] - new_cfa; + } + +#ifdef __arch64__ + /* For 64-bit, fpu_fr.fpu_dregs contains 32 instead of 16 doubles. */ + for (i = 32; i < 64; i++) + { + if (i > 32 && (i & 1)) + continue; + + fs->regs.reg[i + 32].how = REG_SAVED_OFFSET; + fs->regs.reg[i + 32].loc.offset + = (long)&mctx->fpregs.fpu_fr.fpu_dregs[i/2] - new_cfa; + } +#endif + } + + /* State the rules to find the kernel's code "return address", which is + the address of the active instruction when the signal was caught. + On the SPARC, since RETURN_ADDR_OFFSET (essentially 8) is defined, we + need to preventively subtract it from the purported return address. */ + ra_location = &mctx->gregs[REG_PC]; + shifted_ra_location = &mctx->gregs[REG_Y]; + *(void **)shifted_ra_location = *(void **)ra_location - 8; + fs->retaddr_column = 0; + fs->regs.reg[0].how = REG_SAVED_OFFSET; + fs->regs.reg[0].loc.offset = (long)shifted_ra_location - new_cfa; + fs->signal_frame = 1; + + return _URC_NO_REASON; +} diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h new file mode 100644 index 000000000..4c8edaf1f --- /dev/null +++ b/gcc/config/sparc/sol2.h @@ -0,0 +1,205 @@ +/* Definitions of target machine for GCC, for SPARC running Solaris 2 + Copyright 1992, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005, + 2006, 2007, 2008, 2010 Free Software Foundation, Inc. + Contributed by Ron Guilmette (rfg@netcom.com). + Additional changes by David V. Henkel-Wallace (gumby@cygnus.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Supposedly the same as vanilla sparc svr4, except for the stuff below: */ + +/* This is here rather than in sparc.h because it's not known what + other assemblers will accept. 
*/ + +#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 +#undef ASM_CPU_DEFAULT_SPEC +#define ASM_CPU_DEFAULT_SPEC "-xarch=v8plus" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc +#undef ASM_CPU_DEFAULT_SPEC +#define ASM_CPU_DEFAULT_SPEC "-xarch=v8plusa" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3 +#undef ASM_CPU_DEFAULT_SPEC +#define ASM_CPU_DEFAULT_SPEC "-xarch=v8plusb" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara +#undef ASM_CPU_DEFAULT_SPEC +#define ASM_CPU_DEFAULT_SPEC "-xarch=v8plusb" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara2 +#undef ASM_CPU_DEFAULT_SPEC +#define ASM_CPU_DEFAULT_SPEC "-xarch=v8plusb" +#endif + +#undef ASM_CPU_SPEC +#define ASM_CPU_SPEC "\ +%{mcpu=v9:-xarch=v8plus} \ +%{mcpu=ultrasparc:-xarch=v8plusa} \ +%{mcpu=ultrasparc3:-xarch=v8plusb} \ +%{mcpu=niagara:-xarch=v8plusb} \ +%{mcpu=niagara2:-xarch=v8plusb} \ +%{!mcpu*:%(asm_cpu_default)} \ +" + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "startfile_arch", STARTFILE_ARCH_SPEC }, \ + { "link_arch", LINK_ARCH_SPEC } + +/* However it appears that Solaris 2.0 uses the same reg numbering as + the old BSD-style system did. */ + +/* The Solaris 2 assembler uses .skip, not .zero, so put this back. */ +#undef ASM_OUTPUT_SKIP +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf (FILE, "\t.skip %u\n", (int)(SIZE)) + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf ((LABEL), "*.L%s%lu", (PREFIX), (unsigned long)(NUM)) + +/* The native TLS-enabled assembler requires the directive #tls_object + to be put on objects in TLS sections (as of v7.1). This is not + required by the GNU assembler but supported on SPARC. */ +#undef ASM_DECLARE_OBJECT_NAME +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + do \ + { \ + HOST_WIDE_INT size; \ + \ + if (targetm.have_tls && DECL_THREAD_LOCAL_P (DECL)) \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "tls_object"); \ + else \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + \ + size_directive_output = 0; \ + if (!flag_inhibit_size_directive \ + && (DECL) && DECL_SIZE (DECL)) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \ + } \ + \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + } \ + while (0) + +/* The Solaris assembler cannot grok .stabd directives. */ +#undef NO_DBX_BNSYM_ENSYM +#define NO_DBX_BNSYM_ENSYM 1 + + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + crtend.o%s crtn.o%s" + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + Some Solaris dynamic linkers don't handle unaligned section relative + relocs properly, so force them to be aligned. */ +#ifndef HAVE_AS_SPARC_UA_PCREL +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + ((flag_pic || GLOBAL) ? DW_EH_PE_aligned : DW_EH_PE_absptr) +#endif + + +/* Define for support of TFmode long double. + SPARC ABI says that long double is 4 words. 
*/ +#define LONG_DOUBLE_TYPE_SIZE 128 + +/* Solaris's _Qp_* library routine implementation clobbers the output + memory before the inputs are fully consumed. */ + +#undef TARGET_BUGGY_QP_LIB +#define TARGET_BUGGY_QP_LIB 1 + +#undef SUN_CONVERSION_LIBFUNCS +#define SUN_CONVERSION_LIBFUNCS 1 + +#undef DITF_CONVERSION_LIBFUNCS +#define DITF_CONVERSION_LIBFUNCS 1 + +#undef SUN_INTEGER_MULTIPLY_64 +#define SUN_INTEGER_MULTIPLY_64 1 + +/* Solaris allows 64-bit out and global registers to be used in 32-bit mode. + sparc_override_options will disable V8+ if either not generating V9 code + or generating 64-bit code. */ +#undef TARGET_DEFAULT +#ifdef TARGET_64BIT_DEFAULT +#define TARGET_DEFAULT \ + (MASK_V9 + MASK_64BIT + MASK_PTR64 + MASK_STACK_BIAS + \ + MASK_V8PLUS + MASK_APP_REGS + MASK_FPU + MASK_LONG_DOUBLE_128) +#else +#define TARGET_DEFAULT \ + (MASK_V8PLUS + MASK_APP_REGS + MASK_FPU + MASK_LONG_DOUBLE_128) +#endif + +/* Solaris-specific #pragmas are implemented on top of attributes. Hook in + the bits from config/sol2.c. */ +#define SUBTARGET_INSERT_ATTRIBUTES solaris_insert_attributes +#define SUBTARGET_ATTRIBUTE_TABLE SOLARIS_ATTRIBUTE_TABLE + +/* Register the Solaris-specific #pragma directives. */ +#define REGISTER_TARGET_PRAGMAS() solaris_register_pragmas () + +/* Output a simple call for .init/.fini. */ +#define ASM_OUTPUT_CALL(FILE, FN) \ + do \ + { \ + fprintf (FILE, "\tcall\t"); \ + print_operand (FILE, XEXP (DECL_RTL (FN), 0), 0); \ + fprintf (FILE, "\n\tnop\n"); \ + } \ + while (0) + +/* This is how to output an assembler line that says to advance + the location counter to a multiple of 2**LOG bytes using the + NOP instruction as padding. */ +#define ASM_OUTPUT_ALIGN_WITH_NOP(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf (FILE, "\t.align %d,0x1000000\n", (1<<(LOG))) + +/* Use Solaris ELF section syntax. */ +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION sparc_solaris_elf_asm_named_section + +/* And SPARC non-standard pushsection syntax. */ +#undef PUSHSECTION_FORMAT +#define PUSHSECTION_FORMAT "\t.pushsection\t\"%s\"\n" + +/* Static stack checking is supported by means of probes. */ +#define STACK_CHECK_STATIC_BUILTIN 1 + +#define MD_UNWIND_SUPPORT "config/sparc/sol2-unwind.h" diff --git a/gcc/config/sparc/sp-elf.h b/gcc/config/sparc/sp-elf.h new file mode 100644 index 000000000..d78eba3b5 --- /dev/null +++ b/gcc/config/sparc/sp-elf.h @@ -0,0 +1,69 @@ +/* Definitions of target machine for GCC, + for SPARC running in an embedded environment using the ELF file format. + Copyright (C) 2005, 2007, 2010, 2011 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (sparc-elf)") + +/* Don't assume anything about the header files. */ +#define NO_IMPLICIT_EXTERN_C + +/* It's safe to pass -s always, even if -g is not used. */ +#undef ASM_SPEC +#define ASM_SPEC \ + "-s \ + %{fpic|fpie|fPIC|fPIE:-K PIC} %(asm_cpu)" + +/* Use the default. 
*/ +#undef LINK_SPEC + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crt0.o%s crti.o%s crtbegin.o%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + crtend.o%s crtn.o%s" + +/* Don't set the target flags, this is done by the linker script */ +#undef LIB_SPEC +#define LIB_SPEC "" + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf ((LABEL), "*.L%s%ld", (PREFIX), (long)(NUM)) + +/* ??? Inherited from sol2.h. Probably wrong. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE "long int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE BITS_PER_WORD + +/* ??? until fixed. */ +#undef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE 64 diff --git a/gcc/config/sparc/sp64-elf.h b/gcc/config/sparc/sp64-elf.h new file mode 100644 index 000000000..b21969386 --- /dev/null +++ b/gcc/config/sparc/sp64-elf.h @@ -0,0 +1,93 @@ +/* Definitions of target machine for GCC, for SPARC64, ELF. + Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2004, 2005, 2007, 2010, + 2011 + Free Software Foundation, Inc. + Contributed by Doug Evans, dje@cygnus.com. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (sparc64-elf)") + +/* A 64 bit v9 compiler in a Medium/Anywhere code model environment. */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT \ +(MASK_V9 + MASK_PTR64 + MASK_64BIT + MASK_HARD_QUAD \ + + MASK_APP_REGS + MASK_FPU + MASK_STACK_BIAS + MASK_LONG_DOUBLE_128) + +#undef SPARC_DEFAULT_CMODEL +#define SPARC_DEFAULT_CMODEL CM_EMBMEDANY + +/* Don't assume anything about the header files. */ +#define NO_IMPLICIT_EXTERN_C + +/* __svr4__ is used by the C library (FIXME) */ +#undef CPP_SUBTARGET_SPEC +#define CPP_SUBTARGET_SPEC "-D__svr4__" + +#undef ASM_SPEC +#define ASM_SPEC "\ +-s %{fpic|fPIC|fpie|fPIE:-K PIC} \ +%{mlittle-endian:-EL} \ +%(asm_cpu) %(asm_arch) \ +" + +/* This is taken from sol2.h. */ +#undef LINK_SPEC +#define LINK_SPEC "\ +%{v:-V} \ +%{mlittle-endian:-EL} \ +" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crt0.o%s crti.o%s crtbegin.o%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + crtend.o%s crtn.o%s" + +/* Use the default (for now). */ +#undef LIB_SPEC + +#undef BYTES_BIG_ENDIAN +#define BYTES_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN) + +#undef WORDS_BIG_ENDIAN +#define WORDS_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN) + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." 
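/* Editor's illustration, not part of the imported file: TARGET_DEFAULT above
   combines MASK_* values with `+', which is safe only because each MASK_*
   macro names a distinct power-of-two bit in the target flags word, so the
   sum and the bitwise OR are identical.  The sketch below uses made-up
   SKETCH_* masks; the real bit assignments come from sparc.opt/sparc.h.  */

#define SKETCH_MASK_V9         0x01
#define SKETCH_MASK_PTR64      0x02
#define SKETCH_MASK_STACK_BIAS 0x04

/* Composing a default configuration: sum of distinct bits.  */
static const int sketch_target_flags = SKETCH_MASK_V9 + SKETCH_MASK_PTR64;

/* Querying a feature is then a plain bit test on the flags word.  */
static int
sketch_stack_bias_p (void)
{
  return (sketch_target_flags & SKETCH_MASK_STACK_BIAS) != 0;
}
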
+ +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf ((LABEL), "*.L%s%ld", (PREFIX), (long)(NUM)) + +/* ??? This should be 32 bits for v9 but what can we do? */ +#undef WCHAR_TYPE +#define WCHAR_TYPE "short unsigned int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 16 + +#undef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE 128 diff --git a/gcc/config/sparc/sparc-modes.def b/gcc/config/sparc/sparc-modes.def new file mode 100644 index 000000000..628470086 --- /dev/null +++ b/gcc/config/sparc/sparc-modes.def @@ -0,0 +1,47 @@ +/* Definitions of target machine for GCC, for Sun SPARC. + Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc. + Contributed by Michael Tiemann (tiemann@cygnus.com). + 64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans, + at Cygnus Support. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* 128-bit floating point */ +FLOAT_MODE (TF, 16, ieee_quad_format); + +/* Add any extra modes needed to represent the condition code. + + On the SPARC, we have a "no-overflow" mode which is used when an add or + subtract insn is used to set the condition code. Different branches are + used in this case for some operations. + + We also have two modes to indicate that the relevant condition code is + in the floating-point condition code register. One for comparisons which + will generate an exception if the result is unordered (CCFPEmode) and + one for comparisons which will never trap (CCFPmode). + + CCXmode and CCX_NOOVmode are only used by v9. */ + +CC_MODE (CCX); +CC_MODE (CC_NOOV); +CC_MODE (CCX_NOOV); +CC_MODE (CCFP); +CC_MODE (CCFPE); + +/* Vector modes. */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h new file mode 100644 index 000000000..d37823f56 --- /dev/null +++ b/gcc/config/sparc/sparc-protos.h @@ -0,0 +1,108 @@ +/* Prototypes of target machine for SPARC. + Copyright (C) 1999, 2000, 2003, 2004, 2005, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Michael Tiemann (tiemann@cygnus.com). + 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans, + at Cygnus Support. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef __SPARC_PROTOS_H__ +#define __SPARC_PROTOS_H__ + +#ifdef TREE_CODE +#ifdef RTX_CODE +extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); +#endif +extern unsigned long sparc_type_code (tree); +#ifdef ARGS_SIZE_RTX +/* expr.h defines ARGS_SIZE_RTX and `enum direction' */ +extern enum direction function_arg_padding (enum machine_mode, const_tree); +#endif /* ARGS_SIZE_RTX */ +#endif /* TREE_CODE */ + +extern void order_regs_for_local_alloc (void); +extern HOST_WIDE_INT sparc_compute_frame_size (HOST_WIDE_INT, int); +extern void sparc_expand_prologue (void); +extern void sparc_expand_epilogue (void); +extern bool sparc_can_use_return_insn_p (void); +extern int check_pic (int); +extern int short_branch (int, int); +extern void sparc_profile_hook (int); +extern void sparc_override_options (void); +extern void sparc_output_scratch_registers (FILE *); + +#ifdef RTX_CODE +extern enum machine_mode select_cc_mode (enum rtx_code, rtx, rtx); +/* Define the function that build the compare insn for scc and bcc. */ +extern rtx gen_compare_reg (rtx cmp); +extern rtx sparc_emit_float_lib_cmp (rtx, rtx, enum rtx_code); +extern void sparc_emit_floatunsdi (rtx [2], enum machine_mode); +extern void sparc_emit_fixunsdi (rtx [2], enum machine_mode); +extern void emit_tfmode_binop (enum rtx_code, rtx *); +extern void emit_tfmode_unop (enum rtx_code, rtx *); +extern void emit_tfmode_cvt (enum rtx_code, rtx *); +extern bool legitimate_constant_p (rtx); +extern bool constant_address_p (rtx); +extern bool legitimate_pic_operand_p (rtx); +extern rtx sparc_legitimize_reload_address (rtx, enum machine_mode, int, int, + int, int *win); +extern void sparc_emit_call_insn (rtx, rtx); +extern void sparc_defer_case_vector (rtx, rtx, int); +extern bool sparc_expand_move (enum machine_mode, rtx *); +extern void sparc_emit_set_symbolic_const64 (rtx, rtx, rtx); +extern int sparc_splitdi_legitimate (rtx, rtx); +extern int sparc_absnegfloat_split_legitimate (rtx, rtx); +extern const char *output_ubranch (rtx, int, rtx); +extern const char *output_cbranch (rtx, rtx, int, int, int, rtx); +extern const char *output_return (rtx); +extern const char *output_sibcall (rtx, rtx); +extern const char *output_v8plus_shift (rtx *, rtx, const char *); +extern const char *output_v9branch (rtx, rtx, int, int, int, int, rtx); +extern const char *output_probe_stack_range (rtx, rtx); +extern bool emit_scc_insn (rtx []); +extern void emit_conditional_branch_insn (rtx []); +extern void print_operand (FILE *, rtx, int); +extern int mems_ok_for_ldd_peep (rtx, rtx, rtx); +extern int arith_double_4096_operand (rtx, enum machine_mode); +extern int arith_4096_operand (rtx, enum machine_mode); +extern int zero_operand (rtx, enum machine_mode); +extern int fp_zero_operand (rtx, enum machine_mode); +extern int reg_or_0_operand (rtx, enum machine_mode); +extern int empty_delay_slot (rtx); +extern int eligible_for_return_delay (rtx); +extern int eligible_for_sibcall_delay (rtx); +extern int tls_call_delay (rtx); +extern int emit_move_sequence (rtx, enum machine_mode); +extern int fp_sethi_p (rtx); +extern int fp_mov_p (rtx); +extern int fp_high_losum_p (rtx); +extern int mem_min_alignment (rtx, int); +extern int pic_address_needs_scratch (rtx); +extern int reg_unused_after (rtx, rtx); +extern int register_ok_for_ldd (rtx); +extern int memory_ok_for_ldd (rtx); +extern int registers_ok_for_ldd_peep (rtx, rtx); 
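/* Editor's illustration, not part of the imported header: select_cc_mode,
   declared above, picks one of the condition-code modes described in
   sparc-modes.def (CC/CCX for integer, CCFP for non-trapping float
   comparisons, CCFPE for comparisons that must trap on an unordered
   result).  The sketch below only restates that dispatch with invented
   sketch_* names; the real logic lives in sparc.c and covers more cases.  */

enum sketch_cc_mode
{
  SKETCH_CCmode,      /* 32-bit integer condition codes.           */
  SKETCH_CCXmode,     /* 64-bit integer condition codes (V9 only). */
  SKETCH_CCFPmode,    /* FP compare that never traps.              */
  SKETCH_CCFPEmode    /* FP compare that traps if unordered.       */
};

static enum sketch_cc_mode
sketch_select_cc_mode (int is_float, int traps_on_unordered, int is_64bit)
{
  if (is_float)
    return traps_on_unordered ? SKETCH_CCFPEmode : SKETCH_CCFPmode;
  return is_64bit ? SKETCH_CCXmode : SKETCH_CCmode;
}
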
+extern int v9_regcmp_p (enum rtx_code); +/* Function used for V8+ code generation. Returns 1 if the high + 32 bits of REG are 0 before INSN. */ +extern int sparc_check_64 (rtx, rtx); +extern rtx gen_df_reg (rtx, int); +extern void sparc_expand_compare_and_swap_12 (rtx, rtx, rtx, rtx); +#endif /* RTX_CODE */ + +#endif /* __SPARC_PROTOS_H__ */ diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c new file mode 100644 index 000000000..9682609fe --- /dev/null +++ b/gcc/config/sparc/sparc.c @@ -0,0 +1,9873 @@ +/* Subroutines for insn-output.c for SPARC. + Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, + 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Michael Tiemann (tiemann@cygnus.com) + 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans, + at Cygnus Support. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "insn-codes.h" +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "function.h" +#include "except.h" +#include "expr.h" +#include "optabs.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "ggc.h" +#include "tm_p.h" +#include "debug.h" +#include "target.h" +#include "target-def.h" +#include "cfglayout.h" +#include "gimple.h" +#include "langhooks.h" +#include "reload.h" +#include "params.h" +#include "df.h" +#include "dwarf2out.h" + +/* Processor costs */ +static const +struct processor_costs cypress_costs = { + COSTS_N_INSNS (2), /* int load */ + COSTS_N_INSNS (2), /* int signed load */ + COSTS_N_INSNS (2), /* int zeroed load */ + COSTS_N_INSNS (2), /* float load */ + COSTS_N_INSNS (5), /* fmov, fneg, fabs */ + COSTS_N_INSNS (5), /* fadd, fsub */ + COSTS_N_INSNS (1), /* fcmp */ + COSTS_N_INSNS (1), /* fmov, fmovr */ + COSTS_N_INSNS (7), /* fmul */ + COSTS_N_INSNS (37), /* fdivs */ + COSTS_N_INSNS (37), /* fdivd */ + COSTS_N_INSNS (63), /* fsqrts */ + COSTS_N_INSNS (63), /* fsqrtd */ + COSTS_N_INSNS (1), /* imul */ + COSTS_N_INSNS (1), /* imulX */ + 0, /* imul bit factor */ + COSTS_N_INSNS (1), /* idiv */ + COSTS_N_INSNS (1), /* idivX */ + COSTS_N_INSNS (1), /* movcc/movr */ + 0, /* shift penalty */ +}; + +static const +struct processor_costs supersparc_costs = { + COSTS_N_INSNS (1), /* int load */ + COSTS_N_INSNS (1), /* int signed load */ + COSTS_N_INSNS (1), /* int zeroed load */ + COSTS_N_INSNS (0), /* float load */ + COSTS_N_INSNS (3), /* fmov, fneg, fabs */ + COSTS_N_INSNS (3), /* fadd, fsub */ + COSTS_N_INSNS (3), /* fcmp */ + COSTS_N_INSNS (1), /* fmov, fmovr */ + COSTS_N_INSNS (3), /* fmul */ + COSTS_N_INSNS (6), /* fdivs */ + COSTS_N_INSNS (9), /* fdivd */ + COSTS_N_INSNS (12), /* fsqrts */ + COSTS_N_INSNS (12), /* fsqrtd */ + COSTS_N_INSNS (4), 
/* imul */ + COSTS_N_INSNS (4), /* imulX */ + 0, /* imul bit factor */ + COSTS_N_INSNS (4), /* idiv */ + COSTS_N_INSNS (4), /* idivX */ + COSTS_N_INSNS (1), /* movcc/movr */ + 1, /* shift penalty */ +}; + +static const +struct processor_costs hypersparc_costs = { + COSTS_N_INSNS (1), /* int load */ + COSTS_N_INSNS (1), /* int signed load */ + COSTS_N_INSNS (1), /* int zeroed load */ + COSTS_N_INSNS (1), /* float load */ + COSTS_N_INSNS (1), /* fmov, fneg, fabs */ + COSTS_N_INSNS (1), /* fadd, fsub */ + COSTS_N_INSNS (1), /* fcmp */ + COSTS_N_INSNS (1), /* fmov, fmovr */ + COSTS_N_INSNS (1), /* fmul */ + COSTS_N_INSNS (8), /* fdivs */ + COSTS_N_INSNS (12), /* fdivd */ + COSTS_N_INSNS (17), /* fsqrts */ + COSTS_N_INSNS (17), /* fsqrtd */ + COSTS_N_INSNS (17), /* imul */ + COSTS_N_INSNS (17), /* imulX */ + 0, /* imul bit factor */ + COSTS_N_INSNS (17), /* idiv */ + COSTS_N_INSNS (17), /* idivX */ + COSTS_N_INSNS (1), /* movcc/movr */ + 0, /* shift penalty */ +}; + +static const +struct processor_costs leon_costs = { + COSTS_N_INSNS (1), /* int load */ + COSTS_N_INSNS (1), /* int signed load */ + COSTS_N_INSNS (1), /* int zeroed load */ + COSTS_N_INSNS (1), /* float load */ + COSTS_N_INSNS (1), /* fmov, fneg, fabs */ + COSTS_N_INSNS (1), /* fadd, fsub */ + COSTS_N_INSNS (1), /* fcmp */ + COSTS_N_INSNS (1), /* fmov, fmovr */ + COSTS_N_INSNS (1), /* fmul */ + COSTS_N_INSNS (15), /* fdivs */ + COSTS_N_INSNS (15), /* fdivd */ + COSTS_N_INSNS (23), /* fsqrts */ + COSTS_N_INSNS (23), /* fsqrtd */ + COSTS_N_INSNS (5), /* imul */ + COSTS_N_INSNS (5), /* imulX */ + 0, /* imul bit factor */ + COSTS_N_INSNS (5), /* idiv */ + COSTS_N_INSNS (5), /* idivX */ + COSTS_N_INSNS (1), /* movcc/movr */ + 0, /* shift penalty */ +}; + +static const +struct processor_costs sparclet_costs = { + COSTS_N_INSNS (3), /* int load */ + COSTS_N_INSNS (3), /* int signed load */ + COSTS_N_INSNS (1), /* int zeroed load */ + COSTS_N_INSNS (1), /* float load */ + COSTS_N_INSNS (1), /* fmov, fneg, fabs */ + COSTS_N_INSNS (1), /* fadd, fsub */ + COSTS_N_INSNS (1), /* fcmp */ + COSTS_N_INSNS (1), /* fmov, fmovr */ + COSTS_N_INSNS (1), /* fmul */ + COSTS_N_INSNS (1), /* fdivs */ + COSTS_N_INSNS (1), /* fdivd */ + COSTS_N_INSNS (1), /* fsqrts */ + COSTS_N_INSNS (1), /* fsqrtd */ + COSTS_N_INSNS (5), /* imul */ + COSTS_N_INSNS (5), /* imulX */ + 0, /* imul bit factor */ + COSTS_N_INSNS (5), /* idiv */ + COSTS_N_INSNS (5), /* idivX */ + COSTS_N_INSNS (1), /* movcc/movr */ + 0, /* shift penalty */ +}; + +static const +struct processor_costs ultrasparc_costs = { + COSTS_N_INSNS (2), /* int load */ + COSTS_N_INSNS (3), /* int signed load */ + COSTS_N_INSNS (2), /* int zeroed load */ + COSTS_N_INSNS (2), /* float load */ + COSTS_N_INSNS (1), /* fmov, fneg, fabs */ + COSTS_N_INSNS (4), /* fadd, fsub */ + COSTS_N_INSNS (1), /* fcmp */ + COSTS_N_INSNS (2), /* fmov, fmovr */ + COSTS_N_INSNS (4), /* fmul */ + COSTS_N_INSNS (13), /* fdivs */ + COSTS_N_INSNS (23), /* fdivd */ + COSTS_N_INSNS (13), /* fsqrts */ + COSTS_N_INSNS (23), /* fsqrtd */ + COSTS_N_INSNS (4), /* imul */ + COSTS_N_INSNS (4), /* imulX */ + 2, /* imul bit factor */ + COSTS_N_INSNS (37), /* idiv */ + COSTS_N_INSNS (68), /* idivX */ + COSTS_N_INSNS (2), /* movcc/movr */ + 2, /* shift penalty */ +}; + +static const +struct processor_costs ultrasparc3_costs = { + COSTS_N_INSNS (2), /* int load */ + COSTS_N_INSNS (3), /* int signed load */ + COSTS_N_INSNS (3), /* int zeroed load */ + COSTS_N_INSNS (2), /* float load */ + COSTS_N_INSNS (3), /* fmov, fneg, fabs */ + COSTS_N_INSNS (4), 
/* fadd, fsub */ + COSTS_N_INSNS (5), /* fcmp */ + COSTS_N_INSNS (3), /* fmov, fmovr */ + COSTS_N_INSNS (4), /* fmul */ + COSTS_N_INSNS (17), /* fdivs */ + COSTS_N_INSNS (20), /* fdivd */ + COSTS_N_INSNS (20), /* fsqrts */ + COSTS_N_INSNS (29), /* fsqrtd */ + COSTS_N_INSNS (6), /* imul */ + COSTS_N_INSNS (6), /* imulX */ + 0, /* imul bit factor */ + COSTS_N_INSNS (40), /* idiv */ + COSTS_N_INSNS (71), /* idivX */ + COSTS_N_INSNS (2), /* movcc/movr */ + 0, /* shift penalty */ +}; + +static const +struct processor_costs niagara_costs = { + COSTS_N_INSNS (3), /* int load */ + COSTS_N_INSNS (3), /* int signed load */ + COSTS_N_INSNS (3), /* int zeroed load */ + COSTS_N_INSNS (9), /* float load */ + COSTS_N_INSNS (8), /* fmov, fneg, fabs */ + COSTS_N_INSNS (8), /* fadd, fsub */ + COSTS_N_INSNS (26), /* fcmp */ + COSTS_N_INSNS (8), /* fmov, fmovr */ + COSTS_N_INSNS (29), /* fmul */ + COSTS_N_INSNS (54), /* fdivs */ + COSTS_N_INSNS (83), /* fdivd */ + COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */ + COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */ + COSTS_N_INSNS (11), /* imul */ + COSTS_N_INSNS (11), /* imulX */ + 0, /* imul bit factor */ + COSTS_N_INSNS (72), /* idiv */ + COSTS_N_INSNS (72), /* idivX */ + COSTS_N_INSNS (1), /* movcc/movr */ + 0, /* shift penalty */ +}; + +static const +struct processor_costs niagara2_costs = { + COSTS_N_INSNS (3), /* int load */ + COSTS_N_INSNS (3), /* int signed load */ + COSTS_N_INSNS (3), /* int zeroed load */ + COSTS_N_INSNS (3), /* float load */ + COSTS_N_INSNS (6), /* fmov, fneg, fabs */ + COSTS_N_INSNS (6), /* fadd, fsub */ + COSTS_N_INSNS (6), /* fcmp */ + COSTS_N_INSNS (6), /* fmov, fmovr */ + COSTS_N_INSNS (6), /* fmul */ + COSTS_N_INSNS (19), /* fdivs */ + COSTS_N_INSNS (33), /* fdivd */ + COSTS_N_INSNS (19), /* fsqrts */ + COSTS_N_INSNS (33), /* fsqrtd */ + COSTS_N_INSNS (5), /* imul */ + COSTS_N_INSNS (5), /* imulX */ + 0, /* imul bit factor */ + COSTS_N_INSNS (31), /* idiv, average of 12 - 41 cycle range */ + COSTS_N_INSNS (31), /* idivX, average of 12 - 41 cycle range */ + COSTS_N_INSNS (1), /* movcc/movr */ + 0, /* shift penalty */ +}; + +const struct processor_costs *sparc_costs = &cypress_costs; + +#ifdef HAVE_AS_RELAX_OPTION +/* If 'as' and 'ld' are relaxing tail call insns into branch always, use + "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized. + With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if + somebody does not branch between the sethi and jmp. */ +#define LEAF_SIBCALL_SLOT_RESERVED_P 1 +#else +#define LEAF_SIBCALL_SLOT_RESERVED_P \ + ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic) +#endif + +/* Global variables for machine-dependent things. */ + +/* Size of frame. Need to know this to emit return insns from leaf procedures. + ACTUAL_FSIZE is set by sparc_compute_frame_size() which is called during the + reload pass. This is important as the value is later used for scheduling + (to see what can go in a delay slot). + APPARENT_FSIZE is the size of the stack less the register save area and less + the outgoing argument area. It is used when saving call preserved regs. */ +static HOST_WIDE_INT apparent_fsize; +static HOST_WIDE_INT actual_fsize; + +/* Number of live general or floating point registers needed to be + saved (as 4-byte quantities). */ +static int num_gfregs; + +/* Vector to say how input registers are mapped to output registers. + HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to + eliminate it. 
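The processor cost tables above all express latencies through COSTS_N_INSNS, so the middle end compares them against ordinary instructions in the same units; assuming the conventional rtl.h definition of COSTS_N_INSNS (N) as (N) * 4, the ultrasparc fdivd entry of COSTS_N_INSNS (23) reads as twenty-three instructions' worth of cost. A hedged sketch of how such a table is consulted, with demo_* names standing in for the real types:

/* Sketch only.  sparc_costs points at one of the tables above, so a
   query is a plain field read; the stored values are already scaled
   by COSTS_N_INSNS.  */
#define DEMO_COSTS_N_INSNS(N) ((N) * 4)   /* assumed rtl.h convention */

struct demo_costs { int fdivs; int fdivd; };

static int
demo_fdiv_cost (const struct demo_costs *costs, int double_p)
{
  return double_p ? costs->fdivd : costs->fdivs;
}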
You must use -fomit-frame-pointer to get that. */ +char leaf_reg_remap[] = +{ 0, 1, 2, 3, 4, 5, 6, 7, + -1, -1, -1, -1, -1, -1, 14, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 8, 9, 10, 11, 12, 13, -1, 15, + + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100}; + +/* Vector, indexed by hard register number, which contains 1 + for a register that is allowable in a candidate for leaf + function treatment. */ +char sparc_leaf_regs[] = +{ 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1}; + +struct GTY(()) machine_function +{ + /* Some local-dynamic TLS symbol name. */ + const char *some_ld_name; + + /* True if the current function is leaf and uses only leaf regs, + so that the SPARC leaf function optimization can be applied. + Private version of current_function_uses_only_leaf_regs, see + sparc_expand_prologue for the rationale. */ + int leaf_function_p; + + /* True if the data calculated by sparc_expand_prologue are valid. */ + bool prologue_data_valid_p; +}; + +#define sparc_leaf_function_p cfun->machine->leaf_function_p +#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p + +/* Register we pretend to think the frame pointer is allocated to. + Normally, this is %fp, but if we are in a leaf procedure, this + is %sp+"something". We record "something" separately as it may + be too big for reg+constant addressing. */ +static rtx frame_base_reg; +static HOST_WIDE_INT frame_base_offset; + +/* 1 if the next opcode is to be specially indented. 
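leaf_reg_remap above encodes the key fact about SPARC leaf functions: if no `save' instruction is executed, the function keeps running in the caller's register window, so registers the compiler allocated as %i0..%i5 are physically %o0..%o5. A hedged sketch of reading the table, with a hypothetical helper and the usual SPARC hard-register numbering (%o0 = 8, %i0 = 24):

/* Sketch only, not upstream code.  remap[24] == 8 means %i0 is really
   %o0 in a leaf function; %i7 (31) becomes %o7 (15); -1 marks
   registers a leaf function must not use at all.  */
static int
demo_leaf_renumber (const signed char *remap, int regno)
{
  return remap[regno];
}
/* demo_leaf_renumber (table, 24) == 8, so "mov 1, %i0" is emitted as
   "mov 1, %o0" when the leaf optimization applies.  */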
*/ +int sparc_indent_opcode = 0; + +static bool sparc_handle_option (size_t, const char *, int); +static void sparc_option_override (void); +static void sparc_init_modes (void); +static void scan_record_type (const_tree, int *, int *, int *); +static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool, bool, int *, int *); + +static int supersparc_adjust_cost (rtx, rtx, rtx, int); +static int hypersparc_adjust_cost (rtx, rtx, rtx, int); + +static void sparc_emit_set_const32 (rtx, rtx); +static void sparc_emit_set_const64 (rtx, rtx); +static void sparc_output_addr_vec (rtx); +static void sparc_output_addr_diff_vec (rtx); +static void sparc_output_deferred_case_vectors (void); +static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool); +static rtx sparc_builtin_saveregs (void); +static int epilogue_renumber (rtx *, int); +static bool sparc_assemble_integer (rtx, unsigned int, int); +static int set_extends (rtx); +static void load_got_register (void); +static int save_or_restore_regs (int, int, rtx, int, int); +static void emit_save_or_restore_regs (int); +static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT); +static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT); +static void sparc_solaris_elf_asm_named_section (const char *, unsigned int, + tree) ATTRIBUTE_UNUSED; +static int sparc_adjust_cost (rtx, rtx, rtx, int); +static int sparc_issue_rate (void); +static void sparc_sched_init (FILE *, int, int); +static int sparc_use_sched_lookahead (void); + +static void emit_soft_tfmode_libcall (const char *, int, rtx *); +static void emit_soft_tfmode_binop (enum rtx_code, rtx *); +static void emit_soft_tfmode_unop (enum rtx_code, rtx *); +static void emit_soft_tfmode_cvt (enum rtx_code, rtx *); +static void emit_hard_tfmode_operation (enum rtx_code, rtx *); + +static bool sparc_function_ok_for_sibcall (tree, tree); +static void sparc_init_libfuncs (void); +static void sparc_init_builtins (void); +static void sparc_vis_init_builtins (void); +static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int); +static tree sparc_fold_builtin (tree, int, tree *, bool); +static int sparc_vis_mul8x16 (int, int); +static tree sparc_handle_vis_mul8x16 (int, tree, tree, tree); +static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, + HOST_WIDE_INT, tree); +static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT, + HOST_WIDE_INT, const_tree); +static void sparc_reorg (void); +static struct machine_function * sparc_init_machine_status (void); +static bool sparc_cannot_force_const_mem (rtx); +static rtx sparc_tls_get_addr (void); +static rtx sparc_tls_got (void); +static const char *get_some_local_dynamic_name (void); +static int get_some_local_dynamic_name_1 (rtx *, void *); +static bool sparc_rtx_costs (rtx, int, int, int *, bool); +static rtx sparc_function_value (const_tree, const_tree, bool); +static rtx sparc_libcall_value (enum machine_mode, const_rtx); +static bool sparc_function_value_regno_p (const unsigned int); +static rtx sparc_struct_value_rtx (tree, int); +static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode, + int *, const_tree, int); +static bool sparc_return_in_memory (const_tree, const_tree); +static bool sparc_strict_argument_naming (CUMULATIVE_ARGS *); +static void sparc_va_start (tree, rtx); +static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *); +static bool sparc_vector_mode_supported_p (enum machine_mode); +static bool sparc_tls_referenced_p 
(rtx); +static rtx sparc_legitimize_tls_address (rtx); +static rtx sparc_legitimize_pic_address (rtx, rtx); +static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode); +static rtx sparc_delegitimize_address (rtx); +static bool sparc_mode_dependent_address_p (const_rtx); +static bool sparc_pass_by_reference (CUMULATIVE_ARGS *, + enum machine_mode, const_tree, bool); +static void sparc_function_arg_advance (CUMULATIVE_ARGS *, + enum machine_mode, const_tree, bool); +static rtx sparc_function_arg_1 (const CUMULATIVE_ARGS *, + enum machine_mode, const_tree, bool, bool); +static rtx sparc_function_arg (CUMULATIVE_ARGS *, + enum machine_mode, const_tree, bool); +static rtx sparc_function_incoming_arg (CUMULATIVE_ARGS *, + enum machine_mode, const_tree, bool); +static unsigned int sparc_function_arg_boundary (enum machine_mode, + const_tree); +static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *, + enum machine_mode, tree, bool); +static void sparc_dwarf_handle_frame_unspec (const char *, rtx, int); +static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; +static void sparc_file_end (void); +static bool sparc_frame_pointer_required (void); +static bool sparc_can_eliminate (const int, const int); +static void sparc_conditional_register_usage (void); +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +static const char *sparc_mangle_type (const_tree); +#endif +static void sparc_trampoline_init (rtx, tree, rtx); +static enum machine_mode sparc_preferred_simd_mode (enum machine_mode); + +#ifdef SUBTARGET_ATTRIBUTE_TABLE +/* Table of valid machine attributes. */ +static const struct attribute_spec sparc_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ + SUBTARGET_ATTRIBUTE_TABLE, + { NULL, 0, 0, false, false, false, NULL } +}; +#endif + +/* Option handling. */ + +/* Parsed value. */ +enum cmodel sparc_cmodel; + +char sparc_hard_reg_printed[8]; + +struct sparc_cpu_select sparc_select[] = +{ + /* switch name, tune arch */ + { (char *)0, "default", 1, 1 }, + { (char *)0, "-mcpu=", 1, 1 }, + { (char *)0, "-mtune=", 1, 0 }, + { 0, 0, 0, 0 } +}; + +/* CPU type. This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx. */ +enum processor_type sparc_cpu; + +/* Whether an FPU option was specified. */ +static bool fpu_option_set = false; + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ +static const struct default_options sparc_option_optimization_table[] = + { + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +/* Initialize the GCC target structure. */ + +/* The default is to use .half rather than .short for aligned HI objects. */ +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" + +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t" +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t" +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t" + +/* The target hook has to handle DI-mode values. 
*/ +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER sparc_assemble_integer + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT sparc_sched_init +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS sparc_init_builtins + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address +#undef TARGET_MODE_DEPENDENT_ADDRESS_P +#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN sparc_expand_builtin +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN sparc_fold_builtin + +#if TARGET_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS true +#endif + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG sparc_reorg + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS sparc_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE sparc_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE sparc_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG sparc_function_arg +#undef TARGET_FUNCTION_INCOMING_ARG +#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary + +#undef TARGET_EXPAND_BUILTIN_SAVEREGS +#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start 
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p + +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode + +#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC +#define TARGET_DWARF_HANDLE_FRAME_UNSPEC sparc_dwarf_handle_frame_unspec + +#ifdef SUBTARGET_INSERT_ATTRIBUTES +#undef TARGET_INSERT_ATTRIBUTES +#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES +#endif + +#ifdef SUBTARGET_ATTRIBUTE_TABLE +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table +#endif + +#undef TARGET_RELAXED_ORDERING +#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION sparc_handle_option +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE sparc_option_override +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE sparc_option_optimization_table + +#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL) +#undef TARGET_ASM_OUTPUT_DWARF_DTPREL +#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel +#endif + +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END sparc_file_end + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE sparc_can_eliminate + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE sparc_mangle_type +#endif + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p + +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* Implement TARGET_HANDLE_OPTION. */ + +static bool +sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) +{ + switch (code) + { + case OPT_mfpu: + case OPT_mhard_float: + case OPT_msoft_float: + fpu_option_set = true; + break; + + case OPT_mcpu_: + sparc_select[1].string = arg; + break; + + case OPT_mtune_: + sparc_select[2].string = arg; + break; + } + + return true; +} + +/* Validate and override various options, and do some machine dependent + initialization. */ + +static void +sparc_option_override (void) +{ + static struct code_model { + const char *const name; + const enum cmodel value; + } const cmodels[] = { + { "32", CM_32 }, + { "medlow", CM_MEDLOW }, + { "medmid", CM_MEDMID }, + { "medany", CM_MEDANY }, + { "embmedany", CM_EMBMEDANY }, + { NULL, (enum cmodel) 0 } + }; + const struct code_model *cmodel; + /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */ + static struct cpu_default { + const int cpu; + const char *const name; + } const cpu_default[] = { + /* There must be one entry here for each TARGET_CPU value. 
*/ + { TARGET_CPU_sparc, "cypress" }, + { TARGET_CPU_v8, "v8" }, + { TARGET_CPU_supersparc, "supersparc" }, + { TARGET_CPU_hypersparc, "hypersparc" }, + { TARGET_CPU_leon, "leon" }, + { TARGET_CPU_sparclite, "f930" }, + { TARGET_CPU_sparclite86x, "sparclite86x" }, + { TARGET_CPU_sparclet, "tsc701" }, + { TARGET_CPU_v9, "v9" }, + { TARGET_CPU_ultrasparc, "ultrasparc" }, + { TARGET_CPU_ultrasparc3, "ultrasparc3" }, + { TARGET_CPU_niagara, "niagara" }, + { TARGET_CPU_niagara2, "niagara2" }, + { 0, 0 } + }; + const struct cpu_default *def; + /* Table of values for -m{cpu,tune}=. */ + static struct cpu_table { + const char *const name; + const enum processor_type processor; + const int disable; + const int enable; + } const cpu_table[] = { + { "v7", PROCESSOR_V7, MASK_ISA, 0 }, + { "cypress", PROCESSOR_CYPRESS, MASK_ISA, 0 }, + { "v8", PROCESSOR_V8, MASK_ISA, MASK_V8 }, + /* TI TMS390Z55 supersparc */ + { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 }, + { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU }, + /* LEON */ + { "leon", PROCESSOR_LEON, MASK_ISA, MASK_V8|MASK_FPU }, + { "sparclite", PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE }, + /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */ + { "f930", PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE }, + /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */ + { "f934", PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU }, + { "sparclite86x", PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU, + MASK_SPARCLITE }, + { "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET }, + /* TEMIC sparclet */ + { "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET }, + { "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 }, + /* UltraSPARC I, II, IIi */ + { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, + /* Although insns using %y are deprecated, it is a clear win. */ + MASK_V9|MASK_DEPRECATED_V8_INSNS}, + /* UltraSPARC III */ + /* ??? Check if %y issue still holds true. */ + { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, + MASK_V9|MASK_DEPRECATED_V8_INSNS}, + /* UltraSPARC T1 */ + { "niagara", PROCESSOR_NIAGARA, MASK_ISA, + MASK_V9|MASK_DEPRECATED_V8_INSNS}, + /* UltraSPARC T2 */ + { "niagara2", PROCESSOR_NIAGARA2, MASK_ISA, MASK_V9}, + { 0, (enum processor_type) 0, 0, 0 } + }; + const struct cpu_table *cpu; + const struct sparc_cpu_select *sel; + int fpu; + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + +#ifndef SPARC_BI_ARCH + /* Check for unsupported architecture size. */ + if (! TARGET_64BIT != DEFAULT_ARCH32_P) + error ("%s is not supported by this configuration", + DEFAULT_ARCH32_P ? "-m64" : "-m32"); +#endif + + /* We force all 64bit archs to use 128 bit long double */ + if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128) + { + error ("-mlong-double-64 not allowed with -m64"); + target_flags |= MASK_LONG_DOUBLE_128; + } + + /* Code model selection. */ + sparc_cmodel = SPARC_DEFAULT_CMODEL; + +#ifdef SPARC_BI_ARCH + if (TARGET_ARCH32) + sparc_cmodel = CM_32; +#endif + + if (sparc_cmodel_string != NULL) + { + if (TARGET_ARCH64) + { + for (cmodel = &cmodels[0]; cmodel->name; cmodel++) + if (strcmp (sparc_cmodel_string, cmodel->name) == 0) + break; + if (cmodel->name == NULL) + error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string); + else + sparc_cmodel = cmodel->value; + } + else + error ("-mcmodel= is not supported on 32 bit systems"); + } + + fpu = target_flags & MASK_FPU; /* save current -mfpu status */ + + /* Set the default CPU. 
*/ + for (def = &cpu_default[0]; def->name; ++def) + if (def->cpu == TARGET_CPU_DEFAULT) + break; + gcc_assert (def->name); + sparc_select[0].string = def->name; + + for (sel = &sparc_select[0]; sel->name; ++sel) + { + if (sel->string) + { + for (cpu = &cpu_table[0]; cpu->name; ++cpu) + if (! strcmp (sel->string, cpu->name)) + { + if (sel->set_tune_p) + sparc_cpu = cpu->processor; + + if (sel->set_arch_p) + { + target_flags &= ~cpu->disable; + target_flags |= cpu->enable; + } + break; + } + + if (! cpu->name) + error ("bad value (%s) for %s switch", sel->string, sel->name); + } + } + + /* If -mfpu or -mno-fpu was explicitly used, don't override with + the processor default. */ + if (fpu_option_set) + target_flags = (target_flags & ~MASK_FPU) | fpu; + + /* Don't allow -mvis if FPU is disabled. */ + if (! TARGET_FPU) + target_flags &= ~MASK_VIS; + + /* -mvis assumes UltraSPARC+, so we are sure v9 instructions + are available. + -m64 also implies v9. */ + if (TARGET_VIS || TARGET_ARCH64) + { + target_flags |= MASK_V9; + target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE); + } + + /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */ + if (TARGET_V9 && TARGET_ARCH32) + target_flags |= MASK_DEPRECATED_V8_INSNS; + + /* V8PLUS requires V9, makes no sense in 64 bit mode. */ + if (! TARGET_V9 || TARGET_ARCH64) + target_flags &= ~MASK_V8PLUS; + + /* Don't use stack biasing in 32 bit mode. */ + if (TARGET_ARCH32) + target_flags &= ~MASK_STACK_BIAS; + + /* Supply a default value for align_functions. */ + if (align_functions == 0 + && (sparc_cpu == PROCESSOR_ULTRASPARC + || sparc_cpu == PROCESSOR_ULTRASPARC3 + || sparc_cpu == PROCESSOR_NIAGARA + || sparc_cpu == PROCESSOR_NIAGARA2)) + align_functions = 32; + + /* Validate PCC_STRUCT_RETURN. */ + if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN) + flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1); + + /* Only use .uaxword when compiling for a 64-bit target. */ + if (!TARGET_ARCH64) + targetm.asm_out.unaligned_op.di = NULL; + + /* Do various machine dependent initializations. */ + sparc_init_modes (); + + /* Set up function hooks. */ + init_machine_status = sparc_init_machine_status; + + switch (sparc_cpu) + { + case PROCESSOR_V7: + case PROCESSOR_CYPRESS: + sparc_costs = &cypress_costs; + break; + case PROCESSOR_V8: + case PROCESSOR_SPARCLITE: + case PROCESSOR_SUPERSPARC: + sparc_costs = &supersparc_costs; + break; + case PROCESSOR_F930: + case PROCESSOR_F934: + case PROCESSOR_HYPERSPARC: + case PROCESSOR_SPARCLITE86X: + sparc_costs = &hypersparc_costs; + break; + case PROCESSOR_LEON: + sparc_costs = &leon_costs; + break; + case PROCESSOR_SPARCLET: + case PROCESSOR_TSC701: + sparc_costs = &sparclet_costs; + break; + case PROCESSOR_V9: + case PROCESSOR_ULTRASPARC: + sparc_costs = &ultrasparc_costs; + break; + case PROCESSOR_ULTRASPARC3: + sparc_costs = &ultrasparc3_costs; + break; + case PROCESSOR_NIAGARA: + sparc_costs = &niagara_costs; + break; + case PROCESSOR_NIAGARA2: + sparc_costs = &niagara2_costs; + break; + }; + +#ifdef TARGET_DEFAULT_LONG_DOUBLE_128 + if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) + target_flags |= MASK_LONG_DOUBLE_128; +#endif + + maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, + ((sparc_cpu == PROCESSOR_ULTRASPARC + || sparc_cpu == PROCESSOR_NIAGARA + || sparc_cpu == PROCESSOR_NIAGARA2) + ? 2 + : (sparc_cpu == PROCESSOR_ULTRASPARC3 + ? 
8 : 3)), + global_options.x_param_values, + global_options_set.x_param_values); + maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, + ((sparc_cpu == PROCESSOR_ULTRASPARC + || sparc_cpu == PROCESSOR_ULTRASPARC3 + || sparc_cpu == PROCESSOR_NIAGARA + || sparc_cpu == PROCESSOR_NIAGARA2) + ? 64 : 32), + global_options.x_param_values, + global_options_set.x_param_values); + + /* Disable save slot sharing for call-clobbered registers by default. + The IRA sharing algorithm works on single registers only and this + pessimizes for double floating-point registers. */ + if (!global_options_set.x_flag_ira_share_save_slots) + flag_ira_share_save_slots = 0; +} + +/* Miscellaneous utilities. */ + +/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move + or branch on register contents instructions. */ + +int +v9_regcmp_p (enum rtx_code code) +{ + return (code == EQ || code == NE || code == GE || code == LT + || code == LE || code == GT); +} + +/* Nonzero if OP is a floating point constant which can + be loaded into an integer register using a single + sethi instruction. */ + +int +fp_sethi_p (rtx op) +{ + if (GET_CODE (op) == CONST_DOUBLE) + { + REAL_VALUE_TYPE r; + long i; + + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + REAL_VALUE_TO_TARGET_SINGLE (r, i); + return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i); + } + + return 0; +} + +/* Nonzero if OP is a floating point constant which can + be loaded into an integer register using a single + mov instruction. */ + +int +fp_mov_p (rtx op) +{ + if (GET_CODE (op) == CONST_DOUBLE) + { + REAL_VALUE_TYPE r; + long i; + + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + REAL_VALUE_TO_TARGET_SINGLE (r, i); + return SPARC_SIMM13_P (i); + } + + return 0; +} + +/* Nonzero if OP is a floating point constant which can + be loaded into an integer register using a high/losum + instruction sequence. */ + +int +fp_high_losum_p (rtx op) +{ + /* The constraints calling this should only be in + SFmode move insns, so any constant which cannot + be moved using a single insn will do. */ + if (GET_CODE (op) == CONST_DOUBLE) + { + REAL_VALUE_TYPE r; + long i; + + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + REAL_VALUE_TO_TARGET_SINGLE (r, i); + return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i); + } + + return 0; +} + +/* Return true if the address of LABEL can be loaded by means of the + mov{si,di}_pic_label_ref patterns in PIC mode. */ + +static bool +can_use_mov_pic_label_ref (rtx label) +{ + /* VxWorks does not impose a fixed gap between segments; the run-time + gap can be different from the object-file gap. We therefore can't + assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we + are absolutely sure that X is in the same segment as the GOT. + Unfortunately, the flexibility of linker scripts means that we + can't be sure of that in general, so assume that GOT-relative + accesses are never valid on VxWorks. */ + if (TARGET_VXWORKS_RTP) + return false; + + /* Similarly, if the label is non-local, it might end up being placed + in a different section than the current one; now mov_pic_label_ref + requires the label and the code to be in the same section. */ + if (LABEL_REF_NONLOCAL_P (label)) + return false; + + /* Finally, if we are reordering basic blocks and partition into hot + and cold sections, this might happen for any label. */ + if (flag_reorder_blocks_and_partition) + return false; + + return true; +} + +/* Expand a move instruction. Return true if all work is done. 
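fp_sethi_p, fp_mov_p and fp_high_losum_p above sort single-float bit images into three loading strategies: a value that fits the signed 13-bit immediate takes one mov, a value whose low 10 bits are clear takes one sethi, and everything else needs a sethi/%lo pair. A hedged sketch of the two underlying tests -- the demo_* macros approximate SPARC_SIMM13_P and SPARC_SETHI_P from sparc.h rather than reproducing them:

/* Sketch only.  A SPARC arithmetic immediate is a signed 13-bit field;
   sethi fills the upper 22 bits of a word, i.e. any value whose low
   10 bits are zero.  */
#define DEMO_SIMM13_P(v)  ((v) >= -4096 && (v) < 4096)
#define DEMO_SETHI_P(v)   (((unsigned int) (v) & 0x3ff) == 0)

/* Examples: 0x00000123 -> single mov; 0x40490c00 -> single sethi;
   0x40490fdb (the single-precision bits of pi) -> sethi %hi + or %lo.  */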
*/ + +bool +sparc_expand_move (enum machine_mode mode, rtx *operands) +{ + /* Handle sets of MEM first. */ + if (GET_CODE (operands[0]) == MEM) + { + /* 0 is a register (or a pair of registers) on SPARC. */ + if (register_or_zero_operand (operands[1], mode)) + return false; + + if (!reload_in_progress) + { + operands[0] = validize_mem (operands[0]); + operands[1] = force_reg (mode, operands[1]); + } + } + + /* Fixup TLS cases. */ + if (TARGET_HAVE_TLS + && CONSTANT_P (operands[1]) + && sparc_tls_referenced_p (operands [1])) + { + operands[1] = sparc_legitimize_tls_address (operands[1]); + return false; + } + + /* Fixup PIC cases. */ + if (flag_pic && CONSTANT_P (operands[1])) + { + if (pic_address_needs_scratch (operands[1])) + operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX); + + /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */ + if (GET_CODE (operands[1]) == LABEL_REF + && can_use_mov_pic_label_ref (operands[1])) + { + if (mode == SImode) + { + emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1])); + return true; + } + + if (mode == DImode) + { + gcc_assert (TARGET_ARCH64); + emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1])); + return true; + } + } + + if (symbolic_operand (operands[1], mode)) + { + operands[1] + = sparc_legitimize_pic_address (operands[1], + reload_in_progress + ? operands[0] : NULL_RTX); + return false; + } + } + + /* If we are trying to toss an integer constant into FP registers, + or loading a FP or vector constant, force it into memory. */ + if (CONSTANT_P (operands[1]) + && REG_P (operands[0]) + && (SPARC_FP_REG_P (REGNO (operands[0])) + || SCALAR_FLOAT_MODE_P (mode) + || VECTOR_MODE_P (mode))) + { + /* emit_group_store will send such bogosity to us when it is + not storing directly into memory. So fix this up to avoid + crashes in output_constant_pool. */ + if (operands [1] == const0_rtx) + operands[1] = CONST0_RTX (mode); + + /* We can clear FP registers if TARGET_VIS, and always other regs. */ + if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG) + && const_zero_operand (operands[1], mode)) + return false; + + if (REGNO (operands[0]) < SPARC_FIRST_FP_REG + /* We are able to build any SF constant in integer registers + with at most 2 instructions. */ + && (mode == SFmode + /* And any DF constant in integer registers. */ + || (mode == DFmode + && (reload_completed || reload_in_progress)))) + return false; + + operands[1] = force_const_mem (mode, operands[1]); + if (!reload_in_progress) + operands[1] = validize_mem (operands[1]); + return false; + } + + /* Accept non-constants and valid constants unmodified. */ + if (!CONSTANT_P (operands[1]) + || GET_CODE (operands[1]) == HIGH + || input_operand (operands[1], mode)) + return false; + + switch (mode) + { + case QImode: + /* All QImode constants require only one insn, so proceed. */ + break; + + case HImode: + case SImode: + sparc_emit_set_const32 (operands[0], operands[1]); + return true; + + case DImode: + /* input_operand should have filtered out 32-bit mode. */ + sparc_emit_set_const64 (operands[0], operands[1]); + return true; + + default: + gcc_unreachable (); + } + + return false; +} + +/* Load OP1, a 32-bit constant, into OP0, a register. + We know it can't be done in one insn when we get + here, the move expander guarantees this. 
*/ + +static void +sparc_emit_set_const32 (rtx op0, rtx op1) +{ + enum machine_mode mode = GET_MODE (op0); + rtx temp; + + if (reload_in_progress || reload_completed) + temp = op0; + else + temp = gen_reg_rtx (mode); + + if (GET_CODE (op1) == CONST_INT) + { + gcc_assert (!small_int_operand (op1, mode) + && !const_high_operand (op1, mode)); + + /* Emit them as real moves instead of a HIGH/LO_SUM, + this way CSE can see everything and reuse intermediate + values if it wants. */ + emit_insn (gen_rtx_SET (VOIDmode, temp, + GEN_INT (INTVAL (op1) + & ~(HOST_WIDE_INT)0x3ff))); + + emit_insn (gen_rtx_SET (VOIDmode, + op0, + gen_rtx_IOR (mode, temp, + GEN_INT (INTVAL (op1) & 0x3ff)))); + } + else + { + /* A symbol, emit in the traditional way. */ + emit_insn (gen_rtx_SET (VOIDmode, temp, + gen_rtx_HIGH (mode, op1))); + emit_insn (gen_rtx_SET (VOIDmode, + op0, gen_rtx_LO_SUM (mode, temp, op1))); + } +} + +/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register. + If TEMP is nonzero, we are forbidden to use any other scratch + registers. Otherwise, we are allowed to generate them as needed. + + Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY + or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */ + +void +sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp) +{ + rtx temp1, temp2, temp3, temp4, temp5; + rtx ti_temp = 0; + + if (temp && GET_MODE (temp) == TImode) + { + ti_temp = temp; + temp = gen_rtx_REG (DImode, REGNO (temp)); + } + + /* SPARC-V9 code-model support. */ + switch (sparc_cmodel) + { + case CM_MEDLOW: + /* The range spanned by all instructions in the object is less + than 2^31 bytes (2GB) and the distance from any instruction + to the location of the label _GLOBAL_OFFSET_TABLE_ is less + than 2^31 bytes (2GB). + + The executable must be in the low 4TB of the virtual address + space. + + sethi %hi(symbol), %temp1 + or %temp1, %lo(symbol), %reg */ + if (temp) + temp1 = temp; /* op0 is allowed. */ + else + temp1 = gen_reg_rtx (DImode); + + emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1))); + emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1))); + break; + + case CM_MEDMID: + /* The range spanned by all instructions in the object is less + than 2^31 bytes (2GB) and the distance from any instruction + to the location of the label _GLOBAL_OFFSET_TABLE_ is less + than 2^31 bytes (2GB). + + The executable must be in the low 16TB of the virtual address + space. + + sethi %h44(symbol), %temp1 + or %temp1, %m44(symbol), %temp2 + sllx %temp2, 12, %temp3 + or %temp3, %l44(symbol), %reg */ + if (temp) + { + temp1 = op0; + temp2 = op0; + temp3 = temp; /* op0 is allowed. */ + } + else + { + temp1 = gen_reg_rtx (DImode); + temp2 = gen_reg_rtx (DImode); + temp3 = gen_reg_rtx (DImode); + } + + emit_insn (gen_seth44 (temp1, op1)); + emit_insn (gen_setm44 (temp2, temp1, op1)); + emit_insn (gen_rtx_SET (VOIDmode, temp3, + gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12)))); + emit_insn (gen_setl44 (op0, temp3, op1)); + break; + + case CM_MEDANY: + /* The range spanned by all instructions in the object is less + than 2^31 bytes (2GB) and the distance from any instruction + to the location of the label _GLOBAL_OFFSET_TABLE_ is less + than 2^31 bytes (2GB). + + The executable can be placed anywhere in the virtual address + space. 
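sparc_emit_set_const32 above deliberately emits the split as two plain SETs (a masked constant, then an IOR) so CSE can reuse the intermediate value. As a hedged illustration of the arithmetic only, with a hypothetical helper:

/* Sketch: the two values behind the SETs generated for a CONST_INT V.
   For V = 0x12345678:
     high = V & ~0x3ff = 0x12345400   (what sethi %hi(V) would load)
     low  = V &  0x3ff = 0x00000278   (the %lo(V) part OR'd back in)
   and high | low == 0x12345678 again.  */
static void
demo_split_const32 (unsigned int v, unsigned int *high, unsigned int *low)
{
  *high = v & ~0x3ffu;
  *low  = v & 0x3ffu;
}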
+ + sethi %hh(symbol), %temp1 + sethi %lm(symbol), %temp2 + or %temp1, %hm(symbol), %temp3 + sllx %temp3, 32, %temp4 + or %temp4, %temp2, %temp5 + or %temp5, %lo(symbol), %reg */ + if (temp) + { + /* It is possible that one of the registers we got for operands[2] + might coincide with that of operands[0] (which is why we made + it TImode). Pick the other one to use as our scratch. */ + if (rtx_equal_p (temp, op0)) + { + gcc_assert (ti_temp); + temp = gen_rtx_REG (DImode, REGNO (temp) + 1); + } + temp1 = op0; + temp2 = temp; /* op0 is _not_ allowed, see above. */ + temp3 = op0; + temp4 = op0; + temp5 = op0; + } + else + { + temp1 = gen_reg_rtx (DImode); + temp2 = gen_reg_rtx (DImode); + temp3 = gen_reg_rtx (DImode); + temp4 = gen_reg_rtx (DImode); + temp5 = gen_reg_rtx (DImode); + } + + emit_insn (gen_sethh (temp1, op1)); + emit_insn (gen_setlm (temp2, op1)); + emit_insn (gen_sethm (temp3, temp1, op1)); + emit_insn (gen_rtx_SET (VOIDmode, temp4, + gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); + emit_insn (gen_rtx_SET (VOIDmode, temp5, + gen_rtx_PLUS (DImode, temp4, temp2))); + emit_insn (gen_setlo (op0, temp5, op1)); + break; + + case CM_EMBMEDANY: + /* Old old old backwards compatibility kruft here. + Essentially it is MEDLOW with a fixed 64-bit + virtual base added to all data segment addresses. + Text-segment stuff is computed like MEDANY, we can't + reuse the code above because the relocation knobs + look different. + + Data segment: sethi %hi(symbol), %temp1 + add %temp1, EMBMEDANY_BASE_REG, %temp2 + or %temp2, %lo(symbol), %reg */ + if (data_segment_operand (op1, GET_MODE (op1))) + { + if (temp) + { + temp1 = temp; /* op0 is allowed. */ + temp2 = op0; + } + else + { + temp1 = gen_reg_rtx (DImode); + temp2 = gen_reg_rtx (DImode); + } + + emit_insn (gen_embmedany_sethi (temp1, op1)); + emit_insn (gen_embmedany_brsum (temp2, temp1)); + emit_insn (gen_embmedany_losum (op0, temp2, op1)); + } + + /* Text segment: sethi %uhi(symbol), %temp1 + sethi %hi(symbol), %temp2 + or %temp1, %ulo(symbol), %temp3 + sllx %temp3, 32, %temp4 + or %temp4, %temp2, %temp5 + or %temp5, %lo(symbol), %reg */ + else + { + if (temp) + { + /* It is possible that one of the registers we got for operands[2] + might coincide with that of operands[0] (which is why we made + it TImode). Pick the other one to use as our scratch. */ + if (rtx_equal_p (temp, op0)) + { + gcc_assert (ti_temp); + temp = gen_rtx_REG (DImode, REGNO (temp) + 1); + } + temp1 = op0; + temp2 = temp; /* op0 is _not_ allowed, see above. */ + temp3 = op0; + temp4 = op0; + temp5 = op0; + } + else + { + temp1 = gen_reg_rtx (DImode); + temp2 = gen_reg_rtx (DImode); + temp3 = gen_reg_rtx (DImode); + temp4 = gen_reg_rtx (DImode); + temp5 = gen_reg_rtx (DImode); + } + + emit_insn (gen_embmedany_textuhi (temp1, op1)); + emit_insn (gen_embmedany_texthi (temp2, op1)); + emit_insn (gen_embmedany_textulo (temp3, temp1, op1)); + emit_insn (gen_rtx_SET (VOIDmode, temp4, + gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); + emit_insn (gen_rtx_SET (VOIDmode, temp5, + gen_rtx_PLUS (DImode, temp4, temp2))); + emit_insn (gen_embmedany_textlo (op0, temp5, op1)); + } + break; + + default: + gcc_unreachable (); + } +} + +#if HOST_BITS_PER_WIDE_INT == 32 +static void +sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED) +{ + gcc_unreachable (); +} +#else +/* These avoid problems when cross compiling. If we do not + go through all this hair then the optimizer will see + invalid REG_EQUAL notes or in some cases none at all. 
*/ +static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT); +static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT); +static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT); +static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT); + +/* The optimizer is not to assume anything about exactly + which bits are set for a HIGH, they are unspecified. + Unfortunately this leads to many missed optimizations + during CSE. We mask out the non-HIGH bits, and matches + a plain movdi, to alleviate this problem. */ +static rtx +gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val) +{ + return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff)); +} + +static rtx +gen_safe_SET64 (rtx dest, HOST_WIDE_INT val) +{ + return gen_rtx_SET (VOIDmode, dest, GEN_INT (val)); +} + +static rtx +gen_safe_OR64 (rtx src, HOST_WIDE_INT val) +{ + return gen_rtx_IOR (DImode, src, GEN_INT (val)); +} + +static rtx +gen_safe_XOR64 (rtx src, HOST_WIDE_INT val) +{ + return gen_rtx_XOR (DImode, src, GEN_INT (val)); +} + +/* Worker routines for 64-bit constant formation on arch64. + One of the key things to be doing in these emissions is + to create as many temp REGs as possible. This makes it + possible for half-built constants to be used later when + such values are similar to something required later on. + Without doing this, the optimizer cannot see such + opportunities. */ + +static void sparc_emit_set_const64_quick1 (rtx, rtx, + unsigned HOST_WIDE_INT, int); + +static void +sparc_emit_set_const64_quick1 (rtx op0, rtx temp, + unsigned HOST_WIDE_INT low_bits, int is_neg) +{ + unsigned HOST_WIDE_INT high_bits; + + if (is_neg) + high_bits = (~low_bits) & 0xffffffff; + else + high_bits = low_bits; + + emit_insn (gen_safe_HIGH64 (temp, high_bits)); + if (!is_neg) + { + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_safe_OR64 (temp, (high_bits & 0x3ff)))); + } + else + { + /* If we are XOR'ing with -1, then we should emit a one's complement + instead. This way the combiner will notice logical operations + such as ANDN later on and substitute. */ + if ((low_bits & 0x3ff) == 0x3ff) + { + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_rtx_NOT (DImode, temp))); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_safe_XOR64 (temp, + (-(HOST_WIDE_INT)0x400 + | (low_bits & 0x3ff))))); + } + } +} + +static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, int); + +static void +sparc_emit_set_const64_quick2 (rtx op0, rtx temp, + unsigned HOST_WIDE_INT high_bits, + unsigned HOST_WIDE_INT low_immediate, + int shift_count) +{ + rtx temp2 = op0; + + if ((high_bits & 0xfffffc00) != 0) + { + emit_insn (gen_safe_HIGH64 (temp, high_bits)); + if ((high_bits & ~0xfffffc00) != 0) + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_safe_OR64 (temp, (high_bits & 0x3ff)))); + else + temp2 = temp; + } + else + { + emit_insn (gen_safe_SET64 (temp, high_bits)); + temp2 = temp; + } + + /* Now shift it up into place. */ + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_rtx_ASHIFT (DImode, temp2, + GEN_INT (shift_count)))); + + /* If there is a low immediate part piece, finish up by + putting that in as well. */ + if (low_immediate != 0) + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_safe_OR64 (op0, low_immediate))); +} + +static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT); + +/* Full 64-bit constant decomposition. Even though this is the + 'worst' case, we still optimize a few things away. 
*/ +static void +sparc_emit_set_const64_longway (rtx op0, rtx temp, + unsigned HOST_WIDE_INT high_bits, + unsigned HOST_WIDE_INT low_bits) +{ + rtx sub_temp; + + if (reload_in_progress || reload_completed) + sub_temp = op0; + else + sub_temp = gen_reg_rtx (DImode); + + if ((high_bits & 0xfffffc00) != 0) + { + emit_insn (gen_safe_HIGH64 (temp, high_bits)); + if ((high_bits & ~0xfffffc00) != 0) + emit_insn (gen_rtx_SET (VOIDmode, + sub_temp, + gen_safe_OR64 (temp, (high_bits & 0x3ff)))); + else + sub_temp = temp; + } + else + { + emit_insn (gen_safe_SET64 (temp, high_bits)); + sub_temp = temp; + } + + if (!reload_in_progress && !reload_completed) + { + rtx temp2 = gen_reg_rtx (DImode); + rtx temp3 = gen_reg_rtx (DImode); + rtx temp4 = gen_reg_rtx (DImode); + + emit_insn (gen_rtx_SET (VOIDmode, temp4, + gen_rtx_ASHIFT (DImode, sub_temp, + GEN_INT (32)))); + + emit_insn (gen_safe_HIGH64 (temp2, low_bits)); + if ((low_bits & ~0xfffffc00) != 0) + { + emit_insn (gen_rtx_SET (VOIDmode, temp3, + gen_safe_OR64 (temp2, (low_bits & 0x3ff)))); + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_rtx_PLUS (DImode, temp4, temp3))); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_rtx_PLUS (DImode, temp4, temp2))); + } + } + else + { + rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff); + rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff); + rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff); + int to_shift = 12; + + /* We are in the middle of reload, so this is really + painful. However we do still make an attempt to + avoid emitting truly stupid code. */ + if (low1 != const0_rtx) + { + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_rtx_ASHIFT (DImode, sub_temp, + GEN_INT (to_shift)))); + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_rtx_IOR (DImode, op0, low1))); + sub_temp = op0; + to_shift = 12; + } + else + { + to_shift += 12; + } + if (low2 != const0_rtx) + { + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_rtx_ASHIFT (DImode, sub_temp, + GEN_INT (to_shift)))); + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_rtx_IOR (DImode, op0, low2))); + sub_temp = op0; + to_shift = 8; + } + else + { + to_shift += 8; + } + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_rtx_ASHIFT (DImode, sub_temp, + GEN_INT (to_shift)))); + if (low3 != const0_rtx) + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_rtx_IOR (DImode, op0, low3))); + /* phew... */ + } +} + +/* Analyze a 64-bit constant for certain properties. */ +static void analyze_64bit_constant (unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + int *, int *, int *); + +static void +analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits, + unsigned HOST_WIDE_INT low_bits, + int *hbsp, int *lbsp, int *abbasp) +{ + int lowest_bit_set, highest_bit_set, all_bits_between_are_set; + int i; + + lowest_bit_set = highest_bit_set = -1; + i = 0; + do + { + if ((lowest_bit_set == -1) + && ((low_bits >> i) & 1)) + lowest_bit_set = i; + if ((highest_bit_set == -1) + && ((high_bits >> (32 - i - 1)) & 1)) + highest_bit_set = (64 - i - 1); + } + while (++i < 32 + && ((highest_bit_set == -1) + || (lowest_bit_set == -1))); + if (i == 32) + { + i = 0; + do + { + if ((lowest_bit_set == -1) + && ((high_bits >> i) & 1)) + lowest_bit_set = i + 32; + if ((highest_bit_set == -1) + && ((low_bits >> (32 - i - 1)) & 1)) + highest_bit_set = 32 - i - 1; + } + while (++i < 32 + && ((highest_bit_set == -1) + || (lowest_bit_set == -1))); + } + /* If there are no bits set this should have gone out + as one instruction! 
*/ + gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1); + all_bits_between_are_set = 1; + for (i = lowest_bit_set; i <= highest_bit_set; i++) + { + if (i < 32) + { + if ((low_bits & (1 << i)) != 0) + continue; + } + else + { + if ((high_bits & (1 << (i - 32))) != 0) + continue; + } + all_bits_between_are_set = 0; + break; + } + *hbsp = highest_bit_set; + *lbsp = lowest_bit_set; + *abbasp = all_bits_between_are_set; +} + +static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT); + +static int +const64_is_2insns (unsigned HOST_WIDE_INT high_bits, + unsigned HOST_WIDE_INT low_bits) +{ + int highest_bit_set, lowest_bit_set, all_bits_between_are_set; + + if (high_bits == 0 + || high_bits == 0xffffffff) + return 1; + + analyze_64bit_constant (high_bits, low_bits, + &highest_bit_set, &lowest_bit_set, + &all_bits_between_are_set); + + if ((highest_bit_set == 63 + || lowest_bit_set == 0) + && all_bits_between_are_set != 0) + return 1; + + if ((highest_bit_set - lowest_bit_set) < 21) + return 1; + + return 0; +} + +static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + int, int); + +static unsigned HOST_WIDE_INT +create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits, + unsigned HOST_WIDE_INT low_bits, + int lowest_bit_set, int shift) +{ + HOST_WIDE_INT hi, lo; + + if (lowest_bit_set < 32) + { + lo = (low_bits >> lowest_bit_set) << shift; + hi = ((high_bits << (32 - lowest_bit_set)) << shift); + } + else + { + lo = 0; + hi = ((high_bits >> (lowest_bit_set - 32)) << shift); + } + gcc_assert (! (hi & lo)); + return (hi | lo); +} + +/* Here we are sure to be arch64 and this is an integer constant + being loaded into a register. Emit the most efficient + insn sequence possible. Detection of all the 1-insn cases + has been done already. */ +static void +sparc_emit_set_const64 (rtx op0, rtx op1) +{ + unsigned HOST_WIDE_INT high_bits, low_bits; + int lowest_bit_set, highest_bit_set; + int all_bits_between_are_set; + rtx temp = 0; + + /* Sanity check that we know what we are working with. */ + gcc_assert (TARGET_ARCH64 + && (GET_CODE (op0) == SUBREG + || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0))))); + + if (reload_in_progress || reload_completed) + temp = op0; + + if (GET_CODE (op1) != CONST_INT) + { + sparc_emit_set_symbolic_const64 (op0, op1, temp); + return; + } + + if (! temp) + temp = gen_reg_rtx (DImode); + + high_bits = ((INTVAL (op1) >> 32) & 0xffffffff); + low_bits = (INTVAL (op1) & 0xffffffff); + + /* low_bits bits 0 --> 31 + high_bits bits 32 --> 63 */ + + analyze_64bit_constant (high_bits, low_bits, + &highest_bit_set, &lowest_bit_set, + &all_bits_between_are_set); + + /* First try for a 2-insn sequence. 
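const64_is_2insns above accepts a constant when its high 32 bits are all zeros or all ones, when the set bits form one contiguous run touching bit 0 or bit 63, or when the highest and lowest set bits are less than 21 apart (a sethi plus one shift covers it). A hedged standalone restatement of that classification over a plain uint64_t; the helper name and the bit-scanning loops are illustrative, not the HOST_WIDE_INT code above:

#include <stdint.h>

/* Sketch only -- mirrors the shape of the test, not its exact plumbing.  */
static int
demo_const64_is_2insns (uint64_t v)
{
  uint32_t high = (uint32_t) (v >> 32);
  int lowest, highest;

  if (high == 0 || high == 0xffffffffu)
    return 1;                       /* handled by the quick1 sequences */

  for (lowest = 0; lowest < 64 && !((v >> lowest) & 1); lowest++)
    ;
  for (highest = 63; highest > 0 && !((v >> highest) & 1); highest--)
    ;

  if ((highest == 63 || lowest == 0)
      && (v >> lowest) == (~(uint64_t) 0 >> (63 - highest + lowest)))
    return 1;                       /* contiguous run: mov/sethi + shift */

  return (highest - lowest) < 21;   /* narrow run: sethi + sllx/srlx */
}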
*/ + + /* These situations are preferred because the optimizer can + * do more things with them: + * 1) mov -1, %reg + * sllx %reg, shift, %reg + * 2) mov -1, %reg + * srlx %reg, shift, %reg + * 3) mov some_small_const, %reg + * sllx %reg, shift, %reg + */ + if (((highest_bit_set == 63 + || lowest_bit_set == 0) + && all_bits_between_are_set != 0) + || ((highest_bit_set - lowest_bit_set) < 12)) + { + HOST_WIDE_INT the_const = -1; + int shift = lowest_bit_set; + + if ((highest_bit_set != 63 + && lowest_bit_set != 0) + || all_bits_between_are_set == 0) + { + the_const = + create_simple_focus_bits (high_bits, low_bits, + lowest_bit_set, 0); + } + else if (lowest_bit_set == 0) + shift = -(63 - highest_bit_set); + + gcc_assert (SPARC_SIMM13_P (the_const)); + gcc_assert (shift != 0); + + emit_insn (gen_safe_SET64 (temp, the_const)); + if (shift > 0) + emit_insn (gen_rtx_SET (VOIDmode, + op0, + gen_rtx_ASHIFT (DImode, + temp, + GEN_INT (shift)))); + else if (shift < 0) + emit_insn (gen_rtx_SET (VOIDmode, + op0, + gen_rtx_LSHIFTRT (DImode, + temp, + GEN_INT (-shift)))); + return; + } + + /* Now a range of 22 or less bits set somewhere. + * 1) sethi %hi(focus_bits), %reg + * sllx %reg, shift, %reg + * 2) sethi %hi(focus_bits), %reg + * srlx %reg, shift, %reg + */ + if ((highest_bit_set - lowest_bit_set) < 21) + { + unsigned HOST_WIDE_INT focus_bits = + create_simple_focus_bits (high_bits, low_bits, + lowest_bit_set, 10); + + gcc_assert (SPARC_SETHI_P (focus_bits)); + gcc_assert (lowest_bit_set != 10); + + emit_insn (gen_safe_HIGH64 (temp, focus_bits)); + + /* If lowest_bit_set == 10 then a sethi alone could have done it. */ + if (lowest_bit_set < 10) + emit_insn (gen_rtx_SET (VOIDmode, + op0, + gen_rtx_LSHIFTRT (DImode, temp, + GEN_INT (10 - lowest_bit_set)))); + else if (lowest_bit_set > 10) + emit_insn (gen_rtx_SET (VOIDmode, + op0, + gen_rtx_ASHIFT (DImode, temp, + GEN_INT (lowest_bit_set - 10)))); + return; + } + + /* 1) sethi %hi(low_bits), %reg + * or %reg, %lo(low_bits), %reg + * 2) sethi %hi(~low_bits), %reg + * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg + */ + if (high_bits == 0 + || high_bits == 0xffffffff) + { + sparc_emit_set_const64_quick1 (op0, temp, low_bits, + (high_bits == 0xffffffff)); + return; + } + + /* Now, try 3-insn sequences. */ + + /* 1) sethi %hi(high_bits), %reg + * or %reg, %lo(high_bits), %reg + * sllx %reg, 32, %reg + */ + if (low_bits == 0) + { + sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32); + return; + } + + /* We may be able to do something quick + when the constant is negated, so try that. */ + if (const64_is_2insns ((~high_bits) & 0xffffffff, + (~low_bits) & 0xfffffc00)) + { + /* NOTE: The trailing bits get XOR'd so we need the + non-negated bits, not the negated ones. */ + unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff; + + if ((((~high_bits) & 0xffffffff) == 0 + && ((~low_bits) & 0x80000000) == 0) + || (((~high_bits) & 0xffffffff) == 0xffffffff + && ((~low_bits) & 0x80000000) != 0)) + { + unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff); + + if ((SPARC_SETHI_P (fast_int) + && (~high_bits & 0xffffffff) == 0) + || SPARC_SIMM13_P (fast_int)) + emit_insn (gen_safe_SET64 (temp, fast_int)); + else + sparc_emit_set_const64 (temp, GEN_INT (fast_int)); + } + else + { + rtx negated_const; + negated_const = GEN_INT (((~low_bits) & 0xfffffc00) | + (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32)); + sparc_emit_set_const64 (temp, negated_const); + } + + /* If we are XOR'ing with -1, then we should emit a one's complement + instead. 
This way the combiner will notice logical operations + such as ANDN later on and substitute. */ + if (trailing_bits == 0x3ff) + { + emit_insn (gen_rtx_SET (VOIDmode, op0, + gen_rtx_NOT (DImode, temp))); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, + op0, + gen_safe_XOR64 (temp, + (-0x400 | trailing_bits)))); + } + return; + } + + /* 1) sethi %hi(xxx), %reg + * or %reg, %lo(xxx), %reg + * sllx %reg, yyy, %reg + * + * ??? This is just a generalized version of the low_bits==0 + * thing above, FIXME... + */ + if ((highest_bit_set - lowest_bit_set) < 32) + { + unsigned HOST_WIDE_INT focus_bits = + create_simple_focus_bits (high_bits, low_bits, + lowest_bit_set, 0); + + /* We can't get here in this state. */ + gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32); + + /* So what we know is that the set bits straddle the + middle of the 64-bit word. */ + sparc_emit_set_const64_quick2 (op0, temp, + focus_bits, 0, + lowest_bit_set); + return; + } + + /* 1) sethi %hi(high_bits), %reg + * or %reg, %lo(high_bits), %reg + * sllx %reg, 32, %reg + * or %reg, low_bits, %reg + */ + if (SPARC_SIMM13_P(low_bits) + && ((int)low_bits > 0)) + { + sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32); + return; + } + + /* The easiest way when all else fails, is full decomposition. */ + sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits); +} +#endif /* HOST_BITS_PER_WIDE_INT == 32 */ + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. For floating-point, + CCFP[E]mode is used. CC_NOOVmode should be used when the first operand + is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special + processing is needed. */ + +enum machine_mode +select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED) +{ + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + switch (op) + { + case EQ: + case NE: + case UNORDERED: + case ORDERED: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case UNEQ: + case LTGT: + return CCFPmode; + + case LT: + case LE: + case GT: + case GE: + return CCFPEmode; + + default: + gcc_unreachable (); + } + } + else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS + || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT) + { + if (TARGET_ARCH64 && GET_MODE (x) == DImode) + return CCX_NOOVmode; + else + return CC_NOOVmode; + } + else + { + if (TARGET_ARCH64 && GET_MODE (x) == DImode) + return CCXmode; + else + return CCmode; + } +} + +/* Emit the compare insn and return the CC reg for a CODE comparison + with operands X and Y. */ + +static rtx +gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y) +{ + enum machine_mode mode; + rtx cc_reg; + + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC) + return x; + + mode = SELECT_CC_MODE (code, x, y); + + /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the + fcc regs (cse can't tell they're really call clobbered regs and will + remove a duplicate comparison even if there is an intervening function + call - it will then try to reload the cc reg via an int reg which is why + we need the movcc patterns). It is possible to provide the movcc + patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two + registers (say %g1,%g5) and it takes about 6 insns. A better fix would be + to tell cse that CCFPE mode registers (even pseudos) are call + clobbered. */ + + /* ??? This is an experiment. Rather than making changes to cse which may + or may not be easy/clean, we do our own cse. 
This is possible because + we will generate hard registers. Cse knows they're call clobbered (it + doesn't know the same thing about pseudos). If we guess wrong, no big + deal, but if we win, great! */ + + if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) +#if 1 /* experiment */ + { + int reg; + /* We cycle through the registers to ensure they're all exercised. */ + static int next_fcc_reg = 0; + /* Previous x,y for each fcc reg. */ + static rtx prev_args[4][2]; + + /* Scan prev_args for x,y. */ + for (reg = 0; reg < 4; reg++) + if (prev_args[reg][0] == x && prev_args[reg][1] == y) + break; + if (reg == 4) + { + reg = next_fcc_reg; + prev_args[reg][0] = x; + prev_args[reg][1] = y; + next_fcc_reg = (next_fcc_reg + 1) & 3; + } + cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG); + } +#else + cc_reg = gen_reg_rtx (mode); +#endif /* ! experiment */ + else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG); + else + cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG); + + /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this + will only result in an unrecognizable insn so no point in asserting. */ + emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y))); + + return cc_reg; +} + + +/* Emit the compare insn and return the CC reg for the comparison in CMP. */ + +rtx +gen_compare_reg (rtx cmp) +{ + return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1)); +} + +/* This function is used for v9 only. + DEST is the target of the Scc insn. + CODE is the code for an Scc's comparison. + X and Y are the values we compare. + + This function is needed to turn + + (set (reg:SI 110) + (gt (reg:CCX 100 %icc) + (const_int 0))) + into + (set (reg:SI 110) + (gt:DI (reg:CCX 100 %icc) + (const_int 0))) + + IE: The instruction recognizer needs to see the mode of the comparison to + find the right instruction. We could use "gt:DI" right in the + define_expand, but leaving it out allows us to handle DI, SI, etc. */ + +static int +gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y) +{ + if (! TARGET_ARCH64 + && (GET_MODE (x) == DImode + || GET_MODE (dest) == DImode)) + return 0; + + /* Try to use the movrCC insns. */ + if (TARGET_ARCH64 + && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT + && y == const0_rtx + && v9_regcmp_p (compare_code)) + { + rtx op0 = x; + rtx temp; + + /* Special case for op0 != 0. This can be done with one instruction if + dest == x. */ + + if (compare_code == NE + && GET_MODE (dest) == DImode + && rtx_equal_p (op0, dest)) + { + emit_insn (gen_rtx_SET (VOIDmode, dest, + gen_rtx_IF_THEN_ELSE (DImode, + gen_rtx_fmt_ee (compare_code, DImode, + op0, const0_rtx), + const1_rtx, + dest))); + return 1; + } + + if (reg_overlap_mentioned_p (dest, op0)) + { + /* Handle the case where dest == x. + We "early clobber" the result. 
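+   (Hypothetical example: for "x = (x > 0)" with dest and x in the same
+   register, the "mov 0, dest" emitted just below would destroy the value
+   being tested, so the operand is first copied into a fresh register.)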
*/ + op0 = gen_reg_rtx (GET_MODE (x)); + emit_move_insn (op0, x); + } + + emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx)); + if (GET_MODE (op0) != DImode) + { + temp = gen_reg_rtx (DImode); + convert_move (temp, op0, 0); + } + else + temp = op0; + emit_insn (gen_rtx_SET (VOIDmode, dest, + gen_rtx_IF_THEN_ELSE (GET_MODE (dest), + gen_rtx_fmt_ee (compare_code, DImode, + temp, const0_rtx), + const1_rtx, + dest))); + return 1; + } + else + { + x = gen_compare_reg_1 (compare_code, x, y); + y = const0_rtx; + + gcc_assert (GET_MODE (x) != CC_NOOVmode + && GET_MODE (x) != CCX_NOOVmode); + + emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx)); + emit_insn (gen_rtx_SET (VOIDmode, dest, + gen_rtx_IF_THEN_ELSE (GET_MODE (dest), + gen_rtx_fmt_ee (compare_code, + GET_MODE (x), x, y), + const1_rtx, dest))); + return 1; + } +} + + +/* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this + without jumps using the addx/subx instructions. */ + +bool +emit_scc_insn (rtx operands[]) +{ + rtx tem; + rtx x; + rtx y; + enum rtx_code code; + + /* The quad-word fp compare library routines all return nonzero to indicate + true, which is different from the equivalent libgcc routines, so we must + handle them specially here. */ + if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD) + { + operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3], + GET_CODE (operands[1])); + operands[2] = XEXP (operands[1], 0); + operands[3] = XEXP (operands[1], 1); + } + + code = GET_CODE (operands[1]); + x = operands[2]; + y = operands[3]; + + /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has + more applications). The exception to this is "reg != 0" which can + be done in one instruction on v9 (so we do it). */ + if (code == EQ) + { + if (GET_MODE (x) == SImode) + { + rtx pat = gen_seqsi_special (operands[0], x, y); + emit_insn (pat); + return true; + } + else if (GET_MODE (x) == DImode) + { + rtx pat = gen_seqdi_special (operands[0], x, y); + emit_insn (pat); + return true; + } + } + + if (code == NE) + { + if (GET_MODE (x) == SImode) + { + rtx pat = gen_snesi_special (operands[0], x, y); + emit_insn (pat); + return true; + } + else if (GET_MODE (x) == DImode) + { + rtx pat = gen_snedi_special (operands[0], x, y); + emit_insn (pat); + return true; + } + } + + /* For the rest, on v9 we can use conditional moves. */ + + if (TARGET_V9) + { + if (gen_v9_scc (operands[0], code, x, y)) + return true; + } + + /* We can do LTU and GEU using the addx/subx instructions too. And + for GTU/LEU, if both operands are registers swap them and fall + back to the easy case. */ + if (code == GTU || code == LEU) + { + if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG) + && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)) + { + tem = x; + x = y; + y = tem; + code = swap_condition (code); + } + } + + if (code == LTU || code == GEU) + { + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (code, SImode, + gen_compare_reg_1 (code, x, y), + const0_rtx))); + return true; + } + + /* Nope, do branches. */ + return false; +} + +/* Emit a conditional jump insn for the v9 architecture using comparison code + CODE and jump target LABEL. + This function exists to take advantage of the v9 brxx insns. 
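+   (Illustrative case: a register-against-zero test such as "x != 0" can be
+   emitted as a single "brnz x, label" on v9, with no separate compare and
+   no use of the condition codes.)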
*/ + +static void +emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label) +{ + emit_jump_insn (gen_rtx_SET (VOIDmode, + pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_fmt_ee (code, GET_MODE (op0), + op0, const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx))); +} + +void +emit_conditional_branch_insn (rtx operands[]) +{ + /* The quad-word fp compare library routines all return nonzero to indicate + true, which is different from the equivalent libgcc routines, so we must + handle them specially here. */ + if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD) + { + operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2], + GET_CODE (operands[0])); + operands[1] = XEXP (operands[0], 0); + operands[2] = XEXP (operands[0], 1); + } + + if (TARGET_ARCH64 && operands[2] == const0_rtx + && GET_CODE (operands[1]) == REG + && GET_MODE (operands[1]) == DImode) + { + emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]); + return; + } + + operands[1] = gen_compare_reg (operands[0]); + operands[2] = const0_rtx; + operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode, + operands[1], operands[2]); + emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2], + operands[3])); +} + + +/* Generate a DFmode part of a hard TFmode register. + REG is the TFmode hard register, LOW is 1 for the + low 64bit of the register and 0 otherwise. + */ +rtx +gen_df_reg (rtx reg, int low) +{ + int regno = REGNO (reg); + + if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0)) + regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2; + return gen_rtx_REG (DFmode, regno); +} + +/* Generate a call to FUNC with OPERANDS. Operand 0 is the return value. + Unlike normal calls, TFmode operands are passed by reference. It is + assumed that no more than 3 operands are required. */ + +static void +emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands) +{ + rtx ret_slot = NULL, arg[3], func_sym; + int i; + + /* We only expect to be called for conversions, unary, and binary ops. */ + gcc_assert (nargs == 2 || nargs == 3); + + for (i = 0; i < nargs; ++i) + { + rtx this_arg = operands[i]; + rtx this_slot; + + /* TFmode arguments and return values are passed by reference. */ + if (GET_MODE (this_arg) == TFmode) + { + int force_stack_temp; + + force_stack_temp = 0; + if (TARGET_BUGGY_QP_LIB && i == 0) + force_stack_temp = 1; + + if (GET_CODE (this_arg) == MEM + && ! force_stack_temp) + this_arg = XEXP (this_arg, 0); + else if (CONSTANT_P (this_arg) + && ! force_stack_temp) + { + this_slot = force_const_mem (TFmode, this_arg); + this_arg = XEXP (this_slot, 0); + } + else + { + this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0); + + /* Operand 0 is the return value. We'll copy it out later. 
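+   (In other words, the quad-float library routines return the TFmode result
+   through the pointer passed in the first argument slot, so a stack
+   temporary is supplied here and copied into operands[0] after the call.)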
*/ + if (i > 0) + emit_move_insn (this_slot, this_arg); + else + ret_slot = this_slot; + + this_arg = XEXP (this_slot, 0); + } + } + + arg[i] = this_arg; + } + + func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name); + + if (GET_MODE (operands[0]) == TFmode) + { + if (nargs == 2) + emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2, + arg[0], GET_MODE (arg[0]), + arg[1], GET_MODE (arg[1])); + else + emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3, + arg[0], GET_MODE (arg[0]), + arg[1], GET_MODE (arg[1]), + arg[2], GET_MODE (arg[2])); + + if (ret_slot) + emit_move_insn (operands[0], ret_slot); + } + else + { + rtx ret; + + gcc_assert (nargs == 2); + + ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL, + GET_MODE (operands[0]), 1, + arg[1], GET_MODE (arg[1])); + + if (ret != operands[0]) + emit_move_insn (operands[0], ret); + } +} + +/* Expand soft-float TFmode calls to sparc abi routines. */ + +static void +emit_soft_tfmode_binop (enum rtx_code code, rtx *operands) +{ + const char *func; + + switch (code) + { + case PLUS: + func = "_Qp_add"; + break; + case MINUS: + func = "_Qp_sub"; + break; + case MULT: + func = "_Qp_mul"; + break; + case DIV: + func = "_Qp_div"; + break; + default: + gcc_unreachable (); + } + + emit_soft_tfmode_libcall (func, 3, operands); +} + +static void +emit_soft_tfmode_unop (enum rtx_code code, rtx *operands) +{ + const char *func; + + gcc_assert (code == SQRT); + func = "_Qp_sqrt"; + + emit_soft_tfmode_libcall (func, 2, operands); +} + +static void +emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands) +{ + const char *func; + + switch (code) + { + case FLOAT_EXTEND: + switch (GET_MODE (operands[1])) + { + case SFmode: + func = "_Qp_stoq"; + break; + case DFmode: + func = "_Qp_dtoq"; + break; + default: + gcc_unreachable (); + } + break; + + case FLOAT_TRUNCATE: + switch (GET_MODE (operands[0])) + { + case SFmode: + func = "_Qp_qtos"; + break; + case DFmode: + func = "_Qp_qtod"; + break; + default: + gcc_unreachable (); + } + break; + + case FLOAT: + switch (GET_MODE (operands[1])) + { + case SImode: + func = "_Qp_itoq"; + if (TARGET_ARCH64) + operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]); + break; + case DImode: + func = "_Qp_xtoq"; + break; + default: + gcc_unreachable (); + } + break; + + case UNSIGNED_FLOAT: + switch (GET_MODE (operands[1])) + { + case SImode: + func = "_Qp_uitoq"; + if (TARGET_ARCH64) + operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]); + break; + case DImode: + func = "_Qp_uxtoq"; + break; + default: + gcc_unreachable (); + } + break; + + case FIX: + switch (GET_MODE (operands[0])) + { + case SImode: + func = "_Qp_qtoi"; + break; + case DImode: + func = "_Qp_qtox"; + break; + default: + gcc_unreachable (); + } + break; + + case UNSIGNED_FIX: + switch (GET_MODE (operands[0])) + { + case SImode: + func = "_Qp_qtoui"; + break; + case DImode: + func = "_Qp_qtoux"; + break; + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + + emit_soft_tfmode_libcall (func, 2, operands); +} + +/* Expand a hard-float tfmode operation. All arguments must be in + registers. 
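+   (Sketch of the intent: with a hard quad FPU a TFmode addition, for
+   example, becomes a single quad instruction such as faddq once both
+   inputs are in registers, instead of the _Qp_add libcall used by the
+   soft-float path above.)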
*/ + +static void +emit_hard_tfmode_operation (enum rtx_code code, rtx *operands) +{ + rtx op, dest; + + if (GET_RTX_CLASS (code) == RTX_UNARY) + { + operands[1] = force_reg (GET_MODE (operands[1]), operands[1]); + op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]); + } + else + { + operands[1] = force_reg (GET_MODE (operands[1]), operands[1]); + operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); + op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]), + operands[1], operands[2]); + } + + if (register_operand (operands[0], VOIDmode)) + dest = operands[0]; + else + dest = gen_reg_rtx (GET_MODE (operands[0])); + + emit_insn (gen_rtx_SET (VOIDmode, dest, op)); + + if (dest != operands[0]) + emit_move_insn (operands[0], dest); +} + +void +emit_tfmode_binop (enum rtx_code code, rtx *operands) +{ + if (TARGET_HARD_QUAD) + emit_hard_tfmode_operation (code, operands); + else + emit_soft_tfmode_binop (code, operands); +} + +void +emit_tfmode_unop (enum rtx_code code, rtx *operands) +{ + if (TARGET_HARD_QUAD) + emit_hard_tfmode_operation (code, operands); + else + emit_soft_tfmode_unop (code, operands); +} + +void +emit_tfmode_cvt (enum rtx_code code, rtx *operands) +{ + if (TARGET_HARD_QUAD) + emit_hard_tfmode_operation (code, operands); + else + emit_soft_tfmode_cvt (code, operands); +} + +/* Return nonzero if a branch/jump/call instruction will be emitting + nop into its delay slot. */ + +int +empty_delay_slot (rtx insn) +{ + rtx seq; + + /* If no previous instruction (should not happen), return true. */ + if (PREV_INSN (insn) == NULL) + return 1; + + seq = NEXT_INSN (PREV_INSN (insn)); + if (GET_CODE (PATTERN (seq)) == SEQUENCE) + return 0; + + return 1; +} + +/* Return nonzero if TRIAL can go into the call delay slot. */ + +int +tls_call_delay (rtx trial) +{ + rtx pat; + + /* Binutils allows + call __tls_get_addr, %tgd_call (foo) + add %l7, %o0, %o0, %tgd_add (foo) + while Sun as/ld does not. */ + if (TARGET_GNU_TLS || !TARGET_TLS) + return 1; + + pat = PATTERN (trial); + + /* We must reject tgd_add{32|64}, i.e. + (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD))) + and tldm_add{32|64}, i.e. + (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM))) + for Sun as/ld. */ + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == PLUS) + { + rtx unspec = XEXP (SET_SRC (pat), 1); + + if (GET_CODE (unspec) == UNSPEC + && (XINT (unspec, 1) == UNSPEC_TLSGD + || XINT (unspec, 1) == UNSPEC_TLSLDM)) + return 0; + } + + return 1; +} + +/* Return nonzero if TRIAL, an insn, can be combined with a 'restore' + instruction. RETURN_P is true if the v9 variant 'return' is to be + considered in the test too. + + TRIAL must be a SET whose destination is a REG appropriate for the + 'restore' instruction or, if RETURN_P is true, for the 'return' + instruction. */ + +static int +eligible_for_restore_insn (rtx trial, bool return_p) +{ + rtx pat = PATTERN (trial); + rtx src = SET_SRC (pat); + + /* The 'restore src,%g0,dest' pattern for word mode and below. */ + if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT + && arith_operand (src, GET_MODE (src))) + { + if (TARGET_ARCH64) + return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode); + else + return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode); + } + + /* The 'restore src,%g0,dest' pattern for double-word mode. 
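+   (Illustrative case: a trailing "dest = src" copy of a double-word value
+   can be folded into "restore src, %g0, dest", so the copy is carried out
+   by the same instruction that pops the register window.)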
*/ + else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT + && arith_double_operand (src, GET_MODE (src))) + return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode); + + /* The 'restore src,%g0,dest' pattern for float if no FPU. */ + else if (! TARGET_FPU && register_operand (src, SFmode)) + return 1; + + /* The 'restore src,%g0,dest' pattern for double if no FPU. */ + else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode)) + return 1; + + /* If we have the 'return' instruction, anything that does not use + local or output registers and can go into a delay slot wins. */ + else if (return_p && TARGET_V9 && ! epilogue_renumber (&pat, 1) + && (get_attr_in_uncond_branch_delay (trial) + == IN_UNCOND_BRANCH_DELAY_TRUE)) + return 1; + + /* The 'restore src1,src2,dest' pattern for SImode. */ + else if (GET_CODE (src) == PLUS + && register_operand (XEXP (src, 0), SImode) + && arith_operand (XEXP (src, 1), SImode)) + return 1; + + /* The 'restore src1,src2,dest' pattern for DImode. */ + else if (GET_CODE (src) == PLUS + && register_operand (XEXP (src, 0), DImode) + && arith_double_operand (XEXP (src, 1), DImode)) + return 1; + + /* The 'restore src1,%lo(src2),dest' pattern. */ + else if (GET_CODE (src) == LO_SUM + && ! TARGET_CM_MEDMID + && ((register_operand (XEXP (src, 0), SImode) + && immediate_operand (XEXP (src, 1), SImode)) + || (TARGET_ARCH64 + && register_operand (XEXP (src, 0), DImode) + && immediate_operand (XEXP (src, 1), DImode)))) + return 1; + + /* The 'restore src,src,dest' pattern. */ + else if (GET_CODE (src) == ASHIFT + && (register_operand (XEXP (src, 0), SImode) + || register_operand (XEXP (src, 0), DImode)) + && XEXP (src, 1) == const1_rtx) + return 1; + + return 0; +} + +/* Return nonzero if TRIAL can go into the function return's + delay slot. */ + +int +eligible_for_return_delay (rtx trial) +{ + rtx pat; + + if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET) + return 0; + + if (get_attr_length (trial) != 1) + return 0; + + /* If the function uses __builtin_eh_return, the eh_return machinery + occupies the delay slot. */ + if (crtl->calls_eh_return) + return 0; + + /* In the case of a true leaf function, anything can go into the slot. */ + if (sparc_leaf_function_p) + return get_attr_in_uncond_branch_delay (trial) + == IN_UNCOND_BRANCH_DELAY_TRUE; + + pat = PATTERN (trial); + + /* Otherwise, only operations which can be done in tandem with + a `restore' or `return' insn can go into the delay slot. */ + if (GET_CODE (SET_DEST (pat)) != REG + || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)) + return 0; + + /* If this instruction sets up floating point register and we have a return + instruction, it can probably go in. But restore will not work + with FP_REGS. */ + if (REGNO (SET_DEST (pat)) >= 32) + return (TARGET_V9 + && ! epilogue_renumber (&pat, 1) + && (get_attr_in_uncond_branch_delay (trial) + == IN_UNCOND_BRANCH_DELAY_TRUE)); + + return eligible_for_restore_insn (trial, true); +} + +/* Return nonzero if TRIAL can go into the sibling call's + delay slot. */ + +int +eligible_for_sibcall_delay (rtx trial) +{ + rtx pat; + + if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET) + return 0; + + if (get_attr_length (trial) != 1) + return 0; + + pat = PATTERN (trial); + + if (sparc_leaf_function_p) + { + /* If the tail call is done using the call instruction, + we have to restore %o7 in the delay slot. 
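+   (That is, the slot is already reserved for restoring the return address,
+   so no other instruction may be scheduled into it.)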
*/ + if (LEAF_SIBCALL_SLOT_RESERVED_P) + return 0; + + /* %g1 is used to build the function address */ + if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat)) + return 0; + + return 1; + } + + /* Otherwise, only operations which can be done in tandem with + a `restore' insn can go into the delay slot. */ + if (GET_CODE (SET_DEST (pat)) != REG + || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24) + || REGNO (SET_DEST (pat)) >= 32) + return 0; + + /* If it mentions %o7, it can't go in, because sibcall will clobber it + in most cases. */ + if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat)) + return 0; + + return eligible_for_restore_insn (trial, false); +} + +int +short_branch (int uid1, int uid2) +{ + int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2); + + /* Leave a few words of "slop". */ + if (delta >= -1023 && delta <= 1022) + return 1; + + return 0; +} + +/* Return nonzero if REG is not used after INSN. + We assume REG is a reload reg, and therefore does + not live past labels or calls or jumps. */ +int +reg_unused_after (rtx reg, rtx insn) +{ + enum rtx_code code, prev_code = UNKNOWN; + + while ((insn = NEXT_INSN (insn))) + { + if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)]) + return 1; + + code = GET_CODE (insn); + if (GET_CODE (insn) == CODE_LABEL) + return 1; + + if (INSN_P (insn)) + { + rtx set = single_set (insn); + int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set)); + if (set && in_src) + return 0; + if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return 1; + if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn))) + return 0; + } + prev_code = code; + } + return 1; +} + +/* Determine if it's legal to put X into the constant pool. This + is not possible if X contains the address of a symbol that is + not constant (TLS) or not known at final link time (PIC). */ + +static bool +sparc_cannot_force_const_mem (rtx x) +{ + switch (GET_CODE (x)) + { + case CONST_INT: + case CONST_DOUBLE: + case CONST_VECTOR: + /* Accept all non-symbolic constants. */ + return false; + + case LABEL_REF: + /* Labels are OK iff we are non-PIC. */ + return flag_pic != 0; + + case SYMBOL_REF: + /* 'Naked' TLS symbol references are never OK, + non-TLS symbols are OK iff we are non-PIC. */ + if (SYMBOL_REF_TLS_MODEL (x)) + return true; + else + return flag_pic != 0; + + case CONST: + return sparc_cannot_force_const_mem (XEXP (x, 0)); + case PLUS: + case MINUS: + return sparc_cannot_force_const_mem (XEXP (x, 0)) + || sparc_cannot_force_const_mem (XEXP (x, 1)); + case UNSPEC: + return true; + default: + gcc_unreachable (); + } +} + +/* Global Offset Table support. */ +static GTY(()) rtx got_helper_rtx = NULL_RTX; +static GTY(()) rtx global_offset_table_rtx = NULL_RTX; + +/* Return the SYMBOL_REF for the Global Offset Table. */ + +static GTY(()) rtx sparc_got_symbol = NULL_RTX; + +static rtx +sparc_got (void) +{ + if (!sparc_got_symbol) + sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + + return sparc_got_symbol; +} + +/* Ensure that we are not using patterns that are not OK with PIC. 
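+   (Under flag_pic == 1, for example, a bare SYMBOL_REF operand should
+   already have been legitimized through the GOT; the assertion below
+   catches such operands, only letting through the special GOT-relative
+   CONST form.)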
*/ + +int +check_pic (int i) +{ + rtx op; + + switch (flag_pic) + { + case 1: + op = recog_data.operand[i]; + gcc_assert (GET_CODE (op) != SYMBOL_REF + && (GET_CODE (op) != CONST + || (GET_CODE (XEXP (op, 0)) == MINUS + && XEXP (XEXP (op, 0), 0) == sparc_got () + && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST))); + case 2: + default: + return 1; + } +} + +/* Return true if X is an address which needs a temporary register when + reloaded while generating PIC code. */ + +int +pic_address_needs_scratch (rtx x) +{ + /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */ + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && ! SMALL_INT (XEXP (XEXP (x, 0), 1))) + return 1; + + return 0; +} + +/* Determine if a given RTX is a valid constant. We already know this + satisfies CONSTANT_P. */ + +bool +legitimate_constant_p (rtx x) +{ + switch (GET_CODE (x)) + { + case CONST: + case SYMBOL_REF: + if (sparc_tls_referenced_p (x)) + return false; + break; + + case CONST_DOUBLE: + if (GET_MODE (x) == VOIDmode) + return true; + + /* Floating point constants are generally not ok. + The only exception is 0.0 in VIS. */ + if (TARGET_VIS + && SCALAR_FLOAT_MODE_P (GET_MODE (x)) + && const_zero_operand (x, GET_MODE (x))) + return true; + + return false; + + case CONST_VECTOR: + /* Vector constants are generally not ok. + The only exception is 0 in VIS. */ + if (TARGET_VIS + && const_zero_operand (x, GET_MODE (x))) + return true; + + return false; + + default: + break; + } + + return true; +} + +/* Determine if a given RTX is a valid constant address. */ + +bool +constant_address_p (rtx x) +{ + switch (GET_CODE (x)) + { + case LABEL_REF: + case CONST_INT: + case HIGH: + return true; + + case CONST: + if (flag_pic && pic_address_needs_scratch (x)) + return false; + return legitimate_constant_p (x); + + case SYMBOL_REF: + return !flag_pic && legitimate_constant_p (x); + + default: + return false; + } +} + +/* Nonzero if the constant value X is a legitimate general operand + when generating PIC code. It is given that flag_pic is on and + that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ + +bool +legitimate_pic_operand_p (rtx x) +{ + if (pic_address_needs_scratch (x)) + return false; + if (sparc_tls_referenced_p (x)) + return false; + return true; +} + +/* Return nonzero if ADDR is a valid memory address. + STRICT specifies whether strict register checking applies. */ + +static bool +sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict) +{ + rtx rs1 = NULL, rs2 = NULL, imm1 = NULL; + + if (REG_P (addr) || GET_CODE (addr) == SUBREG) + rs1 = addr; + else if (GET_CODE (addr) == PLUS) + { + rs1 = XEXP (addr, 0); + rs2 = XEXP (addr, 1); + + /* Canonicalize. REG comes first, if there are no regs, + LO_SUM comes first. */ + if (!REG_P (rs1) + && GET_CODE (rs1) != SUBREG + && (REG_P (rs2) + || GET_CODE (rs2) == SUBREG + || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM))) + { + rs1 = XEXP (addr, 1); + rs2 = XEXP (addr, 0); + } + + if ((flag_pic == 1 + && rs1 == pic_offset_table_rtx + && !REG_P (rs2) + && GET_CODE (rs2) != SUBREG + && GET_CODE (rs2) != LO_SUM + && GET_CODE (rs2) != MEM + && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2)) + && (! 
symbolic_operand (rs2, VOIDmode) || mode == Pmode) + && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2))) + || ((REG_P (rs1) + || GET_CODE (rs1) == SUBREG) + && RTX_OK_FOR_OFFSET_P (rs2))) + { + imm1 = rs2; + rs2 = NULL; + } + else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG) + && (REG_P (rs2) || GET_CODE (rs2) == SUBREG)) + { + /* We prohibit REG + REG for TFmode when there are no quad move insns + and we consequently need to split. We do this because REG+REG + is not an offsettable address. If we get the situation in reload + where source and destination of a movtf pattern are both MEMs with + REG+REG address, then only one of them gets converted to an + offsettable address. */ + if (mode == TFmode + && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD)) + return 0; + + /* We prohibit REG + REG on ARCH32 if not optimizing for + DFmode/DImode because then mem_min_alignment is likely to be zero + after reload and the forced split would lack a matching splitter + pattern. */ + if (TARGET_ARCH32 && !optimize + && (mode == DFmode || mode == DImode)) + return 0; + } + else if (USE_AS_OFFSETABLE_LO10 + && GET_CODE (rs1) == LO_SUM + && TARGET_ARCH64 + && ! TARGET_CM_MEDMID + && RTX_OK_FOR_OLO10_P (rs2)) + { + rs2 = NULL; + imm1 = XEXP (rs1, 1); + rs1 = XEXP (rs1, 0); + if (!CONSTANT_P (imm1) + || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1))) + return 0; + } + } + else if (GET_CODE (addr) == LO_SUM) + { + rs1 = XEXP (addr, 0); + imm1 = XEXP (addr, 1); + + if (!CONSTANT_P (imm1) + || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1))) + return 0; + + /* We can't allow TFmode in 32-bit mode, because an offset greater + than the alignment (8) may cause the LO_SUM to overflow. */ + if (mode == TFmode && TARGET_ARCH32) + return 0; + } + else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr)) + return 1; + else + return 0; + + if (GET_CODE (rs1) == SUBREG) + rs1 = SUBREG_REG (rs1); + if (!REG_P (rs1)) + return 0; + + if (rs2) + { + if (GET_CODE (rs2) == SUBREG) + rs2 = SUBREG_REG (rs2); + if (!REG_P (rs2)) + return 0; + } + + if (strict) + { + if (!REGNO_OK_FOR_BASE_P (REGNO (rs1)) + || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2)))) + return 0; + } + else + { + if ((REGNO (rs1) >= 32 + && REGNO (rs1) != FRAME_POINTER_REGNUM + && REGNO (rs1) < FIRST_PSEUDO_REGISTER) + || (rs2 + && (REGNO (rs2) >= 32 + && REGNO (rs2) != FRAME_POINTER_REGNUM + && REGNO (rs2) < FIRST_PSEUDO_REGISTER))) + return 0; + } + return 1; +} + +/* Return the SYMBOL_REF for the tls_get_addr function. */ + +static GTY(()) rtx sparc_tls_symbol = NULL_RTX; + +static rtx +sparc_tls_get_addr (void) +{ + if (!sparc_tls_symbol) + sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr"); + + return sparc_tls_symbol; +} + +/* Return the Global Offset Table to be used in TLS mode. */ + +static rtx +sparc_tls_got (void) +{ + /* In PIC mode, this is just the PIC offset table. */ + if (flag_pic) + { + crtl->uses_pic_offset_table = 1; + return pic_offset_table_rtx; + } + + /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for + the GOT symbol with the 32-bit ABI, so we reload the GOT register. */ + if (TARGET_SUN_TLS && TARGET_ARCH32) + { + load_got_register (); + return global_offset_table_rtx; + } + + /* In all other cases, we load a new pseudo with the GOT symbol. */ + return copy_to_reg (sparc_got ()); +} + +/* Return true if X contains a thread-local symbol. 
*/ + +static bool +sparc_tls_referenced_p (rtx x) +{ + if (!TARGET_HAVE_TLS) + return false; + + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) + x = XEXP (XEXP (x, 0), 0); + + if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x)) + return true; + + /* That's all we handle in sparc_legitimize_tls_address for now. */ + return false; +} + +/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute + this (thread-local) address. */ + +static rtx +sparc_legitimize_tls_address (rtx addr) +{ + rtx temp1, temp2, temp3, ret, o0, got, insn; + + gcc_assert (can_create_pseudo_p ()); + + if (GET_CODE (addr) == SYMBOL_REF) + switch (SYMBOL_REF_TLS_MODEL (addr)) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + start_sequence (); + temp1 = gen_reg_rtx (SImode); + temp2 = gen_reg_rtx (SImode); + ret = gen_reg_rtx (Pmode); + o0 = gen_rtx_REG (Pmode, 8); + got = sparc_tls_got (); + emit_insn (gen_tgd_hi22 (temp1, addr)); + emit_insn (gen_tgd_lo10 (temp2, temp1, addr)); + if (TARGET_ARCH32) + { + emit_insn (gen_tgd_add32 (o0, got, temp2, addr)); + insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (), + addr, const1_rtx)); + } + else + { + emit_insn (gen_tgd_add64 (o0, got, temp2, addr)); + insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (), + addr, const1_rtx)); + } + CALL_INSN_FUNCTION_USAGE (insn) + = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0), + CALL_INSN_FUNCTION_USAGE (insn)); + insn = get_insns (); + end_sequence (); + emit_libcall_block (insn, ret, o0, addr); + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + start_sequence (); + temp1 = gen_reg_rtx (SImode); + temp2 = gen_reg_rtx (SImode); + temp3 = gen_reg_rtx (Pmode); + ret = gen_reg_rtx (Pmode); + o0 = gen_rtx_REG (Pmode, 8); + got = sparc_tls_got (); + emit_insn (gen_tldm_hi22 (temp1)); + emit_insn (gen_tldm_lo10 (temp2, temp1)); + if (TARGET_ARCH32) + { + emit_insn (gen_tldm_add32 (o0, got, temp2)); + insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (), + const1_rtx)); + } + else + { + emit_insn (gen_tldm_add64 (o0, got, temp2)); + insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (), + const1_rtx)); + } + CALL_INSN_FUNCTION_USAGE (insn) + = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0), + CALL_INSN_FUNCTION_USAGE (insn)); + insn = get_insns (); + end_sequence (); + emit_libcall_block (insn, temp3, o0, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLSLD_BASE)); + temp1 = gen_reg_rtx (SImode); + temp2 = gen_reg_rtx (SImode); + emit_insn (gen_tldo_hix22 (temp1, addr)); + emit_insn (gen_tldo_lox10 (temp2, temp1, addr)); + if (TARGET_ARCH32) + emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr)); + else + emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr)); + break; + + case TLS_MODEL_INITIAL_EXEC: + temp1 = gen_reg_rtx (SImode); + temp2 = gen_reg_rtx (SImode); + temp3 = gen_reg_rtx (Pmode); + got = sparc_tls_got (); + emit_insn (gen_tie_hi22 (temp1, addr)); + emit_insn (gen_tie_lo10 (temp2, temp1, addr)); + if (TARGET_ARCH32) + emit_insn (gen_tie_ld32 (temp3, got, temp2, addr)); + else + emit_insn (gen_tie_ld64 (temp3, got, temp2, addr)); + if (TARGET_SUN_TLS) + { + ret = gen_reg_rtx (Pmode); + if (TARGET_ARCH32) + emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7), + temp3, addr)); + else + emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7), + temp3, addr)); + } + else + ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3); + break; + + case TLS_MODEL_LOCAL_EXEC: + temp1 = gen_reg_rtx (Pmode); + temp2 = gen_reg_rtx (Pmode); + if 
(TARGET_ARCH32) + { + emit_insn (gen_tle_hix22_sp32 (temp1, addr)); + emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr)); + } + else + { + emit_insn (gen_tle_hix22_sp64 (temp1, addr)); + emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr)); + } + ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2); + break; + + default: + gcc_unreachable (); + } + + else if (GET_CODE (addr) == CONST) + { + rtx base, offset; + + gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS); + + base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0)); + offset = XEXP (XEXP (addr, 0), 1); + + base = force_operand (base, NULL_RTX); + if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset))) + offset = force_reg (Pmode, offset); + ret = gen_rtx_PLUS (Pmode, base, offset); + } + + else + gcc_unreachable (); /* for now ... */ + + return ret; +} + +/* Legitimize PIC addresses. If the address is already position-independent, + we return ORIG. Newly generated position-independent addresses go into a + reg. This is REG if nonzero, otherwise we allocate register(s) as + necessary. */ + +static rtx +sparc_legitimize_pic_address (rtx orig, rtx reg) +{ + bool gotdata_op = false; + + if (GET_CODE (orig) == SYMBOL_REF + /* See the comment in sparc_expand_move. */ + || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig))) + { + rtx pic_ref, address; + rtx insn; + + if (reg == 0) + { + gcc_assert (! reload_in_progress && ! reload_completed); + reg = gen_reg_rtx (Pmode); + } + + if (flag_pic == 2) + { + /* If not during reload, allocate another temp reg here for loading + in the address, so that these instructions can be optimized + properly. */ + rtx temp_reg = ((reload_in_progress || reload_completed) + ? reg : gen_reg_rtx (Pmode)); + + /* Must put the SYMBOL_REF inside an UNSPEC here so that cse + won't get confused into thinking that these two instructions + are loading in the true address of the symbol. If in the + future a PIC rtx exists, that should be used instead. */ + if (TARGET_ARCH64) + { + emit_insn (gen_movdi_high_pic (temp_reg, orig)); + emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig)); + } + else + { + emit_insn (gen_movsi_high_pic (temp_reg, orig)); + emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig)); + } + address = temp_reg; + gotdata_op = true; + } + else + address = orig; + + crtl->uses_pic_offset_table = 1; + if (gotdata_op) + { + if (TARGET_ARCH64) + insn = emit_insn (gen_movdi_pic_gotdata_op (reg, + pic_offset_table_rtx, + address, orig)); + else + insn = emit_insn (gen_movsi_pic_gotdata_op (reg, + pic_offset_table_rtx, + address, orig)); + } + else + { + pic_ref + = gen_const_mem (Pmode, + gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, address)); + insn = emit_move_insn (reg, pic_ref); + } + + /* Put a REG_EQUAL note on this insn, so that it can be optimized + by loop. */ + set_unique_reg_note (insn, REG_EQUAL, orig); + return reg; + } + else if (GET_CODE (orig) == CONST) + { + rtx base, offset; + + if (GET_CODE (XEXP (orig, 0)) == PLUS + && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) + return orig; + + if (reg == 0) + { + gcc_assert (! reload_in_progress && ! reload_completed); + reg = gen_reg_rtx (Pmode); + } + + gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); + base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg); + offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), + base == reg ? NULL_RTX : reg); + + if (GET_CODE (offset) == CONST_INT) + { + if (SMALL_INT (offset)) + return plus_constant (base, INTVAL (offset)); + else if (! 
reload_in_progress && ! reload_completed) + offset = force_reg (Pmode, offset); + else + /* If we reach here, then something is seriously wrong. */ + gcc_unreachable (); + } + return gen_rtx_PLUS (Pmode, base, offset); + } + else if (GET_CODE (orig) == LABEL_REF) + /* ??? We ought to be checking that the register is live instead, in case + it is eliminated. */ + crtl->uses_pic_offset_table = 1; + + return orig; +} + +/* Try machine-dependent ways of modifying an illegitimate address X + to be legitimate. If we find one, return the new, valid address. + + OLDX is the address as it was before break_out_memory_refs was called. + In some cases it is useful to look at this to decide what needs to be done. + + MODE is the mode of the operand pointed to by X. + + On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */ + +static rtx +sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + rtx orig_x = x; + + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT) + x = gen_rtx_PLUS (Pmode, XEXP (x, 1), + force_operand (XEXP (x, 0), NULL_RTX)); + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT) + x = gen_rtx_PLUS (Pmode, XEXP (x, 0), + force_operand (XEXP (x, 1), NULL_RTX)); + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS) + x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX), + XEXP (x, 1)); + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS) + x = gen_rtx_PLUS (Pmode, XEXP (x, 0), + force_operand (XEXP (x, 1), NULL_RTX)); + + if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE)) + return x; + + if (sparc_tls_referenced_p (x)) + x = sparc_legitimize_tls_address (x); + else if (flag_pic) + x = sparc_legitimize_pic_address (x, NULL_RTX); + else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1))) + x = gen_rtx_PLUS (Pmode, XEXP (x, 0), + copy_to_mode_reg (Pmode, XEXP (x, 1))); + else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0))) + x = gen_rtx_PLUS (Pmode, XEXP (x, 1), + copy_to_mode_reg (Pmode, XEXP (x, 0))); + else if (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == CONST + || GET_CODE (x) == LABEL_REF) + x = copy_to_suggested_reg (x, NULL_RTX, Pmode); + + return x; +} + +/* Delegitimize an address that was legitimized by the above function. */ + +static rtx +sparc_delegitimize_address (rtx x) +{ + x = delegitimize_mem_from_attrs (x); + + if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC) + switch (XINT (XEXP (x, 1), 1)) + { + case UNSPEC_MOVE_PIC: + case UNSPEC_TLSLE: + x = XVECEXP (XEXP (x, 1), 0, 0); + gcc_assert (GET_CODE (x) == SYMBOL_REF); + break; + default: + break; + } + + return x; +} + +/* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to + replace the input X, or the original X if no replacement is called for. + The output parameter *WIN is 1 if the calling macro should goto WIN, + 0 if it should not. + + For SPARC, we wish to handle addresses by splitting them into + HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference. + This cuts the number of extra insns by one. + + Do nothing when generating PIC code and the address is a symbolic + operand or requires a scratch register. */ + +rtx +sparc_legitimize_reload_address (rtx x, enum machine_mode mode, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED, int *win) +{ + /* Decompose SImode constants into HIGH+LO_SUM. 
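+   (Hypothetical example: a constant address such as 0x12345678 is reloaded
+   as "sethi %hi(0x12345678), %reg" while the %lo(0x12345678) part stays in
+   the LO_SUM, so it is absorbed into the memory reference and one reload
+   insn is saved.)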
*/ + if (CONSTANT_P (x) + && (mode != TFmode || TARGET_ARCH64) + && GET_MODE (x) == SImode + && GET_CODE (x) != LO_SUM + && GET_CODE (x) != HIGH + && sparc_cmodel <= CM_MEDLOW + && !(flag_pic + && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x)))) + { + x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x); + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type)type); + *win = 1; + return x; + } + + /* We have to recognize what we have already generated above. */ + if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH) + { + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type)type); + *win = 1; + return x; + } + + *win = 0; + return x; +} + +/* Return true if ADDR (a legitimate address expression) + has an effect that depends on the machine mode it is used for. + + In PIC mode, + + (mem:HI [%l7+a]) + + is not equivalent to + + (mem:QI [%l7+a]) (mem:QI [%l7+a+1]) + + because [%l7+a+1] is interpreted as the address of (a+1). */ + + +static bool +sparc_mode_dependent_address_p (const_rtx addr) +{ + if (flag_pic && GET_CODE (addr) == PLUS) + { + rtx op0 = XEXP (addr, 0); + rtx op1 = XEXP (addr, 1); + if (op0 == pic_offset_table_rtx + && SYMBOLIC_CONST (op1)) + return true; + } + + return false; +} + +#ifdef HAVE_GAS_HIDDEN +# define USE_HIDDEN_LINKONCE 1 +#else +# define USE_HIDDEN_LINKONCE 0 +#endif + +static void +get_pc_thunk_name (char name[32], unsigned int regno) +{ + const char *reg_name = reg_names[regno]; + + /* Skip the leading '%' as that cannot be used in a + symbol name. */ + reg_name += 1; + + if (USE_HIDDEN_LINKONCE) + sprintf (name, "__sparc_get_pc_thunk.%s", reg_name); + else + ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno); +} + +/* Wrapper around the load_pcrel_sym{si,di} patterns. */ + +static rtx +gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3) +{ + int orig_flag_pic = flag_pic; + rtx insn; + + /* The load_pcrel_sym{si,di} patterns require absolute addressing. */ + flag_pic = 0; + if (TARGET_ARCH64) + insn = gen_load_pcrel_symdi (op0, op1, op2, op3); + else + insn = gen_load_pcrel_symsi (op0, op1, op2, op3); + flag_pic = orig_flag_pic; + + return insn; +} + +/* Emit code to load the GOT register. */ + +static void +load_got_register (void) +{ + /* In PIC mode, this will retrieve pic_offset_table_rtx. */ + if (!global_offset_table_rtx) + global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM); + + if (TARGET_VXWORKS_RTP) + emit_insn (gen_vxworks_load_got ()); + else + { + /* The GOT symbol is subject to a PC-relative relocation so we need a + helper function to add the PC value and thus get the final value. */ + if (!got_helper_rtx) + { + char name[32]; + get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM); + got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); + } + + emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (), + got_helper_rtx, + GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM))); + } + + /* Need to emit this whether or not we obey regdecls, + since setjmp/longjmp can cause life info to screw up. + ??? In the case where we don't obey regdecls, this is not sufficient + since we may not fall out the bottom. */ + emit_use (global_offset_table_rtx); +} + +/* Emit a call instruction with the pattern given by PAT. ADDR is the + address of the call target. 
*/ + +void +sparc_emit_call_insn (rtx pat, rtx addr) +{ + rtx insn; + + insn = emit_call_insn (pat); + + /* The PIC register is live on entry to VxWorks PIC PLT entries. */ + if (TARGET_VXWORKS_RTP + && flag_pic + && GET_CODE (addr) == SYMBOL_REF + && (SYMBOL_REF_DECL (addr) + ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr)) + : !SYMBOL_REF_LOCAL_P (addr))) + { + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); + crtl->uses_pic_offset_table = 1; + } +} + +/* Return 1 if RTX is a MEM which is known to be aligned to at + least a DESIRED byte boundary. */ + +int +mem_min_alignment (rtx mem, int desired) +{ + rtx addr, base, offset; + + /* If it's not a MEM we can't accept it. */ + if (GET_CODE (mem) != MEM) + return 0; + + /* Obviously... */ + if (!TARGET_UNALIGNED_DOUBLES + && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired) + return 1; + + /* ??? The rest of the function predates MEM_ALIGN so + there is probably a bit of redundancy. */ + addr = XEXP (mem, 0); + base = offset = NULL_RTX; + if (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 0)) == REG) + { + base = XEXP (addr, 0); + + /* What we are saying here is that if the base + REG is aligned properly, the compiler will make + sure any REG based index upon it will be so + as well. */ + if (GET_CODE (XEXP (addr, 1)) == CONST_INT) + offset = XEXP (addr, 1); + else + offset = const0_rtx; + } + } + else if (GET_CODE (addr) == REG) + { + base = addr; + offset = const0_rtx; + } + + if (base != NULL_RTX) + { + int regno = REGNO (base); + + if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM) + { + /* Check if the compiler has recorded some information + about the alignment of the base REG. If reload has + completed, we already matched with proper alignments. + If not running global_alloc, reload might give us + unaligned pointer to local stack though. */ + if (((cfun != 0 + && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT) + || (optimize && reload_completed)) + && (INTVAL (offset) & (desired - 1)) == 0) + return 1; + } + else + { + if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0) + return 1; + } + } + else if (! TARGET_UNALIGNED_DOUBLES + || CONSTANT_P (addr) + || GET_CODE (addr) == LO_SUM) + { + /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES + is true, in which case we can only assume that an access is aligned if + it is to a constant address, or the address involves a LO_SUM. */ + return 1; + } + + /* An obviously unaligned address. */ + return 0; +} + + +/* Vectors to keep interesting information about registers where it can easily + be got. We used to use the actual mode value as the bit number, but there + are more than 32 modes now. Instead we use two tables: one indexed by + hard register number, and one indexed by mode. */ + +/* The purpose of sparc_mode_class is to shrink the range of modes so that + they all fit (as bit numbers) in a 32-bit word (again). Each real mode is + mapped into one sparc_mode_class mode. */ + +enum sparc_mode_class { + S_MODE, D_MODE, T_MODE, O_MODE, + SF_MODE, DF_MODE, TF_MODE, OF_MODE, + CC_MODE, CCFP_MODE +}; + +/* Modes for single-word and smaller quantities. */ +#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) + +/* Modes for double-word and smaller quantities. */ +#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE)) + +/* Modes for quad-word and smaller quantities. 
*/ +#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE)) + +/* Modes for 8-word and smaller quantities. */ +#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE)) + +/* Modes for single-float quantities. We must allow any single word or + smaller quantity. This is because the fix/float conversion instructions + take integer inputs/outputs from the float registers. */ +#define SF_MODES (S_MODES) + +/* Modes for double-float and smaller quantities. */ +#define DF_MODES (D_MODES) + +/* Modes for quad-float and smaller quantities. */ +#define TF_MODES (DF_MODES | (1 << (int) TF_MODE)) + +/* Modes for quad-float pairs and smaller quantities. */ +#define OF_MODES (TF_MODES | (1 << (int) OF_MODE)) + +/* Modes for double-float only quantities. */ +#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE)) + +/* Modes for quad-float and double-float only quantities. */ +#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE)) + +/* Modes for quad-float pairs and double-float only quantities. */ +#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE)) + +/* Modes for condition codes. */ +#define CC_MODES (1 << (int) CC_MODE) +#define CCFP_MODES (1 << (int) CCFP_MODE) + +/* Value is 1 if register/mode pair is acceptable on sparc. + The funny mixture of D and T modes is because integer operations + do not specially operate on tetra quantities, so non-quad-aligned + registers can hold quadword quantities (except %o4 and %i4 because + they cross fixed registers). */ + +/* This points to either the 32 bit or the 64 bit version. */ +const int *hard_regno_mode_classes; + +static const int hard_32bit_mode_classes[] = { + S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, + T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES, + T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, + T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES, + + OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, + OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, + OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, + OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES, + + /* FP regs f32 to f63. Only the even numbered registers actually exist, + and none can hold SFmode/SImode values. */ + OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, + OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, + OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, + OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0, + + /* %fcc[0123] */ + CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES, + + /* %icc */ + CC_MODES +}; + +static const int hard_64bit_mode_classes[] = { + D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, + O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, + T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, + O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, + + OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, + OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, + OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, + OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES, + + /* FP regs f32 to f63. 
Only the even numbered registers actually exist, + and none can hold SFmode/SImode values. */ + OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, + OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, + OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, + OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0, + + /* %fcc[0123] */ + CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES, + + /* %icc */ + CC_MODES +}; + +int sparc_mode_class [NUM_MACHINE_MODES]; + +enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER]; + +static void +sparc_init_modes (void) +{ + int i; + + for (i = 0; i < NUM_MACHINE_MODES; i++) + { + switch (GET_MODE_CLASS (i)) + { + case MODE_INT: + case MODE_PARTIAL_INT: + case MODE_COMPLEX_INT: + if (GET_MODE_SIZE (i) <= 4) + sparc_mode_class[i] = 1 << (int) S_MODE; + else if (GET_MODE_SIZE (i) == 8) + sparc_mode_class[i] = 1 << (int) D_MODE; + else if (GET_MODE_SIZE (i) == 16) + sparc_mode_class[i] = 1 << (int) T_MODE; + else if (GET_MODE_SIZE (i) == 32) + sparc_mode_class[i] = 1 << (int) O_MODE; + else + sparc_mode_class[i] = 0; + break; + case MODE_VECTOR_INT: + if (GET_MODE_SIZE (i) <= 4) + sparc_mode_class[i] = 1 << (int)SF_MODE; + else if (GET_MODE_SIZE (i) == 8) + sparc_mode_class[i] = 1 << (int)DF_MODE; + break; + case MODE_FLOAT: + case MODE_COMPLEX_FLOAT: + if (GET_MODE_SIZE (i) <= 4) + sparc_mode_class[i] = 1 << (int) SF_MODE; + else if (GET_MODE_SIZE (i) == 8) + sparc_mode_class[i] = 1 << (int) DF_MODE; + else if (GET_MODE_SIZE (i) == 16) + sparc_mode_class[i] = 1 << (int) TF_MODE; + else if (GET_MODE_SIZE (i) == 32) + sparc_mode_class[i] = 1 << (int) OF_MODE; + else + sparc_mode_class[i] = 0; + break; + case MODE_CC: + if (i == (int) CCFPmode || i == (int) CCFPEmode) + sparc_mode_class[i] = 1 << (int) CCFP_MODE; + else + sparc_mode_class[i] = 1 << (int) CC_MODE; + break; + default: + sparc_mode_class[i] = 0; + break; + } + } + + if (TARGET_ARCH64) + hard_regno_mode_classes = hard_64bit_mode_classes; + else + hard_regno_mode_classes = hard_32bit_mode_classes; + + /* Initialize the array used by REGNO_REG_CLASS. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + if (i < 16 && TARGET_V8PLUS) + sparc_regno_reg_class[i] = I64_REGS; + else if (i < 32 || i == FRAME_POINTER_REGNUM) + sparc_regno_reg_class[i] = GENERAL_REGS; + else if (i < 64) + sparc_regno_reg_class[i] = FP_REGS; + else if (i < 96) + sparc_regno_reg_class[i] = EXTRA_FP_REGS; + else if (i < 100) + sparc_regno_reg_class[i] = FPCC_REGS; + else + sparc_regno_reg_class[i] = NO_REGS; + } +} + +/* Compute the frame size required by the function. This function is called + during the reload pass and also by sparc_expand_prologue. */ + +HOST_WIDE_INT +sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function_p) +{ + int outgoing_args_size = (crtl->outgoing_args_size + + REG_PARM_STACK_SPACE (current_function_decl)); + int n_regs = 0; /* N_REGS is the number of 4-byte regs saved thus far. */ + int i; + + if (TARGET_ARCH64) + { + for (i = 0; i < 8; i++) + if (df_regs_ever_live_p (i) && ! call_used_regs[i]) + n_regs += 2; + } + else + { + for (i = 0; i < 8; i += 2) + if ((df_regs_ever_live_p (i) && ! call_used_regs[i]) + || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1])) + n_regs += 2; + } + + for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2) + if ((df_regs_ever_live_p (i) && ! call_used_regs[i]) + || (df_regs_ever_live_p (i+1) && ! 
call_used_regs[i+1])) + n_regs += 2; + + /* Set up values for use in prologue and epilogue. */ + num_gfregs = n_regs; + + if (leaf_function_p + && n_regs == 0 + && size == 0 + && crtl->outgoing_args_size == 0) + actual_fsize = apparent_fsize = 0; + else + { + /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */ + apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8; + apparent_fsize += n_regs * 4; + actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8); + } + + /* Make sure nothing can clobber our register windows. + If a SAVE must be done, or there is a stack-local variable, + the register window area must be allocated. */ + if (! leaf_function_p || size > 0) + actual_fsize += FIRST_PARM_OFFSET (current_function_decl); + + return SPARC_STACK_ALIGN (actual_fsize); +} + +/* Output any necessary .register pseudo-ops. */ + +void +sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED) +{ +#ifdef HAVE_AS_REGISTER_PSEUDO_OP + int i; + + if (TARGET_ARCH32) + return; + + /* Check if %g[2367] were used without + .register being printed for them already. */ + for (i = 2; i < 8; i++) + { + if (df_regs_ever_live_p (i) + && ! sparc_hard_reg_printed [i]) + { + sparc_hard_reg_printed [i] = 1; + /* %g7 is used as TLS base register, use #ignore + for it instead of #scratch. */ + fprintf (file, "\t.register\t%%g%d, #%s\n", i, + i == 7 ? "ignore" : "scratch"); + } + if (i == 3) i = 5; + } +#endif +} + +#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) + +#if PROBE_INTERVAL > 4096 +#error Cannot use indexed addressing mode for stack probing +#endif + +/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, + inclusive. These are offsets from the current stack pointer. + + Note that we don't use the REG+REG addressing mode for the probes because + of the stack bias in 64-bit mode. And it doesn't really buy us anything + so the advantages of having a single code win here. */ + +static void +sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) +{ + rtx g1 = gen_rtx_REG (Pmode, 1); + + /* See if we have a constant small number of probes to generate. If so, + that's the easy case. */ + if (size <= PROBE_INTERVAL) + { + emit_move_insn (g1, GEN_INT (first)); + emit_insn (gen_rtx_SET (VOIDmode, g1, + gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); + emit_stack_probe (plus_constant (g1, -size)); + } + + /* The run-time loop is made up of 10 insns in the generic case while the + compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */ + else if (size <= 5 * PROBE_INTERVAL) + { + HOST_WIDE_INT i; + + emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL)); + emit_insn (gen_rtx_SET (VOIDmode, g1, + gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); + emit_stack_probe (g1); + + /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until + it exceeds SIZE. If only two probes are needed, this will not + generate any code. Then probe at FIRST + SIZE. */ + for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) + { + emit_insn (gen_rtx_SET (VOIDmode, g1, + plus_constant (g1, -PROBE_INTERVAL))); + emit_stack_probe (g1); + } + + emit_stack_probe (plus_constant (g1, (i - PROBE_INTERVAL) - size)); + } + + /* Otherwise, do the same as above, but in a loop. Note that we must be + extra careful with variables wrapping around because we might be at + the very top (or the very bottom) of the address space and we have + to be able to handle this case properly; in particular, we use an + equality test for the loop condition. 
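+   (A pseudo-code sketch of the loop expanded below, not the exact RTL:
+
+        test = sp - first;
+        last = test - rounded_size;
+        while (test != last)
+          {
+            test -= PROBE_INTERVAL;
+            store %g0 at test, adjusted by the stack bias;
+          }
+        if (size != rounded_size)
+          probe once more at last + (rounded_size - size);
+
+   the equality test on the loop counter is what keeps the wrap-around
+   cases safe.)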
*/ + else + { + HOST_WIDE_INT rounded_size; + rtx g4 = gen_rtx_REG (Pmode, 4); + + emit_move_insn (g1, GEN_INT (first)); + + + /* Step 1: round SIZE to the previous multiple of the interval. */ + + rounded_size = size & -PROBE_INTERVAL; + emit_move_insn (g4, GEN_INT (rounded_size)); + + + /* Step 2: compute initial and final value of the loop counter. */ + + /* TEST_ADDR = SP + FIRST. */ + emit_insn (gen_rtx_SET (VOIDmode, g1, + gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); + + /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ + emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4))); + + + /* Step 3: the loop + + while (TEST_ADDR != LAST_ADDR) + { + TEST_ADDR = TEST_ADDR + PROBE_INTERVAL + probe at TEST_ADDR + } + + probes at FIRST + N * PROBE_INTERVAL for values of N from 1 + until it is equal to ROUNDED_SIZE. */ + + if (TARGET_64BIT) + emit_insn (gen_probe_stack_rangedi (g1, g1, g4)); + else + emit_insn (gen_probe_stack_rangesi (g1, g1, g4)); + + + /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time + that SIZE is equal to ROUNDED_SIZE. */ + + if (size != rounded_size) + emit_stack_probe (plus_constant (g4, rounded_size - size)); + } + + /* Make sure nothing is scheduled before we are done. */ + emit_insn (gen_blockage ()); +} + +/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are + absolute addresses. */ + +const char * +output_probe_stack_range (rtx reg1, rtx reg2) +{ + static int labelno = 0; + char loop_lab[32], end_lab[32]; + rtx xops[2]; + + ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno); + ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++); + + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); + + /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */ + xops[0] = reg1; + xops[1] = reg2; + output_asm_insn ("cmp\t%0, %1", xops); + if (TARGET_ARCH64) + fputs ("\tbe,pn\t%xcc,", asm_out_file); + else + fputs ("\tbe\t", asm_out_file); + assemble_name_raw (asm_out_file, end_lab); + fputc ('\n', asm_out_file); + + /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ + xops[1] = GEN_INT (-PROBE_INTERVAL); + output_asm_insn (" add\t%0, %1, %0", xops); + + /* Probe at TEST_ADDR and branch. */ + if (TARGET_ARCH64) + fputs ("\tba,pt\t%xcc,", asm_out_file); + else + fputs ("\tba\t", asm_out_file); + assemble_name_raw (asm_out_file, loop_lab); + fputc ('\n', asm_out_file); + xops[1] = GEN_INT (SPARC_STACK_BIAS); + output_asm_insn (" st\t%%g0, [%0+%1]", xops); + + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab); + + return ""; +} + +/* Save/restore call-saved registers from LOW to HIGH at BASE+OFFSET + as needed. LOW should be double-word aligned for 32-bit registers. + Return the new OFFSET. */ + +#define SORR_SAVE 0 +#define SORR_RESTORE 1 + +static int +save_or_restore_regs (int low, int high, rtx base, int offset, int action) +{ + rtx mem, insn; + int i; + + if (TARGET_ARCH64 && high <= 32) + { + for (i = low; i < high; i++) + { + if (df_regs_ever_live_p (i) && ! call_used_regs[i]) + { + mem = gen_frame_mem (DImode, plus_constant (base, offset)); + if (action == SORR_SAVE) + { + insn = emit_move_insn (mem, gen_rtx_REG (DImode, i)); + RTX_FRAME_RELATED_P (insn) = 1; + } + else /* action == SORR_RESTORE */ + emit_move_insn (gen_rtx_REG (DImode, i), mem); + offset += 8; + } + } + } + else + { + for (i = low; i < high; i += 2) + { + bool reg0 = df_regs_ever_live_p (i) && ! call_used_regs[i]; + bool reg1 = df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]; + enum machine_mode mode; + int regno; + + if (reg0 && reg1) + { + mode = i < 32 ? 
DImode : DFmode; + regno = i; + } + else if (reg0) + { + mode = i < 32 ? SImode : SFmode; + regno = i; + } + else if (reg1) + { + mode = i < 32 ? SImode : SFmode; + regno = i + 1; + offset += 4; + } + else + continue; + + mem = gen_frame_mem (mode, plus_constant (base, offset)); + if (action == SORR_SAVE) + { + insn = emit_move_insn (mem, gen_rtx_REG (mode, regno)); + RTX_FRAME_RELATED_P (insn) = 1; + } + else /* action == SORR_RESTORE */ + emit_move_insn (gen_rtx_REG (mode, regno), mem); + + /* Always preserve double-word alignment. */ + offset = (offset + 8) & -8; + } + } + + return offset; +} + +/* Emit code to save call-saved registers. */ + +static void +emit_save_or_restore_regs (int action) +{ + HOST_WIDE_INT offset; + rtx base; + + offset = frame_base_offset - apparent_fsize; + + if (offset < -4096 || offset + num_gfregs * 4 > 4095) + { + /* ??? This might be optimized a little as %g1 might already have a + value close enough that a single add insn will do. */ + /* ??? Although, all of this is probably only a temporary fix + because if %g1 can hold a function result, then + sparc_expand_epilogue will lose (the result will be + clobbered). */ + base = gen_rtx_REG (Pmode, 1); + emit_move_insn (base, GEN_INT (offset)); + emit_insn (gen_rtx_SET (VOIDmode, + base, + gen_rtx_PLUS (Pmode, frame_base_reg, base))); + offset = 0; + } + else + base = frame_base_reg; + + offset = save_or_restore_regs (0, 8, base, offset, action); + save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, action); +} + +/* Generate a save_register_window insn. */ + +static rtx +gen_save_register_window (rtx increment) +{ + if (TARGET_ARCH64) + return gen_save_register_windowdi (increment); + else + return gen_save_register_windowsi (increment); +} + +/* Generate an increment for the stack pointer. */ + +static rtx +gen_stack_pointer_inc (rtx increment) +{ + return gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + gen_rtx_PLUS (Pmode, + stack_pointer_rtx, + increment)); +} + +/* Generate a decrement for the stack pointer. */ + +static rtx +gen_stack_pointer_dec (rtx decrement) +{ + return gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + gen_rtx_MINUS (Pmode, + stack_pointer_rtx, + decrement)); +} + +/* Expand the function prologue. The prologue is responsible for reserving + storage for the frame, saving the call-saved registers and loading the + GOT register if needed. */ + +void +sparc_expand_prologue (void) +{ + rtx insn; + int i; + + /* Compute a snapshot of current_function_uses_only_leaf_regs. Relying + on the final value of the flag means deferring the prologue/epilogue + expansion until just before the second scheduling pass, which is too + late to emit multiple epilogues or return insns. + + Of course we are making the assumption that the value of the flag + will not change between now and its final value. Of the three parts + of the formula, only the last one can reasonably vary. Let's take a + closer look, after assuming that the first two ones are set to true + (otherwise the last value is effectively silenced). + + If only_leaf_regs_used returns false, the global predicate will also + be false so the actual frame size calculated below will be positive. + As a consequence, the save_register_window insn will be emitted in + the instruction stream; now this insn explicitly references %fp + which is not a leaf register so only_leaf_regs_used will always + return false subsequently. 
+ + If only_leaf_regs_used returns true, we hope that the subsequent + optimization passes won't cause non-leaf registers to pop up. For + example, the regrename pass has special provisions to not rename to + non-leaf registers in a leaf function. */ + sparc_leaf_function_p + = optimize > 0 && current_function_is_leaf && only_leaf_regs_used (); + + /* Need to use actual_fsize, since we are also allocating + space for our callee (and our own register save area). */ + actual_fsize + = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p); + + /* Advertise that the data calculated just above are now valid. */ + sparc_prologue_data_valid_p = true; + + if (flag_stack_usage) + current_function_static_stack_size = actual_fsize; + + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && actual_fsize) + sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, actual_fsize); + + if (sparc_leaf_function_p) + { + frame_base_reg = stack_pointer_rtx; + frame_base_offset = actual_fsize + SPARC_STACK_BIAS; + } + else + { + frame_base_reg = hard_frame_pointer_rtx; + frame_base_offset = SPARC_STACK_BIAS; + } + + if (actual_fsize == 0) + /* do nothing. */ ; + else if (sparc_leaf_function_p) + { + if (actual_fsize <= 4096) + insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-actual_fsize))); + else if (actual_fsize <= 8192) + { + insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096))); + RTX_FRAME_RELATED_P (insn) = 1; + + /* %sp is still the CFA register. */ + insn + = emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize))); + } + else + { + rtx reg = gen_rtx_REG (Pmode, 1); + emit_move_insn (reg, GEN_INT (-actual_fsize)); + insn = emit_insn (gen_stack_pointer_inc (reg)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_stack_pointer_inc (GEN_INT (-actual_fsize))); + } + + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + if (actual_fsize <= 4096) + insn = emit_insn (gen_save_register_window (GEN_INT (-actual_fsize))); + else if (actual_fsize <= 8192) + { + insn = emit_insn (gen_save_register_window (GEN_INT (-4096))); + + /* %sp is not the CFA register anymore. */ + emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize))); + + /* Make sure no %fp-based store is issued until after the frame is + established. The offset between the frame pointer and the stack + pointer is calculated relative to the value of the stack pointer + at the end of the function prologue, and moving instructions that + access the stack via the frame pointer between the instructions + that decrement the stack pointer could result in accessing the + register window save area, which is volatile. */ + emit_insn (gen_frame_blockage ()); + } + else + { + rtx reg = gen_rtx_REG (Pmode, 1); + emit_move_insn (reg, GEN_INT (-actual_fsize)); + insn = emit_insn (gen_save_register_window (reg)); + } + + RTX_FRAME_RELATED_P (insn) = 1; + for (i=0; i < XVECLEN (PATTERN (insn), 0); i++) + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, i)) = 1; + } + + if (num_gfregs) + emit_save_or_restore_regs (SORR_SAVE); + + /* Load the GOT register if needed. */ + if (crtl->uses_pic_offset_table) + load_got_register (); +} + +/* This function generates the assembly code for function entry, which boils + down to emitting the necessary .register directives. */ + +static void +sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + /* Check that the assumption we made in sparc_expand_prologue is valid. 
*/ + gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs); + + sparc_output_scratch_registers (file); +} + +/* Expand the function epilogue, either normal or part of a sibcall. + We emit all the instructions except the return or the call. */ + +void +sparc_expand_epilogue (void) +{ + if (num_gfregs) + emit_save_or_restore_regs (SORR_RESTORE); + + if (actual_fsize == 0) + /* do nothing. */ ; + else if (sparc_leaf_function_p) + { + if (actual_fsize <= 4096) + emit_insn (gen_stack_pointer_dec (GEN_INT (- actual_fsize))); + else if (actual_fsize <= 8192) + { + emit_insn (gen_stack_pointer_dec (GEN_INT (-4096))); + emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - actual_fsize))); + } + else + { + rtx reg = gen_rtx_REG (Pmode, 1); + emit_move_insn (reg, GEN_INT (-actual_fsize)); + emit_insn (gen_stack_pointer_dec (reg)); + } + } +} + +/* Return true if it is appropriate to emit `return' instructions in the + body of a function. */ + +bool +sparc_can_use_return_insn_p (void) +{ + return sparc_prologue_data_valid_p + && num_gfregs == 0 + && (actual_fsize == 0 || !sparc_leaf_function_p); +} + +/* This function generates the assembly code for function exit. */ + +static void +sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + /* If the last two instructions of a function are "call foo; dslot;" + the return address might point to the first instruction in the next + function and we have to output a dummy nop for the sake of sane + backtraces in such cases. This is pointless for sibling calls since + the return address is explicitly adjusted. */ + + rtx insn, last_real_insn; + + insn = get_last_insn (); + + last_real_insn = prev_real_insn (insn); + if (last_real_insn + && GET_CODE (last_real_insn) == INSN + && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE) + last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0); + + if (last_real_insn + && CALL_P (last_real_insn) + && !SIBLING_CALL_P (last_real_insn)) + fputs("\tnop\n", file); + + sparc_output_deferred_case_vectors (); +} + +/* Output a 'restore' instruction. */ + +static void +output_restore (rtx pat) +{ + rtx operands[3]; + + if (! pat) + { + fputs ("\t restore\n", asm_out_file); + return; + } + + gcc_assert (GET_CODE (pat) == SET); + + operands[0] = SET_DEST (pat); + pat = SET_SRC (pat); + + switch (GET_CODE (pat)) + { + case PLUS: + operands[1] = XEXP (pat, 0); + operands[2] = XEXP (pat, 1); + output_asm_insn (" restore %r1, %2, %Y0", operands); + break; + case LO_SUM: + operands[1] = XEXP (pat, 0); + operands[2] = XEXP (pat, 1); + output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands); + break; + case ASHIFT: + operands[1] = XEXP (pat, 0); + gcc_assert (XEXP (pat, 1) == const1_rtx); + output_asm_insn (" restore %r1, %r1, %Y0", operands); + break; + default: + operands[1] = pat; + output_asm_insn (" restore %%g0, %1, %Y0", operands); + break; + } +} + +/* Output a return. */ + +const char * +output_return (rtx insn) +{ + if (sparc_leaf_function_p) + { + /* This is a leaf function so we don't have to bother restoring the + register window, which frees us from dealing with the convoluted + semantics of restore/return. We simply output the jump to the + return address and the insn in the delay slot (if any). */ + + gcc_assert (! crtl->calls_eh_return); + + return "jmp\t%%o7+%)%#"; + } + else + { + /* This is a regular function so we have to restore the register window. 
+ We may have a pending insn for the delay slot, which will be either + combined with the 'restore' instruction or put in the delay slot of + the 'return' instruction. */ + + if (crtl->calls_eh_return) + { + /* If the function uses __builtin_eh_return, the eh_return + machinery occupies the delay slot. */ + gcc_assert (! final_sequence); + + if (flag_delayed_branch) + { + if (TARGET_V9) + fputs ("\treturn\t%i7+8\n", asm_out_file); + else + fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file); + + fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file); + } + else + { + fputs ("\trestore\n\tadd\t%sp, %g1, %sp\n", asm_out_file); + fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file); + } + } + else if (final_sequence) + { + rtx delay, pat; + + delay = NEXT_INSN (insn); + gcc_assert (delay); + + pat = PATTERN (delay); + + if (TARGET_V9 && ! epilogue_renumber (&pat, 1)) + { + epilogue_renumber (&pat, 0); + return "return\t%%i7+%)%#"; + } + else + { + output_asm_insn ("jmp\t%%i7+%)", NULL); + output_restore (pat); + PATTERN (delay) = gen_blockage (); + INSN_CODE (delay) = -1; + } + } + else + { + /* The delay slot is empty. */ + if (TARGET_V9) + return "return\t%%i7+%)\n\t nop"; + else if (flag_delayed_branch) + return "jmp\t%%i7+%)\n\t restore"; + else + return "restore\n\tjmp\t%%o7+%)\n\t nop"; + } + } + + return ""; +} + +/* Output a sibling call. */ + +const char * +output_sibcall (rtx insn, rtx call_operand) +{ + rtx operands[1]; + + gcc_assert (flag_delayed_branch); + + operands[0] = call_operand; + + if (sparc_leaf_function_p) + { + /* This is a leaf function so we don't have to bother restoring the + register window. We simply output the jump to the function and + the insn in the delay slot (if any). */ + + gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence)); + + if (final_sequence) + output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#", + operands); + else + /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize + it into branch if possible. */ + output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7", + operands); + } + else + { + /* This is a regular function so we have to restore the register window. + We may have a pending insn for the delay slot, which will be combined + with the 'restore' instruction. */ + + output_asm_insn ("call\t%a0, 0", operands); + + if (final_sequence) + { + rtx delay = NEXT_INSN (insn); + gcc_assert (delay); + + output_restore (PATTERN (delay)); + + PATTERN (delay) = gen_blockage (); + INSN_CODE (delay) = -1; + } + else + output_restore (NULL_RTX); + } + + return ""; +} + +/* Functions for handling argument passing. + + For 32-bit, the first 6 args are normally in registers and the rest are + pushed. Any arg that starts within the first 6 words is at least + partially passed in a register unless its data type forbids. + + For 64-bit, the argument registers are laid out as an array of 16 elements + and arguments are added sequentially. The first 6 int args and up to the + first 16 fp args (depending on size) are passed in regs. 
+ + Slot Stack Integral Float Float in structure Double Long Double + ---- ----- -------- ----- ------------------ ------ ----------- + 15 [SP+248] %f31 %f30,%f31 %d30 + 14 [SP+240] %f29 %f28,%f29 %d28 %q28 + 13 [SP+232] %f27 %f26,%f27 %d26 + 12 [SP+224] %f25 %f24,%f25 %d24 %q24 + 11 [SP+216] %f23 %f22,%f23 %d22 + 10 [SP+208] %f21 %f20,%f21 %d20 %q20 + 9 [SP+200] %f19 %f18,%f19 %d18 + 8 [SP+192] %f17 %f16,%f17 %d16 %q16 + 7 [SP+184] %f15 %f14,%f15 %d14 + 6 [SP+176] %f13 %f12,%f13 %d12 %q12 + 5 [SP+168] %o5 %f11 %f10,%f11 %d10 + 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8 + 3 [SP+152] %o3 %f7 %f6,%f7 %d6 + 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4 + 1 [SP+136] %o1 %f3 %f2,%f3 %d2 + 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0 + + Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise. + + Integral arguments are always passed as 64-bit quantities appropriately + extended. + + Passing of floating point values is handled as follows. + If a prototype is in scope: + If the value is in a named argument (i.e. not a stdarg function or a + value not part of the `...') then the value is passed in the appropriate + fp reg. + If the value is part of the `...' and is passed in one of the first 6 + slots then the value is passed in the appropriate int reg. + If the value is part of the `...' and is not passed in one of the first 6 + slots then the value is passed in memory. + If a prototype is not in scope: + If the value is one of the first 6 arguments the value is passed in the + appropriate integer reg and the appropriate fp reg. + If the value is not one of the first 6 arguments the value is passed in + the appropriate fp reg and in memory. + + + Summary of the calling conventions implemented by GCC on the SPARC: + + 32-bit ABI: + size argument return value + + small integer <4 int. reg. int. reg. + word 4 int. reg. int. reg. + double word 8 int. reg. int. reg. + + _Complex small integer <8 int. reg. int. reg. + _Complex word 8 int. reg. int. reg. + _Complex double word 16 memory int. reg. + + vector integer <=8 int. reg. FP reg. + vector integer >8 memory memory + + float 4 int. reg. FP reg. + double 8 int. reg. FP reg. + long double 16 memory memory + + _Complex float 8 memory FP reg. + _Complex double 16 memory FP reg. + _Complex long double 32 memory FP reg. + + vector float any memory memory + + aggregate any memory memory + + + + 64-bit ABI: + size argument return value + + small integer <8 int. reg. int. reg. + word 8 int. reg. int. reg. + double word 16 int. reg. int. reg. + + _Complex small integer <16 int. reg. int. reg. + _Complex word 16 int. reg. int. reg. + _Complex double word 32 memory int. reg. + + vector integer <=16 FP reg. FP reg. + vector integer 16<s<=32 memory memory + + float 4 FP reg. FP reg. + double 8 FP reg. FP reg. + long double 16 FP reg. FP reg. + + _Complex float 8 FP reg. FP reg. + _Complex double 16 FP reg. FP reg. + _Complex long double 32 memory FP reg. + + vector float <=16 FP reg. FP reg. + vector float 16<s<=32 memory memory + + aggregate <=16 reg. reg. + aggregate 16<s<=32 memory memory + + + +Note #1: complex floating-point types follow the extended SPARC ABIs as +implemented by the Sun compiler. + +Note #2: integral vector types follow the scalar floating-point types +conventions to match what is implemented by the Sun VIS SDK. + +Note #3: floating-point vector types follow the aggregate types +conventions. */ + + +/* Maximum number of int regs for args. */ +#define SPARC_INT_ARG_MAX 6 +/* Maximum number of fp regs for args.
*/ +#define SPARC_FP_ARG_MAX 16 + +#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Handle the INIT_CUMULATIVE_ARGS macro. + Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +void +init_cumulative_args (struct sparc_args *cum, tree fntype, + rtx libname ATTRIBUTE_UNUSED, + tree fndecl ATTRIBUTE_UNUSED) +{ + cum->words = 0; + cum->prototype_p = fntype && prototype_p (fntype); + cum->libcall_p = fntype == 0; +} + +/* Handle promotion of pointer and integer arguments. */ + +static enum machine_mode +sparc_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + enum machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + int for_return ATTRIBUTE_UNUSED) +{ + if (POINTER_TYPE_P (type)) + { + *punsignedp = POINTERS_EXTEND_UNSIGNED; + return Pmode; + } + + /* Integral arguments are passed as full words, as per the ABI. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < UNITS_PER_WORD) + return word_mode; + + return mode; +} + +/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */ + +static bool +sparc_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED) +{ + return TARGET_ARCH64 ? true : false; +} + +/* Scan the record type TYPE and return the following predicates: + - INTREGS_P: the record contains at least one field or sub-field + that is eligible for promotion in integer registers. + - FP_REGS_P: the record contains at least one field or sub-field + that is eligible for promotion in floating-point registers. + - PACKED_P: the record contains at least one field that is packed. + + Sub-fields are not taken into account for the PACKED_P predicate. */ + +static void +scan_record_type (const_tree type, int *intregs_p, int *fpregs_p, + int *packed_p) +{ + tree field; + + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL) + { + if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) + scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0); + else if ((FLOAT_TYPE_P (TREE_TYPE (field)) + || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) + && TARGET_FPU) + *fpregs_p = 1; + else + *intregs_p = 1; + + if (packed_p && DECL_PACKED (field)) + *packed_p = 1; + } + } +} + +/* Compute the slot number to pass an argument in. + Return the slot number or -1 if passing on the stack. + + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. + *PREGNO records the register number to use if scalar type. + *PPADDING records the amount of padding needed in words. */ + +static int +function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode, + const_tree type, bool named, bool incoming_p, + int *pregno, int *ppadding) +{ + int regbase = (incoming_p + ? 
SPARC_INCOMING_INT_ARG_FIRST + : SPARC_OUTGOING_INT_ARG_FIRST); + int slotno = cum->words; + enum mode_class mclass; + int regno; + + *ppadding = 0; + + if (type && TREE_ADDRESSABLE (type)) + return -1; + + if (TARGET_ARCH32 + && mode == BLKmode + && type + && TYPE_ALIGN (type) % PARM_BOUNDARY != 0) + return -1; + + /* For SPARC64, objects requiring 16-byte alignment get it. */ + if (TARGET_ARCH64 + && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128 + && (slotno & 1) != 0) + slotno++, *ppadding = 1; + + mclass = GET_MODE_CLASS (mode); + if (type && TREE_CODE (type) == VECTOR_TYPE) + { + /* Vector types deserve special treatment because they are + polymorphic wrt their mode, depending upon whether VIS + instructions are enabled. */ + if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) + { + /* The SPARC port defines no floating-point vector modes. */ + gcc_assert (mode == BLKmode); + } + else + { + /* Integral vector types should either have a vector + mode or an integral mode, because we are guaranteed + by pass_by_reference that their size is not greater + than 16 bytes and TImode is 16-byte wide. */ + gcc_assert (mode != BLKmode); + + /* Vector integers are handled like floats according to + the Sun VIS SDK. */ + mclass = MODE_FLOAT; + } + } + + switch (mclass) + { + case MODE_FLOAT: + case MODE_COMPLEX_FLOAT: + case MODE_VECTOR_INT: + if (TARGET_ARCH64 && TARGET_FPU && named) + { + if (slotno >= SPARC_FP_ARG_MAX) + return -1; + regno = SPARC_FP_ARG_FIRST + slotno * 2; + /* Arguments filling only one single FP register are + right-justified in the outer double FP register. */ + if (GET_MODE_SIZE (mode) <= 4) + regno++; + break; + } + /* fallthrough */ + + case MODE_INT: + case MODE_COMPLEX_INT: + if (slotno >= SPARC_INT_ARG_MAX) + return -1; + regno = regbase + slotno; + break; + + case MODE_RANDOM: + if (mode == VOIDmode) + /* MODE is VOIDmode when generating the actual call. */ + return -1; + + gcc_assert (mode == BLKmode); + + if (TARGET_ARCH32 + || !type + || (TREE_CODE (type) != VECTOR_TYPE + && TREE_CODE (type) != RECORD_TYPE)) + { + if (slotno >= SPARC_INT_ARG_MAX) + return -1; + regno = regbase + slotno; + } + else /* TARGET_ARCH64 && type */ + { + int intregs_p = 0, fpregs_p = 0, packed_p = 0; + + /* First see what kinds of registers we would need. */ + if (TREE_CODE (type) == VECTOR_TYPE) + fpregs_p = 1; + else + scan_record_type (type, &intregs_p, &fpregs_p, &packed_p); + + /* The ABI obviously doesn't specify how packed structures + are passed. These are defined to be passed in int regs + if possible, otherwise memory. */ + if (packed_p || !named) + fpregs_p = 0, intregs_p = 1; + + /* If all arg slots are filled, then must pass on stack. */ + if (fpregs_p && slotno >= SPARC_FP_ARG_MAX) + return -1; + + /* If there are only int args and all int arg slots are filled, + then must pass on stack. */ + if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX) + return -1; + + /* Note that even if all int arg slots are filled, fp members may + still be passed in regs if such regs are available. + *PREGNO isn't set because there may be more than one, it's up + to the caller to compute them. */ + return slotno; + } + break; + + default : + gcc_unreachable (); + } + + *pregno = regno; + return slotno; +} + +/* Handle recursive register counting for structure field layout. */ + +struct function_arg_record_value_parms +{ + rtx ret; /* return expression being built. */ + int slotno; /* slot number of the argument. */ + int named; /* whether the argument is named. 
*/ + int regbase; /* regno of the base register. */ + int stack; /* 1 if part of the argument is on the stack. */ + int intoffset; /* offset of the first pending integer field. */ + unsigned int nregs; /* number of words passed in registers. */ +}; + +static void function_arg_record_value_3 + (HOST_WIDE_INT, struct function_arg_record_value_parms *); +static void function_arg_record_value_2 + (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool); +static void function_arg_record_value_1 + (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool); +static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int); +static rtx function_arg_union_value (int, enum machine_mode, int, int); + +/* A subroutine of function_arg_record_value. Traverse the structure + recursively and determine how many registers will be required. */ + +static void +function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos, + struct function_arg_record_value_parms *parms, + bool packed_p) +{ + tree field; + + /* We need to compute how many registers are needed so we can + allocate the PARALLEL but before we can do that we need to know + whether there are any packed fields. The ABI obviously doesn't + specify how structures are passed in this case, so they are + defined to be passed in int regs if possible, otherwise memory, + regardless of whether there are fp values present. */ + + if (! packed_p) + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field)) + { + packed_p = true; + break; + } + } + + /* Compute how many registers we need. */ + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL) + { + HOST_WIDE_INT bitpos = startbitpos; + + if (DECL_SIZE (field) != 0) + { + if (integer_zerop (DECL_SIZE (field))) + continue; + + if (host_integerp (bit_position (field), 1)) + bitpos += int_bit_position (field); + } + + /* ??? FIXME: else assume zero offset. */ + + if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) + function_arg_record_value_1 (TREE_TYPE (field), + bitpos, + parms, + packed_p); + else if ((FLOAT_TYPE_P (TREE_TYPE (field)) + || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) + && TARGET_FPU + && parms->named + && ! packed_p) + { + if (parms->intoffset != -1) + { + unsigned int startbit, endbit; + int intslots, this_slotno; + + startbit = parms->intoffset & -BITS_PER_WORD; + endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD; + + intslots = (endbit - startbit) / BITS_PER_WORD; + this_slotno = parms->slotno + parms->intoffset + / BITS_PER_WORD; + + if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno) + { + intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno); + /* We need to pass this field on the stack. */ + parms->stack = 1; + } + + parms->nregs += intslots; + parms->intoffset = -1; + } + + /* There's no need to check this_slotno < SPARC_FP_ARG MAX. + If it wasn't true we wouldn't be here. */ + if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE + && DECL_MODE (field) == BLKmode) + parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field)); + else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) + parms->nregs += 2; + else + parms->nregs += 1; + } + else + { + if (parms->intoffset == -1) + parms->intoffset = bitpos; + } + } + } +} + +/* A subroutine of function_arg_record_value. Assign the bits of the + structure between parms->intoffset and bitpos to integer registers. 
*/ + +static void +function_arg_record_value_3 (HOST_WIDE_INT bitpos, + struct function_arg_record_value_parms *parms) +{ + enum machine_mode mode; + unsigned int regno; + unsigned int startbit, endbit; + int this_slotno, intslots, intoffset; + rtx reg; + + if (parms->intoffset == -1) + return; + + intoffset = parms->intoffset; + parms->intoffset = -1; + + startbit = intoffset & -BITS_PER_WORD; + endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD; + intslots = (endbit - startbit) / BITS_PER_WORD; + this_slotno = parms->slotno + intoffset / BITS_PER_WORD; + + intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno); + if (intslots <= 0) + return; + + /* If this is the trailing part of a word, only load that much into + the register. Otherwise load the whole register. Note that in + the latter case we may pick up unwanted bits. It's not a problem + at the moment but may wish to revisit. */ + + if (intoffset % BITS_PER_WORD != 0) + mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD, + MODE_INT); + else + mode = word_mode; + + intoffset /= BITS_PER_UNIT; + do + { + regno = parms->regbase + this_slotno; + reg = gen_rtx_REG (mode, regno); + XVECEXP (parms->ret, 0, parms->stack + parms->nregs) + = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset)); + + this_slotno += 1; + intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1; + mode = word_mode; + parms->nregs += 1; + intslots -= 1; + } + while (intslots > 0); +} + +/* A subroutine of function_arg_record_value. Traverse the structure + recursively and assign bits to floating point registers. Track which + bits in between need integer registers; invoke function_arg_record_value_3 + to make that happen. */ + +static void +function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos, + struct function_arg_record_value_parms *parms, + bool packed_p) +{ + tree field; + + if (! packed_p) + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field)) + { + packed_p = true; + break; + } + } + + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL) + { + HOST_WIDE_INT bitpos = startbitpos; + + if (DECL_SIZE (field) != 0) + { + if (integer_zerop (DECL_SIZE (field))) + continue; + + if (host_integerp (bit_position (field), 1)) + bitpos += int_bit_position (field); + } + + /* ??? FIXME: else assume zero offset. */ + + if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) + function_arg_record_value_2 (TREE_TYPE (field), + bitpos, + parms, + packed_p); + else if ((FLOAT_TYPE_P (TREE_TYPE (field)) + || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) + && TARGET_FPU + && parms->named + && ! 
packed_p) + { + int this_slotno = parms->slotno + bitpos / BITS_PER_WORD; + int regno, nregs, pos; + enum machine_mode mode = DECL_MODE (field); + rtx reg; + + function_arg_record_value_3 (bitpos, parms); + + if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE + && mode == BLKmode) + { + mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); + nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field)); + } + else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) + { + mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); + nregs = 2; + } + else + nregs = 1; + + regno = SPARC_FP_ARG_FIRST + this_slotno * 2; + if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0) + regno++; + reg = gen_rtx_REG (mode, regno); + pos = bitpos / BITS_PER_UNIT; + XVECEXP (parms->ret, 0, parms->stack + parms->nregs) + = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos)); + parms->nregs += 1; + while (--nregs > 0) + { + regno += GET_MODE_SIZE (mode) / 4; + reg = gen_rtx_REG (mode, regno); + pos += GET_MODE_SIZE (mode); + XVECEXP (parms->ret, 0, parms->stack + parms->nregs) + = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos)); + parms->nregs += 1; + } + } + else + { + if (parms->intoffset == -1) + parms->intoffset = bitpos; + } + } + } +} + +/* Used by function_arg and sparc_function_value_1 to implement the complex + conventions of the 64-bit ABI for passing and returning structures. + Return an expression valid as a return value for the FUNCTION_ARG + and TARGET_FUNCTION_VALUE. + + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + MODE is the argument's machine mode. + SLOTNO is the index number of the argument's slot in the parameter array. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + REGBASE is the regno of the base register for the parameter array. */ + +static rtx +function_arg_record_value (const_tree type, enum machine_mode mode, + int slotno, int named, int regbase) +{ + HOST_WIDE_INT typesize = int_size_in_bytes (type); + struct function_arg_record_value_parms parms; + unsigned int nregs; + + parms.ret = NULL_RTX; + parms.slotno = slotno; + parms.named = named; + parms.regbase = regbase; + parms.stack = 0; + + /* Compute how many registers we need. */ + parms.nregs = 0; + parms.intoffset = 0; + function_arg_record_value_1 (type, 0, &parms, false); + + /* Take into account pending integer fields. */ + if (parms.intoffset != -1) + { + unsigned int startbit, endbit; + int intslots, this_slotno; + + startbit = parms.intoffset & -BITS_PER_WORD; + endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD; + intslots = (endbit - startbit) / BITS_PER_WORD; + this_slotno = slotno + parms.intoffset / BITS_PER_WORD; + + if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno) + { + intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno); + /* We need to pass this field on the stack. */ + parms.stack = 1; + } + + parms.nregs += intslots; + } + nregs = parms.nregs; + + /* Allocate the vector and handle some annoying special cases. */ + if (nregs == 0) + { + /* ??? Empty structure has no value? Duh? */ + if (typesize <= 0) + { + /* Though there's nothing really to store, return a word register + anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL + leads to breakage due to the fact that there are zero bytes to + load. */ + return gen_rtx_REG (mode, regbase); + } + else + { + /* ??? C++ has structures with no fields, and yet a size. 
Give up + for now and pass everything back in integer registers. */ + nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + } + if (nregs + slotno > SPARC_INT_ARG_MAX) + nregs = SPARC_INT_ARG_MAX - slotno; + } + gcc_assert (nregs != 0); + + parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs)); + + /* If at least one field must be passed on the stack, generate + (parallel [(expr_list (nil) ...) ...]) so that all fields will + also be passed on the stack. We can't do much better because the + semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case + of structures for which the fields passed exclusively in registers + are not at the beginning of the structure. */ + if (parms.stack) + XVECEXP (parms.ret, 0, 0) + = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); + + /* Fill in the entries. */ + parms.nregs = 0; + parms.intoffset = 0; + function_arg_record_value_2 (type, 0, &parms, false); + function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms); + + gcc_assert (parms.nregs == nregs); + + return parms.ret; +} + +/* Used by function_arg and sparc_function_value_1 to implement the conventions + of the 64-bit ABI for passing and returning unions. + Return an expression valid as a return value for the FUNCTION_ARG + and TARGET_FUNCTION_VALUE. + + SIZE is the size in bytes of the union. + MODE is the argument's machine mode. + REGNO is the hard register the union will be passed in. */ + +static rtx +function_arg_union_value (int size, enum machine_mode mode, int slotno, + int regno) +{ + int nwords = ROUND_ADVANCE (size), i; + rtx regs; + + /* See comment in previous function for empty structures. */ + if (nwords == 0) + return gen_rtx_REG (mode, regno); + + if (slotno == SPARC_INT_ARG_MAX - 1) + nwords = 1; + + regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords)); + + for (i = 0; i < nwords; i++) + { + /* Unions are passed left-justified. */ + XVECEXP (regs, 0, i) + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (word_mode, regno), + GEN_INT (UNITS_PER_WORD * i)); + regno++; + } + + return regs; +} + +/* Used by function_arg and sparc_function_value_1 to implement the conventions + for passing and returning large (BLKmode) vectors. + Return an expression valid as a return value for the FUNCTION_ARG + and TARGET_FUNCTION_VALUE. + + SIZE is the size in bytes of the vector (at least 8 bytes). + REGNO is the FP hard register the vector will be passed in. */ + +static rtx +function_arg_vector_value (int size, int regno) +{ + int i, nregs = size / 8; + rtx regs; + + regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs)); + + for (i = 0; i < nregs; i++) + { + XVECEXP (regs, 0, i) + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, regno + 2*i), + GEN_INT (i*8)); + } + + return regs; +} + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + NAMED is true if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + INCOMING_P is false for TARGET_FUNCTION_ARG, true for + TARGET_FUNCTION_INCOMING_ARG. 
*/ + +static rtx +sparc_function_arg_1 (const CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named, bool incoming_p) +{ + int regbase = (incoming_p + ? SPARC_INCOMING_INT_ARG_FIRST + : SPARC_OUTGOING_INT_ARG_FIRST); + int slotno, regno, padding; + enum mode_class mclass = GET_MODE_CLASS (mode); + + slotno = function_arg_slotno (cum, mode, type, named, incoming_p, + &regno, &padding); + if (slotno == -1) + return 0; + + /* Vector types deserve special treatment because they are polymorphic wrt + their mode, depending upon whether VIS instructions are enabled. */ + if (type && TREE_CODE (type) == VECTOR_TYPE) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + gcc_assert ((TARGET_ARCH32 && size <= 8) + || (TARGET_ARCH64 && size <= 16)); + + if (mode == BLKmode) + return function_arg_vector_value (size, + SPARC_FP_ARG_FIRST + 2*slotno); + else + mclass = MODE_FLOAT; + } + + if (TARGET_ARCH32) + return gen_rtx_REG (mode, regno); + + /* Structures up to 16 bytes in size are passed in arg slots on the stack + and are promoted to registers if possible. */ + if (type && TREE_CODE (type) == RECORD_TYPE) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + gcc_assert (size <= 16); + + return function_arg_record_value (type, mode, slotno, named, regbase); + } + + /* Unions up to 16 bytes in size are passed in integer registers. */ + else if (type && TREE_CODE (type) == UNION_TYPE) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + gcc_assert (size <= 16); + + return function_arg_union_value (size, mode, slotno, regno); + } + + /* v9 fp args in reg slots beyond the int reg slots get passed in regs + but also have the slot allocated for them. + If no prototype is in scope fp values in register slots get passed + in two places, either fp regs and int regs or fp regs and memory. */ + else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) + && SPARC_FP_REG_P (regno)) + { + rtx reg = gen_rtx_REG (mode, regno); + if (cum->prototype_p || cum->libcall_p) + { + /* "* 2" because fp reg numbers are recorded in 4 byte + quantities. */ +#if 0 + /* ??? This will cause the value to be passed in the fp reg and + in the stack. When a prototype exists we want to pass the + value in the reg but reserve space on the stack. That's an + optimization, and is deferred [for a bit]. */ + if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2) + return gen_rtx_PARALLEL (mode, + gen_rtvec (2, + gen_rtx_EXPR_LIST (VOIDmode, + NULL_RTX, const0_rtx), + gen_rtx_EXPR_LIST (VOIDmode, + reg, const0_rtx))); + else +#else + /* ??? It seems that passing back a register even when past + the area declared by REG_PARM_STACK_SPACE will allocate + space appropriately, and will not copy the data onto the + stack, exactly as we desire. + + This is due to locate_and_pad_parm being called in + expand_call whenever reg_parm_stack_space > 0, which + while beneficial to our example here, would seem to be + in error from what had been intended. Ho hum... -- r~ */ +#endif + return reg; + } + else + { + rtx v0, v1; + + if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2) + { + int intreg; + + /* On incoming, we don't need to know that the value + is passed in %f0 and %i0, and it confuses other parts + causing needless spillage even on the simplest cases.
*/ + if (incoming_p) + return reg; + + intreg = (SPARC_OUTGOING_INT_ARG_FIRST + + (regno - SPARC_FP_ARG_FIRST) / 2); + + v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); + v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg), + const0_rtx); + return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); + } + else + { + v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); + v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); + return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); + } + } + } + + /* All other aggregate types are passed in an integer register in a mode + corresponding to the size of the type. */ + else if (type && AGGREGATE_TYPE_P (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + gcc_assert (size <= 16); + + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); + } + + return gen_rtx_REG (mode, regno); +} + +/* Handle the TARGET_FUNCTION_ARG target hook. */ + +static rtx +sparc_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named) +{ + return sparc_function_arg_1 (cum, mode, type, named, false); +} + +/* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */ + +static rtx +sparc_function_incoming_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named) +{ + return sparc_function_arg_1 (cum, mode, type, named, true); +} + +/* For sparc64, objects requiring 16 byte alignment are passed that way. */ + +static unsigned int +sparc_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + return ((TARGET_ARCH64 + && (GET_MODE_ALIGNMENT (mode) == 128 + || (type && TYPE_ALIGN (type) == 128))) + ? 128 + : PARM_BOUNDARY); +} + +/* For an arg passed partly in registers and partly in memory, + this is the number of bytes of registers used. + For args passed entirely in registers or entirely in memory, zero. + + Any arg that starts in the first 6 regs but won't entirely fit in them + needs partial registers on v8. On v9, structures with integer + values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp + values that begin in the last fp reg [where "last fp reg" varies with the + mode] will be split between that reg and memory. */ + +static int +sparc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode, + tree type, bool named) +{ + int slotno, regno, padding; + + /* We pass false for incoming_p here, it doesn't matter. */ + slotno = function_arg_slotno (cum, mode, type, named, false, + &regno, &padding); + + if (slotno == -1) + return 0; + + if (TARGET_ARCH32) + { + if ((slotno + (mode == BLKmode + ? ROUND_ADVANCE (int_size_in_bytes (type)) + : ROUND_ADVANCE (GET_MODE_SIZE (mode)))) + > SPARC_INT_ARG_MAX) + return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD; + } + else + { + /* We are guaranteed by pass_by_reference that the size of the + argument is not greater than 16 bytes, so we only need to return + one word if the argument is partially passed in registers. */ + + if (type && AGGREGATE_TYPE_P (type)) + { + int size = int_size_in_bytes (type); + + if (size > UNITS_PER_WORD + && slotno == SPARC_INT_ARG_MAX - 1) + return UNITS_PER_WORD; + } + else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT + || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT + && ! (TARGET_FPU && named))) + { + /* The complex types are passed as packed types.
*/ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD + && slotno == SPARC_INT_ARG_MAX - 1) + return UNITS_PER_WORD; + } + else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) + { + if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD) + > SPARC_FP_ARG_MAX) + return UNITS_PER_WORD; + } + } + + return 0; +} + +/* Handle the TARGET_PASS_BY_REFERENCE target hook. + Specify whether to pass the argument by reference. */ + +static bool +sparc_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, + enum machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + if (TARGET_ARCH32) + /* Original SPARC 32-bit ABI says that structures and unions, + and quad-precision floats are passed by reference. For Pascal, + also pass arrays by reference. All other base types are passed + in registers. + + Extended ABI (as implemented by the Sun compiler) says that all + complex floats are passed by reference. Pass complex integers + in registers up to 8 bytes. More generally, enforce the 2-word + cap for passing arguments in registers. + + Vector ABI (as implemented by the Sun VIS SDK) says that vector + integers are passed like floats of the same size, that is in + registers up to 8 bytes. Pass all vector floats by reference + like structure and unions. */ + return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type))) + || mode == SCmode + /* Catch CDImode, TFmode, DCmode and TCmode. */ + || GET_MODE_SIZE (mode) > 8 + || (type + && TREE_CODE (type) == VECTOR_TYPE + && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); + else + /* Original SPARC 64-bit ABI says that structures and unions + smaller than 16 bytes are passed in registers, as well as + all other base types. + + Extended ABI (as implemented by the Sun compiler) says that + complex floats are passed in registers up to 16 bytes. Pass + all complex integers in registers up to 16 bytes. More generally, + enforce the 2-word cap for passing arguments in registers. + + Vector ABI (as implemented by the Sun VIS SDK) says that vector + integers are passed like floats of the same size, that is in + registers (up to 16 bytes). Pass all vector floats like structure + and unions. */ + return ((type + && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE) + && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16) + /* Catch CTImode and TCmode. */ + || GET_MODE_SIZE (mode) > 16); +} + +/* Handle the TARGET_FUNCTION_ARG_ADVANCE hook. + Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + TYPE is null for libcalls where that information may not be available. */ + +static void +sparc_function_arg_advance (struct sparc_args *cum, enum machine_mode mode, + const_tree type, bool named) +{ + int regno, padding; + + /* We pass false for incoming_p here, it doesn't matter. */ + function_arg_slotno (cum, mode, type, named, false, &regno, &padding); + + /* If argument requires leading padding, add it. */ + cum->words += padding; + + if (TARGET_ARCH32) + { + cum->words += (mode != BLKmode + ? ROUND_ADVANCE (GET_MODE_SIZE (mode)) + : ROUND_ADVANCE (int_size_in_bytes (type))); + } + else + { + if (type && AGGREGATE_TYPE_P (type)) + { + int size = int_size_in_bytes (type); + + if (size <= 8) + ++cum->words; + else if (size <= 16) + cum->words += 2; + else /* passed by reference */ + ++cum->words; + } + else + { + cum->words += (mode != BLKmode + ? ROUND_ADVANCE (GET_MODE_SIZE (mode)) + : ROUND_ADVANCE (int_size_in_bytes (type))); + } + } +} + +/* Handle the FUNCTION_ARG_PADDING macro.
+ For the 64 bit ABI structs are always stored left shifted in their + argument slot. */ + +enum direction +function_arg_padding (enum machine_mode mode, const_tree type) +{ + if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type)) + return upward; + + /* Fall back to the default. */ + return DEFAULT_FUNCTION_ARG_PADDING (mode, type); +} + +/* Handle the TARGET_RETURN_IN_MEMORY target hook. + Specify whether to return the return value in memory. */ + +static bool +sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + if (TARGET_ARCH32) + /* Original SPARC 32-bit ABI says that structures and unions, + and quad-precision floats are returned in memory. All other + base types are returned in registers. + + Extended ABI (as implemented by the Sun compiler) says that + all complex floats are returned in registers (8 FP registers + at most for '_Complex long double'). Return all complex integers + in registers (4 at most for '_Complex long long'). + + Vector ABI (as implemented by the Sun VIS SDK) says that vector + integers are returned like floats of the same size, that is in + registers up to 8 bytes and in memory otherwise. Return all + vector floats in memory like structure and unions; note that + they always have BLKmode like the latter. */ + return (TYPE_MODE (type) == BLKmode + || TYPE_MODE (type) == TFmode + || (TREE_CODE (type) == VECTOR_TYPE + && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); + else + /* Original SPARC 64-bit ABI says that structures and unions + smaller than 32 bytes are returned in registers, as well as + all other base types. + + Extended ABI (as implemented by the Sun compiler) says that all + complex floats are returned in registers (8 FP registers at most + for '_Complex long double'). Return all complex integers in + registers (4 at most for '_Complex TItype'). + + Vector ABI (as implemented by the Sun VIS SDK) says that vector + integers are returned like floats of the same size, that is in + registers. Return all vector floats like structure and unions; + note that they always have BLKmode like the latter. */ + return (TYPE_MODE (type) == BLKmode + && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32); +} + +/* Handle the TARGET_STRUCT_VALUE target hook. + Return where to find the structure return value address. */ + +static rtx +sparc_struct_value_rtx (tree fndecl, int incoming) +{ + if (TARGET_ARCH64) + return 0; + else + { + rtx mem; + + if (incoming) + mem = gen_frame_mem (Pmode, plus_constant (frame_pointer_rtx, + STRUCT_VALUE_OFFSET)); + else + mem = gen_frame_mem (Pmode, plus_constant (stack_pointer_rtx, + STRUCT_VALUE_OFFSET)); + + /* Only follow the SPARC ABI for fixed-size structure returns. + Variable size structure returns are handled per the normal + procedures in GCC. This is enabled by -mstd-struct-return */ + if (incoming == 2 + && sparc_std_struct_return + && TYPE_SIZE_UNIT (TREE_TYPE (fndecl)) + && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST) + { + /* We must check and adjust the return address, as it is + optional as to whether the return object is really + provided. 
*/ + rtx ret_rtx = gen_rtx_REG (Pmode, 31); + rtx scratch = gen_reg_rtx (SImode); + rtx endlab = gen_label_rtx (); + + /* Calculate the return object size */ + tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl)); + rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff); + /* Construct a temporary return value */ + rtx temp_val + = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0); + + /* Implement SPARC 32-bit psABI callee return struct checking: + + Fetch the instruction where we will return to and see if + it's an unimp instruction (the most significant 10 bits + will be zero). */ + emit_move_insn (scratch, gen_rtx_MEM (SImode, + plus_constant (ret_rtx, 8))); + /* Assume the size is valid and pre-adjust */ + emit_insn (gen_add3_insn (ret_rtx, ret_rtx, GEN_INT (4))); + emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, + 0, endlab); + emit_insn (gen_sub3_insn (ret_rtx, ret_rtx, GEN_INT (4))); + /* Write the address of the memory pointed to by temp_val into + the memory pointed to by mem */ + emit_move_insn (mem, XEXP (temp_val, 0)); + emit_label (endlab); + } + + return mem; + } +} + +/* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook. + For v9, function return values are subject to the same rules as arguments, + except that up to 32 bytes may be returned in registers. */ + +static rtx +sparc_function_value_1 (const_tree type, enum machine_mode mode, + bool outgoing) +{ + /* Beware that the two values are swapped here wrt function_arg. */ + int regbase = (outgoing + ? SPARC_INCOMING_INT_ARG_FIRST + : SPARC_OUTGOING_INT_ARG_FIRST); + enum mode_class mclass = GET_MODE_CLASS (mode); + int regno; + + /* Vector types deserve special treatment because they are polymorphic wrt + their mode, depending upon whether VIS instructions are enabled. */ + if (type && TREE_CODE (type) == VECTOR_TYPE) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + gcc_assert ((TARGET_ARCH32 && size <= 8) + || (TARGET_ARCH64 && size <= 32)); + + if (mode == BLKmode) + return function_arg_vector_value (size, + SPARC_FP_ARG_FIRST); + else + mclass = MODE_FLOAT; + } + + if (TARGET_ARCH64 && type) + { + /* Structures up to 32 bytes in size are returned in registers. */ + if (TREE_CODE (type) == RECORD_TYPE) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + gcc_assert (size <= 32); + + return function_arg_record_value (type, mode, 0, 1, regbase); + } + + /* Unions up to 32 bytes in size are returned in integer registers. */ + else if (TREE_CODE (type) == UNION_TYPE) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + gcc_assert (size <= 32); + + return function_arg_union_value (size, mode, 0, regbase); + } + + /* Objects that require it are returned in FP registers. */ + else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) + ; + + /* All other aggregate types are returned in an integer register in a + mode corresponding to the size of the type. */ + else if (AGGREGATE_TYPE_P (type)) + { + /* All other aggregate types are passed in an integer register + in a mode corresponding to the size of the type. */ + HOST_WIDE_INT size = int_size_in_bytes (type); + gcc_assert (size <= 32); + + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); + + /* ??? We probably should have made the same ABI change in + 3.4.0 as the one we made for unions. The latter was + required by the SCD though, while the former is not + specified, so we favored compatibility and efficiency. + + Now we're stuck for aggregates larger than 16 bytes, + because OImode vanished in the meantime. 
Let's not + try to be unduly clever, and simply follow the ABI + for unions in that case. */ + if (mode == BLKmode) + return function_arg_union_value (size, mode, 0, regbase); + else + mclass = MODE_INT; + } + + /* We should only have pointer and integer types at this point. This + must match sparc_promote_function_mode. */ + else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD) + mode = word_mode; + } + + /* We should only have pointer and integer types at this point. This must + match sparc_promote_function_mode. */ + else if (TARGET_ARCH32 + && mclass == MODE_INT + && GET_MODE_SIZE (mode) < UNITS_PER_WORD) + mode = word_mode; + + if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU) + regno = SPARC_FP_ARG_FIRST; + else + regno = regbase; + + return gen_rtx_REG (mode, regno); +} + +/* Handle TARGET_FUNCTION_VALUE. + On the SPARC, the value is found in the first "output" register, but the + called function leaves it in the first "input" register. */ + +static rtx +sparc_function_value (const_tree valtype, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing) +{ + return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing); +} + +/* Handle TARGET_LIBCALL_VALUE. */ + +static rtx +sparc_libcall_value (enum machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) +{ + return sparc_function_value_1 (NULL_TREE, mode, false); +} + +/* Handle FUNCTION_VALUE_REGNO_P. + On the SPARC, the first "output" reg is used for integer values, and the + first floating point register is used for floating point values. */ + +static bool +sparc_function_value_regno_p (const unsigned int regno) +{ + return (regno == 8 || regno == 32); +} + +/* Do what is necessary for `va_start'. We look at the current function + to determine if stdarg or varargs is used and return the address of + the first unnamed parameter. */ + +static rtx +sparc_builtin_saveregs (void) +{ + int first_reg = crtl->args.info.words; + rtx address; + int regno; + + for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++) + emit_move_insn (gen_rtx_MEM (word_mode, + gen_rtx_PLUS (Pmode, + frame_pointer_rtx, + GEN_INT (FIRST_PARM_OFFSET (0) + + (UNITS_PER_WORD + * regno)))), + gen_rtx_REG (word_mode, + SPARC_INCOMING_INT_ARG_FIRST + regno)); + + address = gen_rtx_PLUS (Pmode, + frame_pointer_rtx, + GEN_INT (FIRST_PARM_OFFSET (0) + + UNITS_PER_WORD * first_reg)); + + return address; +} + +/* Implement `va_start' for stdarg. */ + +static void +sparc_va_start (tree valist, rtx nextarg) +{ + nextarg = expand_builtin_saveregs (); + std_expand_builtin_va_start (valist, nextarg); +} + +/* Implement `va_arg' for stdarg. */ + +static tree +sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + HOST_WIDE_INT size, rsize, align; + tree addr, incr; + bool indirect; + tree ptrtype = build_pointer_type (type); + + if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) + { + indirect = true; + size = rsize = UNITS_PER_WORD; + align = 0; + } + else + { + indirect = false; + size = int_size_in_bytes (type); + rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; + align = 0; + + if (TARGET_ARCH64) + { + /* For SPARC64, objects requiring 16-byte alignment get it. */ + if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD) + align = 2 * UNITS_PER_WORD; + + /* SPARC-V9 ABI states that structures up to 16 bytes in size + are left-justified in their slots. 
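+
+	     Editor's note (illustrative only, not part of the upstream
+	     sources): the net effect for a hypothetical variadic callee is
+
+	       struct pair { int a, b; };                 // 8 bytes
+	       int f (int n, ...)
+	       {
+	         va_list ap;
+	         va_start (ap, n);
+	         struct pair p = va_arg (ap, struct pair); // one 8-byte slot
+	         char c = va_arg (ap, int);                // promoted; read
+	                                                   // from the high
+	                                                   // end of its slot
+	         va_end (ap);
+	         return p.a + p.b + c;
+	       }
+
+	     Each argument consumes a slot rounded up to UNITS_PER_WORD;
+	     because aggregates are left-justified, SIZE is set to RSIZE
+	     below so the big-endian adjustment further down applies only
+	     to small scalars.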
*/ + if (AGGREGATE_TYPE_P (type)) + { + if (size == 0) + size = rsize = UNITS_PER_WORD; + else + size = rsize; + } + } + } + + incr = valist; + if (align) + { + incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, + size_int (align - 1)); + incr = fold_convert (sizetype, incr); + incr = fold_build2 (BIT_AND_EXPR, sizetype, incr, + size_int (-align)); + incr = fold_convert (ptr_type_node, incr); + } + + gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue); + addr = incr; + + if (BYTES_BIG_ENDIAN && size < rsize) + addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, + size_int (rsize - size)); + + if (indirect) + { + addr = fold_convert (build_pointer_type (ptrtype), addr); + addr = build_va_arg_indirect_ref (addr); + } + + /* If the address isn't aligned properly for the type, we need a temporary. + FIXME: This is inefficient, usually we can do this in registers. */ + else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD) + { + tree tmp = create_tmp_var (type, "va_arg_tmp"); + tree dest_addr = build_fold_addr_expr (tmp); + tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY], + 3, dest_addr, addr, size_int (rsize)); + TREE_ADDRESSABLE (tmp) = 1; + gimplify_and_add (copy, pre_p); + addr = dest_addr; + } + + else + addr = fold_convert (ptrtype, addr); + + incr + = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, size_int (rsize)); + gimplify_assign (valist, incr, post_p); + + return build_va_arg_indirect_ref (addr); +} + +/* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook. + Specify whether the vector mode is supported by the hardware. */ + +static bool +sparc_vector_mode_supported_p (enum machine_mode mode) +{ + return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false; +} + +/* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */ + +static enum machine_mode +sparc_preferred_simd_mode (enum machine_mode mode) +{ + if (TARGET_VIS) + switch (mode) + { + case SImode: + return V2SImode; + case HImode: + return V4HImode; + case QImode: + return V8QImode; + + default:; + } + + return word_mode; +} + +/* Return the string to output an unconditional branch to LABEL, which is + the operand number of the label. + + DEST is the destination insn (i.e. the label), INSN is the source. */ + +const char * +output_ubranch (rtx dest, int label, rtx insn) +{ + static char string[64]; + bool v9_form = false; + char *p; + + if (TARGET_V9 && INSN_ADDRESSES_SET_P ()) + { + int delta = (INSN_ADDRESSES (INSN_UID (dest)) + - INSN_ADDRESSES (INSN_UID (insn))); + /* Leave some instructions for "slop". */ + if (delta >= -260000 && delta < 260000) + v9_form = true; + } + + if (v9_form) + strcpy (string, "ba%*,pt\t%%xcc, "); + else + strcpy (string, "b%*\t"); + + p = strchr (string, '\0'); + *p++ = '%'; + *p++ = 'l'; + *p++ = '0' + label; + *p++ = '%'; + *p++ = '('; + *p = '\0'; + + return string; +} + +/* Return the string to output a conditional branch to LABEL, which is + the operand number of the label. OP is the conditional expression. + XEXP (OP, 0) is assumed to be a condition code register (integer or + floating point) and its mode specifies what kind of comparison we made. + + DEST is the destination insn (i.e. the label), INSN is the source. + + REVERSED is nonzero if we should reverse the sense of the comparison. + + ANNUL is nonzero if we should generate an annulling branch. 
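+
+   Editor's note (illustrative, not part of the upstream sources): an
+   annulled conditional branch squashes its delay-slot instruction when
+   the branch is not taken, so the slot can hold an instruction from the
+   target, e.g.
+
+     cmp   %o0, 0
+     be,a  .Ldone        ! ",a": delay slot executes only if taken
+      mov  1, %o1        ! first instruction of the taken path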
*/ + +const char * +output_cbranch (rtx op, rtx dest, int label, int reversed, int annul, + rtx insn) +{ + static char string[64]; + enum rtx_code code = GET_CODE (op); + rtx cc_reg = XEXP (op, 0); + enum machine_mode mode = GET_MODE (cc_reg); + const char *labelno, *branch; + int spaces = 8, far; + char *p; + + /* v9 branches are limited to +-1MB. If it is too far away, + change + + bne,pt %xcc, .LC30 + + to + + be,pn %xcc, .+12 + nop + ba .LC30 + + and + + fbne,a,pn %fcc2, .LC29 + + to + + fbe,pt %fcc2, .+16 + nop + ba .LC29 */ + + far = TARGET_V9 && (get_attr_length (insn) >= 3); + if (reversed ^ far) + { + /* Reversal of FP compares takes care -- an ordered compare + becomes an unordered compare and vice versa. */ + if (mode == CCFPmode || mode == CCFPEmode) + code = reverse_condition_maybe_unordered (code); + else + code = reverse_condition (code); + } + + /* Start by writing the branch condition. */ + if (mode == CCFPmode || mode == CCFPEmode) + { + switch (code) + { + case NE: + branch = "fbne"; + break; + case EQ: + branch = "fbe"; + break; + case GE: + branch = "fbge"; + break; + case GT: + branch = "fbg"; + break; + case LE: + branch = "fble"; + break; + case LT: + branch = "fbl"; + break; + case UNORDERED: + branch = "fbu"; + break; + case ORDERED: + branch = "fbo"; + break; + case UNGT: + branch = "fbug"; + break; + case UNLT: + branch = "fbul"; + break; + case UNEQ: + branch = "fbue"; + break; + case UNGE: + branch = "fbuge"; + break; + case UNLE: + branch = "fbule"; + break; + case LTGT: + branch = "fblg"; + break; + + default: + gcc_unreachable (); + } + + /* ??? !v9: FP branches cannot be preceded by another floating point + insn. Because there is currently no concept of pre-delay slots, + we can fix this only by always emitting a nop before a floating + point branch. */ + + string[0] = '\0'; + if (! TARGET_V9) + strcpy (string, "nop\n\t"); + strcat (string, branch); + } + else + { + switch (code) + { + case NE: + branch = "bne"; + break; + case EQ: + branch = "be"; + break; + case GE: + if (mode == CC_NOOVmode || mode == CCX_NOOVmode) + branch = "bpos"; + else + branch = "bge"; + break; + case GT: + branch = "bg"; + break; + case LE: + branch = "ble"; + break; + case LT: + if (mode == CC_NOOVmode || mode == CCX_NOOVmode) + branch = "bneg"; + else + branch = "bl"; + break; + case GEU: + branch = "bgeu"; + break; + case GTU: + branch = "bgu"; + break; + case LEU: + branch = "bleu"; + break; + case LTU: + branch = "blu"; + break; + + default: + gcc_unreachable (); + } + strcpy (string, branch); + } + spaces -= strlen (branch); + p = strchr (string, '\0'); + + /* Now add the annulling, the label, and a possible noop. */ + if (annul && ! far) + { + strcpy (p, ",a"); + p += 2; + spaces -= 2; + } + + if (TARGET_V9) + { + rtx note; + int v8 = 0; + + if (! far && insn && INSN_ADDRESSES_SET_P ()) + { + int delta = (INSN_ADDRESSES (INSN_UID (dest)) + - INSN_ADDRESSES (INSN_UID (insn))); + /* Leave some instructions for "slop". */ + if (delta < -260000 || delta >= 260000) + v8 = 1; + } + + if (mode == CCFPmode || mode == CCFPEmode) + { + static char v9_fcc_labelno[] = "%%fccX, "; + /* Set the char indicating the number of the fcc reg to use. */ + v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0'; + labelno = v9_fcc_labelno; + if (v8) + { + gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG); + labelno = ""; + } + } + else if (mode == CCXmode || mode == CCX_NOOVmode) + { + labelno = "%%xcc, "; + gcc_assert (! 
v8); + } + else + { + labelno = "%%icc, "; + if (v8) + labelno = ""; + } + + if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX))) + { + strcpy (p, + ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far) + ? ",pt" : ",pn"); + p += 3; + spaces -= 3; + } + } + else + labelno = ""; + + if (spaces > 0) + *p++ = '\t'; + else + *p++ = ' '; + strcpy (p, labelno); + p = strchr (p, '\0'); + if (far) + { + strcpy (p, ".+12\n\t nop\n\tb\t"); + /* Skip the next insn if requested or + if we know that it will be a nop. */ + if (annul || ! final_sequence) + p[3] = '6'; + p += 14; + } + *p++ = '%'; + *p++ = 'l'; + *p++ = label + '0'; + *p++ = '%'; + *p++ = '#'; + *p = '\0'; + + return string; +} + +/* Emit a library call comparison between floating point X and Y. + COMPARISON is the operator to compare with (EQ, NE, GT, etc). + Return the new operator to be used in the comparison sequence. + + TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode + values as arguments instead of the TFmode registers themselves, + that's why we cannot call emit_float_lib_cmp. */ + +rtx +sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison) +{ + const char *qpfunc; + rtx slot0, slot1, result, tem, tem2, libfunc; + enum machine_mode mode; + enum rtx_code new_comparison; + + switch (comparison) + { + case EQ: + qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq"); + break; + + case NE: + qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne"); + break; + + case GT: + qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt"); + break; + + case GE: + qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge"); + break; + + case LT: + qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt"); + break; + + case LE: + qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle"); + break; + + case ORDERED: + case UNORDERED: + case UNGT: + case UNLT: + case UNEQ: + case UNGE: + case UNLE: + case LTGT: + qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp"); + break; + + default: + gcc_unreachable (); + } + + if (TARGET_ARCH64) + { + if (MEM_P (x)) + slot0 = x; + else + { + slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0); + emit_move_insn (slot0, x); + } + + if (MEM_P (y)) + slot1 = y; + else + { + slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0); + emit_move_insn (slot1, y); + } + + libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc); + emit_library_call (libfunc, LCT_NORMAL, + DImode, 2, + XEXP (slot0, 0), Pmode, + XEXP (slot1, 0), Pmode); + mode = DImode; + } + else + { + libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc); + emit_library_call (libfunc, LCT_NORMAL, + SImode, 2, + x, TFmode, y, TFmode); + mode = SImode; + } + + + /* Immediately move the result of the libcall into a pseudo + register so reload doesn't clobber the value if it needs + the return register for a spill reg. */ + result = gen_reg_rtx (mode); + emit_move_insn (result, hard_libcall_value (mode, libfunc)); + + switch (comparison) + { + default: + return gen_rtx_NE (VOIDmode, result, const0_rtx); + case ORDERED: + case UNORDERED: + new_comparison = (comparison == UNORDERED ? EQ : NE); + return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3)); + case UNGT: + case UNGE: + new_comparison = (comparison == UNGT ? 
GT : NE); + return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx); + case UNLE: + return gen_rtx_NE (VOIDmode, result, const2_rtx); + case UNLT: + tem = gen_reg_rtx (mode); + if (TARGET_ARCH32) + emit_insn (gen_andsi3 (tem, result, const1_rtx)); + else + emit_insn (gen_anddi3 (tem, result, const1_rtx)); + return gen_rtx_NE (VOIDmode, tem, const0_rtx); + case UNEQ: + case LTGT: + tem = gen_reg_rtx (mode); + if (TARGET_ARCH32) + emit_insn (gen_addsi3 (tem, result, const1_rtx)); + else + emit_insn (gen_adddi3 (tem, result, const1_rtx)); + tem2 = gen_reg_rtx (mode); + if (TARGET_ARCH32) + emit_insn (gen_andsi3 (tem2, tem, const2_rtx)); + else + emit_insn (gen_anddi3 (tem2, tem, const2_rtx)); + new_comparison = (comparison == UNEQ ? EQ : NE); + return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx); + } + + gcc_unreachable (); +} + +/* Generate an unsigned DImode to FP conversion. This is the same code + optabs would emit if we didn't have TFmode patterns. */ + +void +sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode) +{ + rtx neglab, donelab, i0, i1, f0, in, out; + + out = operands[0]; + in = force_reg (DImode, operands[1]); + neglab = gen_label_rtx (); + donelab = gen_label_rtx (); + i0 = gen_reg_rtx (DImode); + i1 = gen_reg_rtx (DImode); + f0 = gen_reg_rtx (mode); + + emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); + + emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); + emit_jump_insn (gen_jump (donelab)); + emit_barrier (); + + emit_label (neglab); + + emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); + emit_insn (gen_anddi3 (i1, in, const1_rtx)); + emit_insn (gen_iordi3 (i0, i0, i1)); + emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0))); + emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); + + emit_label (donelab); +} + +/* Generate an FP to unsigned DImode conversion. This is the same code + optabs would emit if we didn't have TFmode patterns. */ + +void +sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode) +{ + rtx neglab, donelab, i0, i1, f0, in, out, limit; + + out = operands[0]; + in = force_reg (mode, operands[1]); + neglab = gen_label_rtx (); + donelab = gen_label_rtx (); + i0 = gen_reg_rtx (DImode); + i1 = gen_reg_rtx (DImode); + limit = gen_reg_rtx (mode); + f0 = gen_reg_rtx (mode); + + emit_move_insn (limit, + CONST_DOUBLE_FROM_REAL_VALUE ( + REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode)); + emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab); + + emit_insn (gen_rtx_SET (VOIDmode, + out, + gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in)))); + emit_jump_insn (gen_jump (donelab)); + emit_barrier (); + + emit_label (neglab); + + emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit))); + emit_insn (gen_rtx_SET (VOIDmode, + i0, + gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0)))); + emit_insn (gen_movdi (i1, const1_rtx)); + emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63))); + emit_insn (gen_xordi3 (out, i0, i1)); + + emit_label (donelab); +} + +/* Return the string to output a conditional branch to LABEL, testing + register REG. LABEL is the operand number of the label; REG is the + operand number of the reg. OP is the conditional expression. The mode + of REG says what kind of comparison we made. + + DEST is the destination insn (i.e. the label), INSN is the source. + + REVERSED is nonzero if we should reverse the sense of the comparison. + + ANNUL is nonzero if we should generate an annulling branch. 
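+
+   Editor's note (illustrative, not part of the upstream sources): these
+   V9 branch-on-register forms fuse the compare with the branch, so
+
+     cmp    %o0, 0
+     bne,pt %xcc, .Lnonzero
+      nop
+
+   can instead be emitted as
+
+     brnz,pt %o0, .Lnonzero
+      nop
+
+   They exist only for full 64-bit registers, hence the DImode assertion
+   in the code below.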
*/ + +const char * +output_v9branch (rtx op, rtx dest, int reg, int label, int reversed, + int annul, rtx insn) +{ + static char string[64]; + enum rtx_code code = GET_CODE (op); + enum machine_mode mode = GET_MODE (XEXP (op, 0)); + rtx note; + int far; + char *p; + + /* branch on register are limited to +-128KB. If it is too far away, + change + + brnz,pt %g1, .LC30 + + to + + brz,pn %g1, .+12 + nop + ba,pt %xcc, .LC30 + + and + + brgez,a,pn %o1, .LC29 + + to + + brlz,pt %o1, .+16 + nop + ba,pt %xcc, .LC29 */ + + far = get_attr_length (insn) >= 3; + + /* If not floating-point or if EQ or NE, we can just reverse the code. */ + if (reversed ^ far) + code = reverse_condition (code); + + /* Only 64 bit versions of these instructions exist. */ + gcc_assert (mode == DImode); + + /* Start by writing the branch condition. */ + + switch (code) + { + case NE: + strcpy (string, "brnz"); + break; + + case EQ: + strcpy (string, "brz"); + break; + + case GE: + strcpy (string, "brgez"); + break; + + case LT: + strcpy (string, "brlz"); + break; + + case LE: + strcpy (string, "brlez"); + break; + + case GT: + strcpy (string, "brgz"); + break; + + default: + gcc_unreachable (); + } + + p = strchr (string, '\0'); + + /* Now add the annulling, reg, label, and nop. */ + if (annul && ! far) + { + strcpy (p, ",a"); + p += 2; + } + + if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX))) + { + strcpy (p, + ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far) + ? ",pt" : ",pn"); + p += 3; + } + + *p = p < string + 8 ? '\t' : ' '; + p++; + *p++ = '%'; + *p++ = '0' + reg; + *p++ = ','; + *p++ = ' '; + if (far) + { + int veryfar = 1, delta; + + if (INSN_ADDRESSES_SET_P ()) + { + delta = (INSN_ADDRESSES (INSN_UID (dest)) + - INSN_ADDRESSES (INSN_UID (insn))); + /* Leave some instructions for "slop". */ + if (delta >= -260000 && delta < 260000) + veryfar = 0; + } + + strcpy (p, ".+12\n\t nop\n\t"); + /* Skip the next insn if requested or + if we know that it will be a nop. */ + if (annul || ! final_sequence) + p[3] = '6'; + p += 12; + if (veryfar) + { + strcpy (p, "b\t"); + p += 2; + } + else + { + strcpy (p, "ba,pt\t%%xcc, "); + p += 13; + } + } + *p++ = '%'; + *p++ = 'l'; + *p++ = '0' + label; + *p++ = '%'; + *p++ = '#'; + *p = '\0'; + + return string; +} + +/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7]. + Such instructions cannot be used in the delay slot of return insn on v9. + If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts. + */ + +static int +epilogue_renumber (register rtx *where, int test) +{ + register const char *fmt; + register int i; + register enum rtx_code code; + + if (*where == 0) + return 0; + + code = GET_CODE (*where); + + switch (code) + { + case REG: + if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */ + return 1; + if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32) + *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where))); + case SCRATCH: + case CC0: + case PC: + case CONST_INT: + case CONST_DOUBLE: + return 0; + + /* Do not replace the frame pointer with the stack pointer because + it can cause the delayed instruction to load below the stack. + This occurs when instructions like: + + (set (reg/i:SI 24 %i0) + (mem/f:SI (plus:SI (reg/f:SI 30 %fp) + (const_int -20 [0xffffffec])) 0)) + + are in the return delayed slot. 
*/ + case PLUS: + if (GET_CODE (XEXP (*where, 0)) == REG + && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM + && (GET_CODE (XEXP (*where, 1)) != CONST_INT + || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS)) + return 1; + break; + + case MEM: + if (SPARC_STACK_BIAS + && GET_CODE (XEXP (*where, 0)) == REG + && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM) + return 1; + break; + + default: + break; + } + + fmt = GET_RTX_FORMAT (code); + + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + for (j = XVECLEN (*where, i) - 1; j >= 0; j--) + if (epilogue_renumber (&(XVECEXP (*where, i, j)), test)) + return 1; + } + else if (fmt[i] == 'e' + && epilogue_renumber (&(XEXP (*where, i)), test)) + return 1; + } + return 0; +} + +/* Leaf functions and non-leaf functions have different needs. */ + +static const int +reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER; + +static const int +reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER; + +static const int *const reg_alloc_orders[] = { + reg_leaf_alloc_order, + reg_nonleaf_alloc_order}; + +void +order_regs_for_local_alloc (void) +{ + static int last_order_nonleaf = 1; + + if (df_regs_ever_live_p (15) != last_order_nonleaf) + { + last_order_nonleaf = !last_order_nonleaf; + memcpy ((char *) reg_alloc_order, + (const char *) reg_alloc_orders[last_order_nonleaf], + FIRST_PSEUDO_REGISTER * sizeof (int)); + } +} + +/* Return 1 if REG and MEM are legitimate enough to allow the various + mem<-->reg splits to be run. */ + +int +sparc_splitdi_legitimate (rtx reg, rtx mem) +{ + /* Punt if we are here by mistake. */ + gcc_assert (reload_completed); + + /* We must have an offsettable memory reference. */ + if (! offsettable_memref_p (mem)) + return 0; + + /* If we have legitimate args for ldd/std, we do not want + the split to happen. */ + if ((REGNO (reg) % 2) == 0 + && mem_min_alignment (mem, 8)) + return 0; + + /* Success. */ + return 1; +} + +/* Return 1 if x and y are some kind of REG and they refer to + different hard registers. This test is guaranteed to be + run after reload. */ + +int +sparc_absnegfloat_split_legitimate (rtx x, rtx y) +{ + if (GET_CODE (x) != REG) + return 0; + if (GET_CODE (y) != REG) + return 0; + if (REGNO (x) == REGNO (y)) + return 0; + return 1; +} + +/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1. + This makes them candidates for using ldd and std insns. + + Note reg1 and reg2 *must* be hard registers. */ + +int +registers_ok_for_ldd_peep (rtx reg1, rtx reg2) +{ + /* We might have been passed a SUBREG. */ + if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG) + return 0; + + if (REGNO (reg1) % 2 != 0) + return 0; + + /* Integer ldd is deprecated in SPARC V9 */ + if (TARGET_V9 && REGNO (reg1) < 32) + return 0; + + return (REGNO (reg1) == REGNO (reg2) - 1); +} + +/* Return 1 if the addresses in mem1 and mem2 are suitable for use in + an ldd or std insn. + + This can only happen when addr1 and addr2, the addresses in mem1 + and mem2, are consecutive memory locations (addr1 + 4 == addr2). + addr1 must also be aligned on a 64-bit boundary. + + Also iff dependent_reg_rtx is not null it should not be used to + compute the address for mem1, i.e. we cannot optimize a sequence + like: + ld [%o0], %o0 + ld [%o0 + 4], %o1 + to + ldd [%o0], %o0 + nor: + ld [%g3 + 4], %g3 + ld [%g3], %g2 + to + ldd [%g3], %g2 + + But, note that the transformation from: + ld [%g2 + 4], %g3 + ld [%g2], %g2 + to + ldd [%g2], %g2 + is perfectly fine. 
Thus, the peephole2 patterns always pass us + the destination register of the first load, never the second one. + + For stores we don't have a similar problem, so dependent_reg_rtx is + NULL_RTX. */ + +int +mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx) +{ + rtx addr1, addr2; + unsigned int reg1; + HOST_WIDE_INT offset1; + + /* The mems cannot be volatile. */ + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) + return 0; + + /* MEM1 should be aligned on a 64-bit boundary. */ + if (MEM_ALIGN (mem1) < 64) + return 0; + + addr1 = XEXP (mem1, 0); + addr2 = XEXP (mem2, 0); + + /* Extract a register number and offset (if used) from the first addr. */ + if (GET_CODE (addr1) == PLUS) + { + /* If not a REG, return zero. */ + if (GET_CODE (XEXP (addr1, 0)) != REG) + return 0; + else + { + reg1 = REGNO (XEXP (addr1, 0)); + /* The offset must be constant! */ + if (GET_CODE (XEXP (addr1, 1)) != CONST_INT) + return 0; + offset1 = INTVAL (XEXP (addr1, 1)); + } + } + else if (GET_CODE (addr1) != REG) + return 0; + else + { + reg1 = REGNO (addr1); + /* This was a simple (mem (reg)) expression. Offset is 0. */ + offset1 = 0; + } + + /* Make sure the second address is a (mem (plus (reg) (const_int). */ + if (GET_CODE (addr2) != PLUS) + return 0; + + if (GET_CODE (XEXP (addr2, 0)) != REG + || GET_CODE (XEXP (addr2, 1)) != CONST_INT) + return 0; + + if (reg1 != REGNO (XEXP (addr2, 0))) + return 0; + + if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx)) + return 0; + + /* The first offset must be evenly divisible by 8 to ensure the + address is 64 bit aligned. */ + if (offset1 % 8 != 0) + return 0; + + /* The offset for the second addr must be 4 more than the first addr. */ + if (INTVAL (XEXP (addr2, 1)) != offset1 + 4) + return 0; + + /* All the tests passed. addr1 and addr2 are valid for ldd and std + instructions. */ + return 1; +} + +/* Return 1 if reg is a pseudo, or is the first register in + a hard register pair. This makes it suitable for use in + ldd and std insns. */ + +int +register_ok_for_ldd (rtx reg) +{ + /* We might have been passed a SUBREG. */ + if (!REG_P (reg)) + return 0; + + if (REGNO (reg) < FIRST_PSEUDO_REGISTER) + return (REGNO (reg) % 2 == 0); + + return 1; +} + +/* Return 1 if OP is a memory whose address is known to be + aligned to 8-byte boundary, or a pseudo during reload. + This makes it suitable for use in ldd and std insns. */ + +int +memory_ok_for_ldd (rtx op) +{ + if (MEM_P (op)) + { + /* In 64-bit mode, we assume that the address is word-aligned. */ + if (TARGET_ARCH32 && !mem_min_alignment (op, 8)) + return 0; + + if ((reload_in_progress || reload_completed) + && !strict_memory_address_p (Pmode, XEXP (op, 0))) + return 0; + } + else if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER) + { + if (!(reload_in_progress && reg_renumber [REGNO (op)] < 0)) + return 0; + } + else + return 0; + + return 1; +} + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +void +print_operand (FILE *file, rtx x, int code) +{ + switch (code) + { + case '#': + /* Output an insn in a delay slot. */ + if (final_sequence) + sparc_indent_opcode = 1; + else + fputs ("\n\t nop", file); + return; + case '*': + /* Output an annul flag if there's nothing for the delay slot and we + are optimizing. This is always used with '(' below. 
+ Sun OS 4.1.1 dbx can't handle an annulled unconditional branch; + this is a dbx bug. So, we only do this when optimizing. + On UltraSPARC, a branch in a delay slot causes a pipeline flush. + Always emit a nop in case the next instruction is a branch. */ + if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9)) + fputs (",a", file); + return; + case '(': + /* Output a 'nop' if there's nothing for the delay slot and we are + not optimizing. This is always used with '*' above. */ + if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9)) + fputs ("\n\t nop", file); + else if (final_sequence) + sparc_indent_opcode = 1; + return; + case ')': + /* Output the right displacement from the saved PC on function return. + The caller may have placed an "unimp" insn immediately after the call + so we have to account for it. This insn is used in the 32-bit ABI + when calling a function that returns a non zero-sized structure. The + 64-bit ABI doesn't have it. Be careful to have this test be the same + as that for the call. The exception is when sparc_std_struct_return + is enabled, the psABI is followed exactly and the adjustment is made + by the code in sparc_struct_value_rtx. The call emitted is the same + when sparc_std_struct_return is enabled. */ + if (!TARGET_ARCH64 + && cfun->returns_struct + && !sparc_std_struct_return + && DECL_SIZE (DECL_RESULT (current_function_decl)) + && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl))) + == INTEGER_CST + && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))) + fputs ("12", file); + else + fputc ('8', file); + return; + case '_': + /* Output the Embedded Medium/Anywhere code model base register. */ + fputs (EMBMEDANY_BASE_REG, file); + return; + case '&': + /* Print some local dynamic TLS name. */ + assemble_name (file, get_some_local_dynamic_name ()); + return; + + case 'Y': + /* Adjust the operand to take into account a RESTORE operation. */ + if (GET_CODE (x) == CONST_INT) + break; + else if (GET_CODE (x) != REG) + output_operand_lossage ("invalid %%Y operand"); + else if (REGNO (x) < 8) + fputs (reg_names[REGNO (x)], file); + else if (REGNO (x) >= 24 && REGNO (x) < 32) + fputs (reg_names[REGNO (x)-16], file); + else + output_operand_lossage ("invalid %%Y operand"); + return; + case 'L': + /* Print out the low order register name of a register pair. */ + if (WORDS_BIG_ENDIAN) + fputs (reg_names[REGNO (x)+1], file); + else + fputs (reg_names[REGNO (x)], file); + return; + case 'H': + /* Print out the high order register name of a register pair. */ + if (WORDS_BIG_ENDIAN) + fputs (reg_names[REGNO (x)], file); + else + fputs (reg_names[REGNO (x)+1], file); + return; + case 'R': + /* Print out the second register name of a register pair or quad. + I.e., R (%o0) => %o1. */ + fputs (reg_names[REGNO (x)+1], file); + return; + case 'S': + /* Print out the third register name of a register quad. + I.e., S (%o0) => %o2. */ + fputs (reg_names[REGNO (x)+2], file); + return; + case 'T': + /* Print out the fourth register name of a register quad. + I.e., T (%o0) => %o3. */ + fputs (reg_names[REGNO (x)+3], file); + return; + case 'x': + /* Print a condition code register. */ + if (REGNO (x) == SPARC_ICC_REG) + { + /* We don't handle CC[X]_NOOVmode because they're not supposed + to occur here. 
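+
+	 Editor's note (illustrative, not part of the upstream sources):
+	 a CCmode operand prints as "%icc", a CCXmode operand as "%xcc",
+	 and the remaining cases print the named "%fccN" register, so a
+	 hypothetical template using "%x1" can emit, e.g.,
+	 "movne %xcc, 1, %o0" for a 64-bit integer condition.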
*/ + if (GET_MODE (x) == CCmode) + fputs ("%icc", file); + else if (GET_MODE (x) == CCXmode) + fputs ("%xcc", file); + else + gcc_unreachable (); + } + else + /* %fccN register */ + fputs (reg_names[REGNO (x)], file); + return; + case 'm': + /* Print the operand's address only. */ + output_address (XEXP (x, 0)); + return; + case 'r': + /* In this case we need a register. Use %g0 if the + operand is const0_rtx. */ + if (x == const0_rtx + || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x)))) + { + fputs ("%g0", file); + return; + } + else + break; + + case 'A': + switch (GET_CODE (x)) + { + case IOR: fputs ("or", file); break; + case AND: fputs ("and", file); break; + case XOR: fputs ("xor", file); break; + default: output_operand_lossage ("invalid %%A operand"); + } + return; + + case 'B': + switch (GET_CODE (x)) + { + case IOR: fputs ("orn", file); break; + case AND: fputs ("andn", file); break; + case XOR: fputs ("xnor", file); break; + default: output_operand_lossage ("invalid %%B operand"); + } + return; + + /* These are used by the conditional move instructions. */ + case 'c' : + case 'C': + { + enum rtx_code rc = GET_CODE (x); + + if (code == 'c') + { + enum machine_mode mode = GET_MODE (XEXP (x, 0)); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (GET_CODE (x)); + else + rc = reverse_condition (GET_CODE (x)); + } + switch (rc) + { + case NE: fputs ("ne", file); break; + case EQ: fputs ("e", file); break; + case GE: fputs ("ge", file); break; + case GT: fputs ("g", file); break; + case LE: fputs ("le", file); break; + case LT: fputs ("l", file); break; + case GEU: fputs ("geu", file); break; + case GTU: fputs ("gu", file); break; + case LEU: fputs ("leu", file); break; + case LTU: fputs ("lu", file); break; + case LTGT: fputs ("lg", file); break; + case UNORDERED: fputs ("u", file); break; + case ORDERED: fputs ("o", file); break; + case UNLT: fputs ("ul", file); break; + case UNLE: fputs ("ule", file); break; + case UNGT: fputs ("ug", file); break; + case UNGE: fputs ("uge", file); break; + case UNEQ: fputs ("ue", file); break; + default: output_operand_lossage (code == 'c' + ? "invalid %%c operand" + : "invalid %%C operand"); + } + return; + } + + /* These are used by the movr instruction pattern. */ + case 'd': + case 'D': + { + enum rtx_code rc = (code == 'd' + ? reverse_condition (GET_CODE (x)) + : GET_CODE (x)); + switch (rc) + { + case NE: fputs ("ne", file); break; + case EQ: fputs ("e", file); break; + case GE: fputs ("gez", file); break; + case LT: fputs ("lz", file); break; + case LE: fputs ("lez", file); break; + case GT: fputs ("gz", file); break; + default: output_operand_lossage (code == 'd' + ? "invalid %%d operand" + : "invalid %%D operand"); + } + return; + } + + case 'b': + { + /* Print a sign-extended character. */ + int i = trunc_int_for_mode (INTVAL (x), QImode); + fprintf (file, "%d", i); + return; + } + + case 'f': + /* Operand must be a MEM; write its address. */ + if (GET_CODE (x) != MEM) + output_operand_lossage ("invalid %%f operand"); + output_address (XEXP (x, 0)); + return; + + case 's': + { + /* Print a sign-extended 32-bit value. */ + HOST_WIDE_INT i; + if (GET_CODE(x) == CONST_INT) + i = INTVAL (x); + else if (GET_CODE(x) == CONST_DOUBLE) + i = CONST_DOUBLE_LOW (x); + else + { + output_operand_lossage ("invalid %%s operand"); + return; + } + i = trunc_int_for_mode (i, SImode); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, i); + return; + } + + case 0: + /* Do nothing special. 
*/ + break; + + default: + /* Undocumented flag. */ + output_operand_lossage ("invalid operand output code"); + } + + if (GET_CODE (x) == REG) + fputs (reg_names[REGNO (x)], file); + else if (GET_CODE (x) == MEM) + { + fputc ('[', file); + /* Poor Sun assembler doesn't understand absolute addressing. */ + if (CONSTANT_P (XEXP (x, 0))) + fputs ("%g0+", file); + output_address (XEXP (x, 0)); + fputc (']', file); + } + else if (GET_CODE (x) == HIGH) + { + fputs ("%hi(", file); + output_addr_const (file, XEXP (x, 0)); + fputc (')', file); + } + else if (GET_CODE (x) == LO_SUM) + { + print_operand (file, XEXP (x, 0), 0); + if (TARGET_CM_MEDMID) + fputs ("+%l44(", file); + else + fputs ("+%lo(", file); + output_addr_const (file, XEXP (x, 1)); + fputc (')', file); + } + else if (GET_CODE (x) == CONST_DOUBLE + && (GET_MODE (x) == VOIDmode + || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)) + { + if (CONST_DOUBLE_HIGH (x) == 0) + fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x)); + else if (CONST_DOUBLE_HIGH (x) == -1 + && CONST_DOUBLE_LOW (x) < 0) + fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x)); + else + output_operand_lossage ("long long constant not a valid immediate operand"); + } + else if (GET_CODE (x) == CONST_DOUBLE) + output_operand_lossage ("floating point constant not a valid immediate operand"); + else { output_addr_const (file, x); } +} + +/* Target hook for assembling integer objects. The sparc version has + special handling for aligned DI-mode objects. */ + +static bool +sparc_assemble_integer (rtx x, unsigned int size, int aligned_p) +{ + /* ??? We only output .xword's for symbols and only then in environments + where the assembler can handle them. */ + if (aligned_p && size == 8 + && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE)) + { + if (TARGET_V9) + { + assemble_integer_with_op ("\t.xword\t", x); + return true; + } + else + { + assemble_aligned_integer (4, const0_rtx); + assemble_aligned_integer (4, x); + return true; + } + } + return default_assemble_integer (x, size, aligned_p); +} + +/* Return the value of a code used in the .proc pseudo-op that says + what kind of result this function returns. For non-C types, we pick + the closest C type. */ + +#ifndef SHORT_TYPE_SIZE +#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2) +#endif + +#ifndef INT_TYPE_SIZE +#define INT_TYPE_SIZE BITS_PER_WORD +#endif + +#ifndef LONG_TYPE_SIZE +#define LONG_TYPE_SIZE BITS_PER_WORD +#endif + +#ifndef LONG_LONG_TYPE_SIZE +#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2) +#endif + +#ifndef FLOAT_TYPE_SIZE +#define FLOAT_TYPE_SIZE BITS_PER_WORD +#endif + +#ifndef DOUBLE_TYPE_SIZE +#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) +#endif + +#ifndef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) +#endif + +unsigned long +sparc_type_code (register tree type) +{ + register unsigned long qualifiers = 0; + register unsigned shift; + + /* Only the first 30 bits of the qualifier are valid. We must refrain from + setting more, since some assemblers will give an error for this. Also, + we must be careful to avoid shifts of 32 bits or more to avoid getting + unpredictable results. 
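+
+   Editor's worked example (not part of the upstream sources), derived
+   from the switch statement below: the base type code lands in the low
+   bits and each level of derivation adds a 2-bit qualifier starting at
+   bit 6, so for a hypothetical "unsigned short *" the result is
+
+     (1 << 6)        pointer qualifier at the first level
+   |  13             unsigned short base code
+   = 0x4d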
*/ + + for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type)) + { + switch (TREE_CODE (type)) + { + case ERROR_MARK: + return qualifiers; + + case ARRAY_TYPE: + qualifiers |= (3 << shift); + break; + + case FUNCTION_TYPE: + case METHOD_TYPE: + qualifiers |= (2 << shift); + break; + + case POINTER_TYPE: + case REFERENCE_TYPE: + case OFFSET_TYPE: + qualifiers |= (1 << shift); + break; + + case RECORD_TYPE: + return (qualifiers | 8); + + case UNION_TYPE: + case QUAL_UNION_TYPE: + return (qualifiers | 9); + + case ENUMERAL_TYPE: + return (qualifiers | 10); + + case VOID_TYPE: + return (qualifiers | 16); + + case INTEGER_TYPE: + /* If this is a range type, consider it to be the underlying + type. */ + if (TREE_TYPE (type) != 0) + break; + + /* Carefully distinguish all the standard types of C, + without messing up if the language is not C. We do this by + testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to + look at both the names and the above fields, but that's redundant. + Any type whose size is between two C types will be considered + to be the wider of the two types. Also, we do not have a + special code to use for "long long", so anything wider than + long is treated the same. Note that we can't distinguish + between "int" and "long" in this code if they are the same + size, but that's fine, since neither can the assembler. */ + + if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE) + return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2)); + + else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE) + return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3)); + + else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE) + return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4)); + + else + return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5)); + + case REAL_TYPE: + /* If this is a range type, consider it to be the underlying + type. */ + if (TREE_TYPE (type) != 0) + break; + + /* Carefully distinguish all the standard types of C, + without messing up if the language is not C. */ + + if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE) + return (qualifiers | 6); + + else + return (qualifiers | 7); + + case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */ + /* ??? We need to distinguish between double and float complex types, + but I don't know how yet because I can't reach this code from + existing front-ends. */ + return (qualifiers | 7); /* Who knows? */ + + case VECTOR_TYPE: + case BOOLEAN_TYPE: /* Boolean truth value type. */ + case LANG_TYPE: + case NULLPTR_TYPE: + return qualifiers; + + default: + gcc_unreachable (); /* Not a type! */ + } + } + + return qualifiers; +} + +/* Nested function support. */ + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. + + This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi + (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes + (to store insns). This is a bit excessive. Perhaps a different + mechanism would be better here. + + Emit enough FLUSH insns to synchronize the data and instruction caches. 
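+
+   Editor's worked example (not part of the upstream sources): with the
+   encodings used below, the upper 22 bits of an address go into the
+   sethi immediate and the low 10 bits into the or/jmpl immediate; for a
+   hypothetical FNADDR of 0x12345678 the first trampoline word becomes
+
+     0x03000000 | (0x12345678 >> 10)  = 0x03048d15   sethi %hi(fn), %g1
+
+   and the third word becomes
+
+     0x81c06000 | (0x12345678 & 0x3ff) = 0x81c06278   jmp %g1 + %lo(fn)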
*/ + +static void +sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt) +{ + /* SPARC 32-bit trampoline: + + sethi %hi(fn), %g1 + sethi %hi(static), %g2 + jmp %g1+%lo(fn) + or %g2, %lo(static), %g2 + + SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii + JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii + */ + + emit_move_insn + (adjust_address (m_tramp, SImode, 0), + expand_binop (SImode, ior_optab, + expand_shift (RSHIFT_EXPR, SImode, fnaddr, + size_int (10), 0, 1), + GEN_INT (trunc_int_for_mode (0x03000000, SImode)), + NULL_RTX, 1, OPTAB_DIRECT)); + + emit_move_insn + (adjust_address (m_tramp, SImode, 4), + expand_binop (SImode, ior_optab, + expand_shift (RSHIFT_EXPR, SImode, cxt, + size_int (10), 0, 1), + GEN_INT (trunc_int_for_mode (0x05000000, SImode)), + NULL_RTX, 1, OPTAB_DIRECT)); + + emit_move_insn + (adjust_address (m_tramp, SImode, 8), + expand_binop (SImode, ior_optab, + expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX), + GEN_INT (trunc_int_for_mode (0x81c06000, SImode)), + NULL_RTX, 1, OPTAB_DIRECT)); + + emit_move_insn + (adjust_address (m_tramp, SImode, 12), + expand_binop (SImode, ior_optab, + expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX), + GEN_INT (trunc_int_for_mode (0x8410a000, SImode)), + NULL_RTX, 1, OPTAB_DIRECT)); + + /* On UltraSPARC a flush flushes an entire cache line. The trampoline is + aligned on a 16 byte boundary so one flush clears it all. */ + emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0)))); + if (sparc_cpu != PROCESSOR_ULTRASPARC + && sparc_cpu != PROCESSOR_ULTRASPARC3 + && sparc_cpu != PROCESSOR_NIAGARA + && sparc_cpu != PROCESSOR_NIAGARA2) + emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8)))); + + /* Call __enable_execute_stack after writing onto the stack to make sure + the stack address is accessible. */ +#ifdef ENABLE_EXECUTE_STACK + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), + LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); +#endif + +} + +/* The 64-bit version is simpler because it makes more sense to load the + values as "immediate" data out of the trampoline. It's also easier since + we can read the PC without clobbering a register. */ + +static void +sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt) +{ + /* SPARC 64-bit trampoline: + + rd %pc, %g1 + ldx [%g1+24], %g5 + jmp %g5 + ldx [%g1+16], %g5 + +16 bytes data + */ + + emit_move_insn (adjust_address (m_tramp, SImode, 0), + GEN_INT (trunc_int_for_mode (0x83414000, SImode))); + emit_move_insn (adjust_address (m_tramp, SImode, 4), + GEN_INT (trunc_int_for_mode (0xca586018, SImode))); + emit_move_insn (adjust_address (m_tramp, SImode, 8), + GEN_INT (trunc_int_for_mode (0x81c14000, SImode))); + emit_move_insn (adjust_address (m_tramp, SImode, 12), + GEN_INT (trunc_int_for_mode (0xca586010, SImode))); + emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt); + emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr); + emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0)))); + + if (sparc_cpu != PROCESSOR_ULTRASPARC + && sparc_cpu != PROCESSOR_ULTRASPARC3 + && sparc_cpu != PROCESSOR_NIAGARA + && sparc_cpu != PROCESSOR_NIAGARA2) + emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8)))); + + /* Call __enable_execute_stack after writing onto the stack to make sure + the stack address is accessible. 
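+
+   Editor's note (not part of the upstream sources): __enable_execute_stack
+   is a runtime helper provided by the target's libgcc support code that
+   typically uses mprotect to make the page holding the trampoline
+   executable on systems whose stacks are not executable by default; it
+   is only referenced when ENABLE_EXECUTE_STACK is defined by the
+   OS-specific headers.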
*/ +#ifdef ENABLE_EXECUTE_STACK + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), + LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); +#endif +} + +/* Worker for TARGET_TRAMPOLINE_INIT. */ + +static void +sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) +{ + rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0)); + cxt = force_reg (Pmode, cxt); + if (TARGET_ARCH64) + sparc64_initialize_trampoline (m_tramp, fnaddr, cxt); + else + sparc32_initialize_trampoline (m_tramp, fnaddr, cxt); +} + +/* Adjust the cost of a scheduling dependency. Return the new cost of + a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ + +static int +supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type insn_type; + + if (! recog_memoized (insn)) + return 0; + + insn_type = get_attr_type (insn); + + if (REG_NOTE_KIND (link) == 0) + { + /* Data dependency; DEP_INSN writes a register that INSN reads some + cycles later. */ + + /* if a load, then the dependence must be on the memory address; + add an extra "cycle". Note that the cost could be two cycles + if the reg was written late in an instruction group; we ca not tell + here. */ + if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD) + return cost + 3; + + /* Get the delay only if the address of the store is the dependence. */ + if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE) + { + rtx pat = PATTERN(insn); + rtx dep_pat = PATTERN (dep_insn); + + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + return cost; /* This should not happen! */ + + /* The dependency between the two instructions was on the data that + is being stored. Assume that this implies that the address of the + store is not dependent. */ + if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) + return cost; + + return cost + 3; /* An approximation. */ + } + + /* A shift instruction cannot receive its data from an instruction + in the same cycle; add a one cycle penalty. */ + if (insn_type == TYPE_SHIFT) + return cost + 3; /* Split before cascade into shift. */ + } + else + { + /* Anti- or output- dependency; DEP_INSN reads/writes a register that + INSN writes some cycles later. */ + + /* These are only significant for the fpu unit; writing a fp reg before + the fpu has finished with it stalls the processor. */ + + /* Reusing an integer register causes no problems. */ + if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT) + return 0; + } + + return cost; +} + +static int +hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type insn_type, dep_type; + rtx pat = PATTERN(insn); + rtx dep_pat = PATTERN (dep_insn); + + if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) + return cost; + + insn_type = get_attr_type (insn); + dep_type = get_attr_type (dep_insn); + + switch (REG_NOTE_KIND (link)) + { + case 0: + /* Data dependency; DEP_INSN writes a register that INSN reads some + cycles later. */ + + switch (insn_type) + { + case TYPE_STORE: + case TYPE_FPSTORE: + /* Get the delay iff the address of the store is the dependence. */ + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + return cost; + + if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) + return cost; + return cost + 3; + + case TYPE_LOAD: + case TYPE_SLOAD: + case TYPE_FPLOAD: + /* If a load, then the dependence must be on the memory address. 
If + the addresses aren't equal, then it might be a false dependency */ + if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE) + { + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET + || GET_CODE (SET_DEST (dep_pat)) != MEM + || GET_CODE (SET_SRC (pat)) != MEM + || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0), + XEXP (SET_SRC (pat), 0))) + return cost + 2; + + return cost + 8; + } + break; + + case TYPE_BRANCH: + /* Compare to branch latency is 0. There is no benefit from + separating compare and branch. */ + if (dep_type == TYPE_COMPARE) + return 0; + /* Floating point compare to branch latency is less than + compare to conditional move. */ + if (dep_type == TYPE_FPCMP) + return cost - 1; + break; + default: + break; + } + break; + + case REG_DEP_ANTI: + /* Anti-dependencies only penalize the fpu unit. */ + if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT) + return 0; + break; + + default: + break; + } + + return cost; +} + +static int +sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost) +{ + switch (sparc_cpu) + { + case PROCESSOR_SUPERSPARC: + cost = supersparc_adjust_cost (insn, link, dep, cost); + break; + case PROCESSOR_HYPERSPARC: + case PROCESSOR_SPARCLITE86X: + cost = hypersparc_adjust_cost (insn, link, dep, cost); + break; + default: + break; + } + return cost; +} + +static void +sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) +{} + +static int +sparc_use_sched_lookahead (void) +{ + if (sparc_cpu == PROCESSOR_NIAGARA + || sparc_cpu == PROCESSOR_NIAGARA2) + return 0; + if (sparc_cpu == PROCESSOR_ULTRASPARC + || sparc_cpu == PROCESSOR_ULTRASPARC3) + return 4; + if ((1 << sparc_cpu) & + ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) | + (1 << PROCESSOR_SPARCLITE86X))) + return 3; + return 0; +} + +static int +sparc_issue_rate (void) +{ + switch (sparc_cpu) + { + case PROCESSOR_NIAGARA: + case PROCESSOR_NIAGARA2: + default: + return 1; + case PROCESSOR_V9: + /* Assume V9 processors are capable of at least dual-issue. */ + return 2; + case PROCESSOR_SUPERSPARC: + return 3; + case PROCESSOR_HYPERSPARC: + case PROCESSOR_SPARCLITE86X: + return 2; + case PROCESSOR_ULTRASPARC: + case PROCESSOR_ULTRASPARC3: + return 4; + } +} + +static int +set_extends (rtx insn) +{ + register rtx pat = PATTERN (insn); + + switch (GET_CODE (SET_SRC (pat))) + { + /* Load and some shift instructions zero extend. */ + case MEM: + case ZERO_EXTEND: + /* sethi clears the high bits */ + case HIGH: + /* LO_SUM is used with sethi. sethi cleared the high + bits and the values used with lo_sum are positive */ + case LO_SUM: + /* Store flag stores 0 or 1 */ + case LT: case LTU: + case GT: case GTU: + case LE: case LEU: + case GE: case GEU: + case EQ: + case NE: + return 1; + case AND: + { + rtx op0 = XEXP (SET_SRC (pat), 0); + rtx op1 = XEXP (SET_SRC (pat), 1); + if (GET_CODE (op1) == CONST_INT) + return INTVAL (op1) >= 0; + if (GET_CODE (op0) != REG) + return 0; + if (sparc_check_64 (op0, insn) == 1) + return 1; + return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1); + } + case IOR: + case XOR: + { + rtx op0 = XEXP (SET_SRC (pat), 0); + rtx op1 = XEXP (SET_SRC (pat), 1); + if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0) + return 0; + if (GET_CODE (op1) == CONST_INT) + return INTVAL (op1) >= 0; + return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1); + } + case LSHIFTRT: + return GET_MODE (SET_SRC (pat)) == SImode; + /* Positive integers leave the high bits zero. 
*/ + case CONST_DOUBLE: + return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000); + case CONST_INT: + return ! (INTVAL (SET_SRC (pat)) & 0x80000000); + case ASHIFTRT: + case SIGN_EXTEND: + return - (GET_MODE (SET_SRC (pat)) == SImode); + case REG: + return sparc_check_64 (SET_SRC (pat), insn); + default: + return 0; + } +} + +/* We _ought_ to have only one kind per function, but... */ +static GTY(()) rtx sparc_addr_diff_list; +static GTY(()) rtx sparc_addr_list; + +void +sparc_defer_case_vector (rtx lab, rtx vec, int diff) +{ + vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec); + if (diff) + sparc_addr_diff_list + = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list); + else + sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list); +} + +static void +sparc_output_addr_vec (rtx vec) +{ + rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); + int idx, vlen = XVECLEN (body, 0); + +#ifdef ASM_OUTPUT_ADDR_VEC_START + ASM_OUTPUT_ADDR_VEC_START (asm_out_file); +#endif + +#ifdef ASM_OUTPUT_CASE_LABEL + ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), + NEXT_INSN (lab)); +#else + (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); +#endif + + for (idx = 0; idx < vlen; idx++) + { + ASM_OUTPUT_ADDR_VEC_ELT + (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0))); + } + +#ifdef ASM_OUTPUT_ADDR_VEC_END + ASM_OUTPUT_ADDR_VEC_END (asm_out_file); +#endif +} + +static void +sparc_output_addr_diff_vec (rtx vec) +{ + rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); + rtx base = XEXP (XEXP (body, 0), 0); + int idx, vlen = XVECLEN (body, 1); + +#ifdef ASM_OUTPUT_ADDR_VEC_START + ASM_OUTPUT_ADDR_VEC_START (asm_out_file); +#endif + +#ifdef ASM_OUTPUT_CASE_LABEL + ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), + NEXT_INSN (lab)); +#else + (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); +#endif + + for (idx = 0; idx < vlen; idx++) + { + ASM_OUTPUT_ADDR_DIFF_ELT + (asm_out_file, + body, + CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)), + CODE_LABEL_NUMBER (base)); + } + +#ifdef ASM_OUTPUT_ADDR_VEC_END + ASM_OUTPUT_ADDR_VEC_END (asm_out_file); +#endif +} + +static void +sparc_output_deferred_case_vectors (void) +{ + rtx t; + int align; + + if (sparc_addr_list == NULL_RTX + && sparc_addr_diff_list == NULL_RTX) + return; + + /* Align to cache line in the function's code section. */ + switch_to_section (current_function_section ()); + + align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT); + if (align > 0) + ASM_OUTPUT_ALIGN (asm_out_file, align); + + for (t = sparc_addr_list; t ; t = XEXP (t, 1)) + sparc_output_addr_vec (XEXP (t, 0)); + for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1)) + sparc_output_addr_diff_vec (XEXP (t, 0)); + + sparc_addr_list = sparc_addr_diff_list = NULL_RTX; +} + +/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are + unknown. Return 1 if the high bits are zero, -1 if the register is + sign extended. */ +int +sparc_check_64 (rtx x, rtx insn) +{ + /* If a register is set only once it is safe to ignore insns this + code does not know how to handle. The loop will either recognize + the single set and return the correct value or fail to recognize + it and return 0. 
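+
+   Editor's note (not part of the upstream sources): together with
+   set_extends above, this is what lets the V8+/V9 patterns know when the
+   upper half of a 64-bit register is already clean; for instance, after
+
+     lduw  [%o0], %o1       ! 32-bit load, zero-extends into bits 63..32
+
+   a later use of %o1 as a full 64-bit value needs no extra "srl %o1, 0".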
*/ + int set_once = 0; + rtx y = x; + + gcc_assert (GET_CODE (x) == REG); + + if (GET_MODE (x) == DImode) + y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN); + + if (flag_expensive_optimizations + && df && DF_REG_DEF_COUNT (REGNO (y)) == 1) + set_once = 1; + + if (insn == 0) + { + if (set_once) + insn = get_last_insn_anywhere (); + else + return 0; + } + + while ((insn = PREV_INSN (insn))) + { + switch (GET_CODE (insn)) + { + case JUMP_INSN: + case NOTE: + break; + case CODE_LABEL: + case CALL_INSN: + default: + if (! set_once) + return 0; + break; + case INSN: + { + rtx pat = PATTERN (insn); + if (GET_CODE (pat) != SET) + return 0; + if (rtx_equal_p (x, SET_DEST (pat))) + return set_extends (insn); + if (y && rtx_equal_p (y, SET_DEST (pat))) + return set_extends (insn); + if (reg_overlap_mentioned_p (SET_DEST (pat), y)) + return 0; + } + } + } + return 0; +} + +/* Returns assembly code to perform a DImode shift using + a 64-bit global or out register on SPARC-V8+. */ +const char * +output_v8plus_shift (rtx *operands, rtx insn, const char *opcode) +{ + static char asm_code[60]; + + /* The scratch register is only required when the destination + register is not a 64-bit global or out register. */ + if (which_alternative != 2) + operands[3] = operands[0]; + + /* We can only shift by constants <= 63. */ + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); + + if (GET_CODE (operands[1]) == CONST_INT) + { + output_asm_insn ("mov\t%1, %3", operands); + } + else + { + output_asm_insn ("sllx\t%H1, 32, %3", operands); + if (sparc_check_64 (operands[1], insn) <= 0) + output_asm_insn ("srl\t%L1, 0, %L1", operands); + output_asm_insn ("or\t%L1, %3, %3", operands); + } + + strcpy(asm_code, opcode); + + if (which_alternative != 2) + return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0"); + else + return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"); +} + +/* Output rtl to increment the profiler label LABELNO + for profiling a function entry. */ + +void +sparc_profile_hook (int labelno) +{ + char buf[32]; + rtx lab, fun; + + fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION); + if (NO_PROFILE_COUNTERS) + { + emit_library_call (fun, LCT_NORMAL, VOIDmode, 0); + } + else + { + ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); + lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); + emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode); + } +} + +/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ + +static void +sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags, + tree decl ATTRIBUTE_UNUSED) +{ + fprintf (asm_out_file, "\t.section\t\"%s\"", name); + + if (!(flags & SECTION_DEBUG)) + fputs (",#alloc", asm_out_file); + if (flags & SECTION_WRITE) + fputs (",#write", asm_out_file); + if (flags & SECTION_TLS) + fputs (",#tls", asm_out_file); + if (flags & SECTION_CODE) + fputs (",#execinstr", asm_out_file); + + /* ??? Handle SECTION_BSS. */ + + fputc ('\n', asm_out_file); +} + +/* We do not allow indirect calls to be optimized into sibling calls. + + We cannot use sibling calls when delayed branches are disabled + because they will likely require the call delay slot to be filled. + + Also, on SPARC 32-bit we cannot emit a sibling call when the + current function returns a structure. This is because the "unimp + after call" convention would cause the callee to return to the + wrong place. The generic code already disallows cases where the + function being called returns a structure. 
+ + It may seem strange how this last case could occur. Usually there + is code after the call which jumps to epilogue code which dumps the + return value into the struct return area. That ought to invalidate + the sibling call right? Well, in the C++ case we can end up passing + the pointer to the struct return area to a constructor (which returns + void) and then nothing else happens. Such a sibling call would look + valid without the added check here. + + VxWorks PIC PLT entries require the global pointer to be initialized + on entry. We therefore can't emit sibling calls to them. */ +static bool +sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +{ + return (decl + && flag_delayed_branch + && (TARGET_ARCH64 || ! cfun->returns_struct) + && !(TARGET_VXWORKS_RTP + && flag_pic + && !targetm.binds_local_p (decl))); +} + +/* libfunc renaming. */ + +static void +sparc_init_libfuncs (void) +{ + if (TARGET_ARCH32) + { + /* Use the subroutines that Sun's library provides for integer + multiply and divide. The `*' prevents an underscore from + being prepended by the compiler. .umul is a little faster + than .mul. */ + set_optab_libfunc (smul_optab, SImode, "*.umul"); + set_optab_libfunc (sdiv_optab, SImode, "*.div"); + set_optab_libfunc (udiv_optab, SImode, "*.udiv"); + set_optab_libfunc (smod_optab, SImode, "*.rem"); + set_optab_libfunc (umod_optab, SImode, "*.urem"); + + /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */ + set_optab_libfunc (add_optab, TFmode, "_Q_add"); + set_optab_libfunc (sub_optab, TFmode, "_Q_sub"); + set_optab_libfunc (neg_optab, TFmode, "_Q_neg"); + set_optab_libfunc (smul_optab, TFmode, "_Q_mul"); + set_optab_libfunc (sdiv_optab, TFmode, "_Q_div"); + + /* We can define the TFmode sqrt optab only if TARGET_FPU. This + is because with soft-float, the SFmode and DFmode sqrt + instructions will be absent, and the compiler will notice and + try to use the TFmode sqrt instruction for calls to the + builtin function sqrt, but this fails. 
*/ + if (TARGET_FPU) + set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt"); + + set_optab_libfunc (eq_optab, TFmode, "_Q_feq"); + set_optab_libfunc (ne_optab, TFmode, "_Q_fne"); + set_optab_libfunc (gt_optab, TFmode, "_Q_fgt"); + set_optab_libfunc (ge_optab, TFmode, "_Q_fge"); + set_optab_libfunc (lt_optab, TFmode, "_Q_flt"); + set_optab_libfunc (le_optab, TFmode, "_Q_fle"); + + set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq"); + set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq"); + set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos"); + set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod"); + + set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi"); + set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou"); + set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq"); + set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq"); + + if (DITF_CONVERSION_LIBFUNCS) + { + set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll"); + set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull"); + set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq"); + set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq"); + } + + if (SUN_CONVERSION_LIBFUNCS) + { + set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll"); + set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull"); + set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll"); + set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull"); + } + } + if (TARGET_ARCH64) + { + /* In the SPARC 64bit ABI, SImode multiply and divide functions + do not exist in the library. Make sure the compiler does not + emit calls to them by accident. (It should always use the + hardware instructions.) */ + set_optab_libfunc (smul_optab, SImode, 0); + set_optab_libfunc (sdiv_optab, SImode, 0); + set_optab_libfunc (udiv_optab, SImode, 0); + set_optab_libfunc (smod_optab, SImode, 0); + set_optab_libfunc (umod_optab, SImode, 0); + + if (SUN_INTEGER_MULTIPLY_64) + { + set_optab_libfunc (smul_optab, DImode, "__mul64"); + set_optab_libfunc (sdiv_optab, DImode, "__div64"); + set_optab_libfunc (udiv_optab, DImode, "__udiv64"); + set_optab_libfunc (smod_optab, DImode, "__rem64"); + set_optab_libfunc (umod_optab, DImode, "__urem64"); + } + + if (SUN_CONVERSION_LIBFUNCS) + { + set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol"); + set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul"); + set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol"); + set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul"); + } + } +} + +#define def_builtin(NAME, CODE, TYPE) \ + add_builtin_function((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \ + NULL_TREE) + +/* Implement the TARGET_INIT_BUILTINS target hook. + Create builtin functions for special SPARC instructions. */ + +static void +sparc_init_builtins (void) +{ + if (TARGET_VIS) + sparc_vis_init_builtins (); +} + +/* Create builtin functions for VIS 1.0 instructions. 
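+   (Editorial sketch, not part of the upstream comment: at the source level
+   these builtins are called with GCC vector types; assuming the usual
+   vector_size typedefs, e.g.
+
+     typedef unsigned char v4qi __attribute__ ((vector_size (4)));
+     typedef short v4hi __attribute__ ((vector_size (8)));
+     v4hi r = __builtin_vis_fmul8x16 (a, b);    with a of type v4qi, b of type v4hi
+
+   each prototype corresponds to one of the function_type_list nodes built
+   below.)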
*/ + +static void +sparc_vis_init_builtins (void) +{ + tree v4qi = build_vector_type (unsigned_intQI_type_node, 4); + tree v8qi = build_vector_type (unsigned_intQI_type_node, 8); + tree v4hi = build_vector_type (intHI_type_node, 4); + tree v2hi = build_vector_type (intHI_type_node, 2); + tree v2si = build_vector_type (intSI_type_node, 2); + + tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0); + tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0); + tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0); + tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0); + tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0); + tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0); + tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0); + tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0); + tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0); + tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0); + tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0); + tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0); + tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node, + v8qi, v8qi, + intDI_type_node, 0); + tree di_ftype_di_di = build_function_type_list (intDI_type_node, + intDI_type_node, + intDI_type_node, 0); + tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node, + ptr_type_node, + intSI_type_node, 0); + tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node, + ptr_type_node, + intDI_type_node, 0); + + /* Packing and expanding vectors. */ + def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi); + def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis, + v8qi_ftype_v2si_v8qi); + def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis, + v2hi_ftype_v2si); + def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi); + def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis, + v8qi_ftype_v4qi_v4qi); + + /* Multiplications. */ + def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis, + v4hi_ftype_v4qi_v4hi); + def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis, + v4hi_ftype_v4qi_v2hi); + def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis, + v4hi_ftype_v4qi_v2hi); + def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis, + v4hi_ftype_v8qi_v4hi); + def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis, + v4hi_ftype_v8qi_v4hi); + def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis, + v2si_ftype_v4qi_v2hi); + def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis, + v2si_ftype_v4qi_v2hi); + + /* Data aligning. */ + def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis, + v4hi_ftype_v4hi_v4hi); + def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis, + v8qi_ftype_v8qi_v8qi); + def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis, + v2si_ftype_v2si_v2si); + def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis, + di_ftype_di_di); + if (TARGET_ARCH64) + def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis, + ptr_ftype_ptr_di); + else + def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis, + ptr_ftype_ptr_si); + + /* Pixel distance. 
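+   (Editorial sketch, not part of the upstream comment: this builtin matches
+   di_ftype_v8qi_v8qi_di above, i.e. it takes two v8qi vectors plus a 64-bit
+   accumulator and returns the updated accumulator, adding in the sum of
+   absolute differences of the eight byte elements, e.g.
+
+     long long acc = 0;
+     acc = __builtin_vis_pdist (a, b, acc);    with a, b of type v8qi
+   )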
*/ + def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis, + di_ftype_v8qi_v8qi_di); +} + +/* Handle TARGET_EXPAND_BUILTIN target hook. + Expand builtin functions for sparc intrinsics. */ + +static rtx +sparc_expand_builtin (tree exp, rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode tmode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree arg; + call_expr_arg_iterator iter; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int icode = DECL_FUNCTION_CODE (fndecl); + rtx pat, op[4]; + enum machine_mode mode[4]; + int arg_count = 0; + + mode[0] = insn_data[icode].operand[0].mode; + if (!target + || GET_MODE (target) != mode[0] + || ! (*insn_data[icode].operand[0].predicate) (target, mode[0])) + op[0] = gen_reg_rtx (mode[0]); + else + op[0] = target; + + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + arg_count++; + mode[arg_count] = insn_data[icode].operand[arg_count].mode; + op[arg_count] = expand_normal (arg); + + if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count], + mode[arg_count])) + op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]); + } + + switch (arg_count) + { + case 1: + pat = GEN_FCN (icode) (op[0], op[1]); + break; + case 2: + pat = GEN_FCN (icode) (op[0], op[1], op[2]); + break; + case 3: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); + break; + default: + gcc_unreachable (); + } + + if (!pat) + return NULL_RTX; + + emit_insn (pat); + + return op[0]; +} + +static int +sparc_vis_mul8x16 (int e8, int e16) +{ + return (e8 * e16 + 128) / 256; +} + +/* Multiply the vector elements in ELTS0 to the elements in ELTS1 as specified + by FNCODE. All of the elements in ELTS0 and ELTS1 lists must be integer + constants. A tree list with the results of the multiplications is returned, + and each element in the list is of INNER_TYPE. */ + +static tree +sparc_handle_vis_mul8x16 (int fncode, tree inner_type, tree elts0, tree elts1) +{ + tree n_elts = NULL_TREE; + int scale; + + switch (fncode) + { + case CODE_FOR_fmul8x16_vis: + for (; elts0 && elts1; + elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1)) + { + int val + = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)), + TREE_INT_CST_LOW (TREE_VALUE (elts1))); + n_elts = tree_cons (NULL_TREE, + build_int_cst (inner_type, val), + n_elts); + } + break; + + case CODE_FOR_fmul8x16au_vis: + scale = TREE_INT_CST_LOW (TREE_VALUE (elts1)); + + for (; elts0; elts0 = TREE_CHAIN (elts0)) + { + int val + = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)), + scale); + n_elts = tree_cons (NULL_TREE, + build_int_cst (inner_type, val), + n_elts); + } + break; + + case CODE_FOR_fmul8x16al_vis: + scale = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (elts1))); + + for (; elts0; elts0 = TREE_CHAIN (elts0)) + { + int val + = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)), + scale); + n_elts = tree_cons (NULL_TREE, + build_int_cst (inner_type, val), + n_elts); + } + break; + + default: + gcc_unreachable (); + } + + return nreverse (n_elts); + +} +/* Handle TARGET_FOLD_BUILTIN target hook. + Fold builtin functions for SPARC intrinsics. If IGNORE is true the + result of the function call is ignored. NULL_TREE is returned if the + function could not be folded. 
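+   (Editorial example, not part of the upstream comment: e.g. a call to
+   __builtin_vis_fexpand on the constant vector {1, 2, 3, 4} is folded by the
+   CODE_FOR_fexpand_vis case below into {16, 32, 48, 64}, since each 8-bit
+   element is widened and shifted left by 4.)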
*/ + +static tree +sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, + tree *args, bool ignore) +{ + tree arg0, arg1, arg2; + tree rtype = TREE_TYPE (TREE_TYPE (fndecl)); + enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl); + + if (ignore + && icode != CODE_FOR_alignaddrsi_vis + && icode != CODE_FOR_alignaddrdi_vis) + return build_zero_cst (rtype); + + switch (icode) + { + case CODE_FOR_fexpand_vis: + arg0 = args[0]; + STRIP_NOPS (arg0); + + if (TREE_CODE (arg0) == VECTOR_CST) + { + tree inner_type = TREE_TYPE (rtype); + tree elts = TREE_VECTOR_CST_ELTS (arg0); + tree n_elts = NULL_TREE; + + for (; elts; elts = TREE_CHAIN (elts)) + { + unsigned int val = TREE_INT_CST_LOW (TREE_VALUE (elts)) << 4; + n_elts = tree_cons (NULL_TREE, + build_int_cst (inner_type, val), + n_elts); + } + return build_vector (rtype, nreverse (n_elts)); + } + break; + + case CODE_FOR_fmul8x16_vis: + case CODE_FOR_fmul8x16au_vis: + case CODE_FOR_fmul8x16al_vis: + arg0 = args[0]; + arg1 = args[1]; + STRIP_NOPS (arg0); + STRIP_NOPS (arg1); + + if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) + { + tree inner_type = TREE_TYPE (rtype); + tree elts0 = TREE_VECTOR_CST_ELTS (arg0); + tree elts1 = TREE_VECTOR_CST_ELTS (arg1); + tree n_elts = sparc_handle_vis_mul8x16 (icode, inner_type, elts0, + elts1); + + return build_vector (rtype, n_elts); + } + break; + + case CODE_FOR_fpmerge_vis: + arg0 = args[0]; + arg1 = args[1]; + STRIP_NOPS (arg0); + STRIP_NOPS (arg1); + + if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) + { + tree elts0 = TREE_VECTOR_CST_ELTS (arg0); + tree elts1 = TREE_VECTOR_CST_ELTS (arg1); + tree n_elts = NULL_TREE; + + for (; elts0 && elts1; + elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1)) + { + n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts0), n_elts); + n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts1), n_elts); + } + + return build_vector (rtype, nreverse (n_elts)); + } + break; + + case CODE_FOR_pdist_vis: + arg0 = args[0]; + arg1 = args[1]; + arg2 = args[2]; + STRIP_NOPS (arg0); + STRIP_NOPS (arg1); + STRIP_NOPS (arg2); + + if (TREE_CODE (arg0) == VECTOR_CST + && TREE_CODE (arg1) == VECTOR_CST + && TREE_CODE (arg2) == INTEGER_CST) + { + int overflow = 0; + unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2); + HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2); + tree elts0 = TREE_VECTOR_CST_ELTS (arg0); + tree elts1 = TREE_VECTOR_CST_ELTS (arg1); + + for (; elts0 && elts1; + elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1)) + { + unsigned HOST_WIDE_INT + low0 = TREE_INT_CST_LOW (TREE_VALUE (elts0)), + low1 = TREE_INT_CST_LOW (TREE_VALUE (elts1)); + HOST_WIDE_INT high0 = TREE_INT_CST_HIGH (TREE_VALUE (elts0)); + HOST_WIDE_INT high1 = TREE_INT_CST_HIGH (TREE_VALUE (elts1)); + + unsigned HOST_WIDE_INT l; + HOST_WIDE_INT h; + + overflow |= neg_double (low1, high1, &l, &h); + overflow |= add_double (low0, high0, l, h, &l, &h); + if (h < 0) + overflow |= neg_double (l, h, &l, &h); + + overflow |= add_double (low, high, l, h, &low, &high); + } + + gcc_assert (overflow == 0); + + return build_int_cst_wide (rtype, low, high); + } + + default: + break; + } + + return NULL_TREE; +} + +/* ??? This duplicates information provided to the compiler by the + ??? scheduler description. Some day, teach genautomata to output + ??? the latencies and then CSE will just use that. 
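+   (Editorial worked example, not part of the upstream comment: in the MULT
+   case below, a constant multiplier adds a popcount-based cost; e.g. a
+   multiplier with 8 bits set on a cpu whose int_mul_bit_factor is 2 costs
+   COSTS_N_INSNS ((8 - 3) / 2) == COSTS_N_INSNS (2) on top of int_mul or
+   int_mulX.)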
*/ + +static bool +sparc_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) +{ + enum machine_mode mode = GET_MODE (x); + bool float_mode_p = FLOAT_MODE_P (mode); + + switch (code) + { + case CONST_INT: + if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000) + { + *total = 0; + return true; + } + /* FALLTHRU */ + + case HIGH: + *total = 2; + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = 4; + return true; + + case CONST_DOUBLE: + if (GET_MODE (x) == VOIDmode + && ((CONST_DOUBLE_HIGH (x) == 0 + && CONST_DOUBLE_LOW (x) < 0x1000) + || (CONST_DOUBLE_HIGH (x) == -1 + && CONST_DOUBLE_LOW (x) < 0 + && CONST_DOUBLE_LOW (x) >= -0x1000))) + *total = 0; + else + *total = 8; + return true; + + case MEM: + /* If outer-code was a sign or zero extension, a cost + of COSTS_N_INSNS (1) was already added in. This is + why we are subtracting it back out. */ + if (outer_code == ZERO_EXTEND) + { + *total = sparc_costs->int_zload - COSTS_N_INSNS (1); + } + else if (outer_code == SIGN_EXTEND) + { + *total = sparc_costs->int_sload - COSTS_N_INSNS (1); + } + else if (float_mode_p) + { + *total = sparc_costs->float_load; + } + else + { + *total = sparc_costs->int_load; + } + + return true; + + case PLUS: + case MINUS: + if (float_mode_p) + *total = sparc_costs->float_plusminus; + else + *total = COSTS_N_INSNS (1); + return false; + + case MULT: + if (float_mode_p) + *total = sparc_costs->float_mul; + else if (! TARGET_HARD_MUL) + *total = COSTS_N_INSNS (25); + else + { + int bit_cost; + + bit_cost = 0; + if (sparc_costs->int_mul_bit_factor) + { + int nbits; + + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + { + unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); + for (nbits = 0; value != 0; value &= value - 1) + nbits++; + } + else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE + && GET_MODE (XEXP (x, 1)) == VOIDmode) + { + rtx x1 = XEXP (x, 1); + unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1); + unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1); + + for (nbits = 0; value1 != 0; value1 &= value1 - 1) + nbits++; + for (; value2 != 0; value2 &= value2 - 1) + nbits++; + } + else + nbits = 7; + + if (nbits < 3) + nbits = 3; + bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor; + bit_cost = COSTS_N_INSNS (bit_cost); + } + + if (mode == DImode) + *total = sparc_costs->int_mulX + bit_cost; + else + *total = sparc_costs->int_mul + bit_cost; + } + return false; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty; + return false; + + case DIV: + case UDIV: + case MOD: + case UMOD: + if (float_mode_p) + { + if (mode == DFmode) + *total = sparc_costs->float_div_df; + else + *total = sparc_costs->float_div_sf; + } + else + { + if (mode == DImode) + *total = sparc_costs->int_divX; + else + *total = sparc_costs->int_div; + } + return false; + + case NEG: + if (! 
float_mode_p) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case ABS: + case FLOAT: + case UNSIGNED_FLOAT: + case FIX: + case UNSIGNED_FIX: + case FLOAT_EXTEND: + case FLOAT_TRUNCATE: + *total = sparc_costs->float_move; + return false; + + case SQRT: + if (mode == DFmode) + *total = sparc_costs->float_sqrt_df; + else + *total = sparc_costs->float_sqrt_sf; + return false; + + case COMPARE: + if (float_mode_p) + *total = sparc_costs->float_cmp; + else + *total = COSTS_N_INSNS (1); + return false; + + case IF_THEN_ELSE: + if (float_mode_p) + *total = sparc_costs->float_cmove; + else + *total = sparc_costs->int_cmove; + return false; + + case IOR: + /* Handle the NAND vector patterns. */ + if (sparc_vector_mode_supported_p (GET_MODE (x)) + && GET_CODE (XEXP (x, 0)) == NOT + && GET_CODE (XEXP (x, 1)) == NOT) + { + *total = COSTS_N_INSNS (1); + return true; + } + else + return false; + + default: + return false; + } +} + +/* Emit the sequence of insns SEQ while preserving the registers REG and REG2. + This is achieved by means of a manual dynamic stack space allocation in + the current frame. We make the assumption that SEQ doesn't contain any + function calls, with the possible exception of calls to the GOT helper. */ + +static void +emit_and_preserve (rtx seq, rtx reg, rtx reg2) +{ + /* We must preserve the lowest 16 words for the register save area. */ + HOST_WIDE_INT offset = 16*UNITS_PER_WORD; + /* We really need only 2 words of fresh stack space. */ + HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD); + + rtx slot + = gen_rtx_MEM (word_mode, plus_constant (stack_pointer_rtx, + SPARC_STACK_BIAS + offset)); + + emit_insn (gen_stack_pointer_dec (GEN_INT (size))); + emit_insn (gen_rtx_SET (VOIDmode, slot, reg)); + if (reg2) + emit_insn (gen_rtx_SET (VOIDmode, + adjust_address (slot, word_mode, UNITS_PER_WORD), + reg2)); + emit_insn (seq); + if (reg2) + emit_insn (gen_rtx_SET (VOIDmode, + reg2, + adjust_address (slot, word_mode, UNITS_PER_WORD))); + emit_insn (gen_rtx_SET (VOIDmode, reg, slot)); + emit_insn (gen_stack_pointer_inc (GEN_INT (size))); +} + +/* Output the assembler code for a thunk function. THUNK_DECL is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is nonzero, the word at address + (*THIS + VCALL_OFFSET) should be additionally added to THIS. */ + +static void +sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, + tree function) +{ + rtx this_rtx, insn, funexp; + unsigned int int_arg_first; + + reload_completed = 1; + epilogue_completed = 1; + + emit_note (NOTE_INSN_PROLOGUE_END); + + if (flag_delayed_branch) + { + /* We will emit a regular sibcall below, so we need to instruct + output_sibcall that we are in a leaf function. */ + sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1; + + /* This will cause final.c to invoke leaf_renumber_regs so we + must behave as if we were in a not-yet-leafified function. */ + int_arg_first = SPARC_INCOMING_INT_ARG_FIRST; + } + else + { + /* We will emit the sibcall manually below, so we will need to + manually spill non-leaf registers. */ + sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0; + + /* We really are in a leaf function. */ + int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST; + } + + /* Find the "this" pointer. 
Normally in %o0, but in ARCH64 if the function + returns a structure, the structure return pointer is there instead. */ + if (TARGET_ARCH64 + && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) + this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1); + else + this_rtx = gen_rtx_REG (Pmode, int_arg_first); + + /* Add DELTA. When possible use a plain add, otherwise load it into + a register first. */ + if (delta) + { + rtx delta_rtx = GEN_INT (delta); + + if (! SPARC_SIMM13_P (delta)) + { + rtx scratch = gen_rtx_REG (Pmode, 1); + emit_move_insn (scratch, delta_rtx); + delta_rtx = scratch; + } + + /* THIS_RTX += DELTA. */ + emit_insn (gen_add2_insn (this_rtx, delta_rtx)); + } + + /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */ + if (vcall_offset) + { + rtx vcall_offset_rtx = GEN_INT (vcall_offset); + rtx scratch = gen_rtx_REG (Pmode, 1); + + gcc_assert (vcall_offset < 0); + + /* SCRATCH = *THIS_RTX. */ + emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx)); + + /* Prepare for adding VCALL_OFFSET. The difficulty is that we + may not have any available scratch register at this point. */ + if (SPARC_SIMM13_P (vcall_offset)) + ; + /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */ + else if (! fixed_regs[5] + /* The below sequence is made up of at least 2 insns, + while the default method may need only one. */ + && vcall_offset < -8192) + { + rtx scratch2 = gen_rtx_REG (Pmode, 5); + emit_move_insn (scratch2, vcall_offset_rtx); + vcall_offset_rtx = scratch2; + } + else + { + rtx increment = GEN_INT (-4096); + + /* VCALL_OFFSET is a negative number whose typical range can be + estimated as -32768..0 in 32-bit mode. In almost all cases + it is therefore cheaper to emit multiple add insns than + spilling and loading the constant into a register (at least + 6 insns). */ + while (! SPARC_SIMM13_P (vcall_offset)) + { + emit_insn (gen_add2_insn (scratch, increment)); + vcall_offset += 4096; + } + vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */ + } + + /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */ + emit_move_insn (scratch, gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, + scratch, + vcall_offset_rtx))); + + /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */ + emit_insn (gen_add2_insn (this_rtx, scratch)); + } + + /* Generate a tail call to the target function. */ + if (! TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + + if (flag_delayed_branch) + { + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); + insn = emit_call_insn (gen_sibcall (funexp)); + SIBLING_CALL_P (insn) = 1; + } + else + { + /* The hoops we have to jump through in order to generate a sibcall + without using delay slots... */ + rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1); + + if (flag_pic) + { + spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */ + start_sequence (); + /* Delay emitting the GOT helper function because it needs to + change the section and we are emitting assembly code. 
*/ + load_got_register (); /* clobbers %o7 */ + scratch = sparc_legitimize_pic_address (funexp, scratch); + seq = get_insns (); + end_sequence (); + emit_and_preserve (seq, spill_reg, pic_offset_table_rtx); + } + else if (TARGET_ARCH32) + { + emit_insn (gen_rtx_SET (VOIDmode, + scratch, + gen_rtx_HIGH (SImode, funexp))); + emit_insn (gen_rtx_SET (VOIDmode, + scratch, + gen_rtx_LO_SUM (SImode, scratch, funexp))); + } + else /* TARGET_ARCH64 */ + { + switch (sparc_cmodel) + { + case CM_MEDLOW: + case CM_MEDMID: + /* The destination can serve as a temporary. */ + sparc_emit_set_symbolic_const64 (scratch, funexp, scratch); + break; + + case CM_MEDANY: + case CM_EMBMEDANY: + /* The destination cannot serve as a temporary. */ + spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */ + start_sequence (); + sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg); + seq = get_insns (); + end_sequence (); + emit_and_preserve (seq, spill_reg, 0); + break; + + default: + gcc_unreachable (); + } + } + + emit_jump_insn (gen_indirect_jump (scratch)); + } + + emit_barrier (); + + /* Run just enough of rest_of_compilation to get the insns emitted. + There's not really enough bulk here to make other passes such as + instruction scheduling worth while. Note that use_thunk calls + assemble_start_function and assemble_end_function. */ + insn = get_insns (); + insn_locators_alloc (); + shorten_branches (insn); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); + + reload_completed = 0; + epilogue_completed = 0; +} + +/* Return true if sparc_output_mi_thunk would be able to output the + assembler code for the thunk function specified by the arguments + it is passed, and false otherwise. */ +static bool +sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta ATTRIBUTE_UNUSED, + HOST_WIDE_INT vcall_offset, + const_tree function ATTRIBUTE_UNUSED) +{ + /* Bound the loop used in the default method above. */ + return (vcall_offset >= -32768 || ! fixed_regs[5]); +} + +/* We use the machine specific reorg pass to enable workarounds for errata. */ + +static void +sparc_reorg (void) +{ + rtx insn, next; + + /* The only erratum we handle for now is that of the AT697F processor. */ + if (!sparc_fix_at697f) + return; + + /* We need to have the (essentially) final form of the insn stream in order + to properly detect the various hazards. Run delay slot scheduling. */ + if (optimize > 0 && flag_delayed_branch) + { + cleanup_barriers (); + dbr_schedule (get_insns ()); + } + + /* Now look for specific patterns in the insn stream. */ + for (insn = get_insns (); insn; insn = next) + { + bool insert_nop = false; + rtx set; + + /* Look for a single-word load into an odd-numbered FP register. */ + if (NONJUMP_INSN_P (insn) + && (set = single_set (insn)) != NULL_RTX + && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4 + && MEM_P (SET_SRC (set)) + && REG_P (SET_DEST (set)) + && REGNO (SET_DEST (set)) > 31 + && REGNO (SET_DEST (set)) % 2 != 0) + { + /* The wrong dependency is on the enclosing double register. */ + unsigned int x = REGNO (SET_DEST (set)) - 1; + unsigned int src1, src2, dest; + int code; + + /* If the insn has a delay slot, then it cannot be problematic. 
*/ + next = next_active_insn (insn); + if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE) + code = -1; + else + { + extract_insn (next); + code = INSN_CODE (next); + } + + switch (code) + { + case CODE_FOR_adddf3: + case CODE_FOR_subdf3: + case CODE_FOR_muldf3: + case CODE_FOR_divdf3: + dest = REGNO (recog_data.operand[0]); + src1 = REGNO (recog_data.operand[1]); + src2 = REGNO (recog_data.operand[2]); + if (src1 != src2) + { + /* Case [1-4]: + ld [address], %fx+1 + FPOPd %f{x,y}, %f{y,x}, %f{x,y} */ + if ((src1 == x || src2 == x) + && (dest == src1 || dest == src2)) + insert_nop = true; + } + else + { + /* Case 5: + ld [address], %fx+1 + FPOPd %fx, %fx, %fx */ + if (src1 == x + && dest == src1 + && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3)) + insert_nop = true; + } + break; + + case CODE_FOR_sqrtdf2: + dest = REGNO (recog_data.operand[0]); + src1 = REGNO (recog_data.operand[1]); + /* Case 6: + ld [address], %fx+1 + fsqrtd %fx, %fx */ + if (src1 == x && dest == src1) + insert_nop = true; + break; + + default: + break; + } + } + else + next = NEXT_INSN (insn); + + if (insert_nop) + emit_insn_after (gen_nop (), insn); + } +} + +/* How to allocate a 'struct machine_function'. */ + +static struct machine_function * +sparc_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Locate some local-dynamic symbol still in use by this function + so that we can print its name in local-dynamic base patterns. */ + +static const char * +get_some_local_dynamic_name (void) +{ + rtx insn; + + if (cfun->machine->some_ld_name) + return cfun->machine->some_ld_name; + + for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) + return cfun->machine->some_ld_name; + + gcc_unreachable (); +} + +static int +get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + if (x + && GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) + { + cfun->machine->some_ld_name = XSTR (x, 0); + return 1; + } + + return 0; +} + +/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook. + This is called from dwarf2out.c to emit call frame instructions + for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */ +static void +sparc_dwarf_handle_frame_unspec (const char *label, + rtx pattern ATTRIBUTE_UNUSED, + int index ATTRIBUTE_UNUSED) +{ + gcc_assert (index == UNSPECV_SAVEW); + dwarf2out_window_save (label); +} + +/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. + We need to emit DTP-relative relocations. */ + +static void +sparc_output_dwarf_dtprel (FILE *file, int size, rtx x) +{ + switch (size) + { + case 4: + fputs ("\t.word\t%r_tls_dtpoff32(", file); + break; + case 8: + fputs ("\t.xword\t%r_tls_dtpoff64(", file); + break; + default: + gcc_unreachable (); + } + output_addr_const (file, x); + fputs (")", file); +} + +/* Do whatever processing is required at the end of a file. */ + +static void +sparc_file_end (void) +{ + /* If we need to emit the special GOT helper function, do so now. 
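+   (Editorial note, not part of the upstream comment: the stub written below
+   consists of just "jmp %o7+8" with "add %o7, <GOT register>, <GOT register>"
+   in its delay slot when delayed branches are enabled; otherwise the add is
+   emitted before the jmp and a nop fills the delay slot.)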
*/ + if (got_helper_rtx) + { + const char *name = XSTR (got_helper_rtx, 0); + const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM]; +#ifdef DWARF2_UNWIND_INFO + bool do_cfi; +#endif + + if (USE_HIDDEN_LINKONCE) + { + tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier (name), + build_function_type (void_type_node, + void_list_node)); + DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, + NULL_TREE, void_type_node); + TREE_PUBLIC (decl) = 1; + TREE_STATIC (decl) = 1; + make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); + DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (decl) = 1; + resolve_unique_section (decl, 0, flag_function_sections); + allocate_struct_function (decl, true); + cfun->is_thunk = 1; + current_function_decl = decl; + init_varasm_status (); + assemble_start_function (decl, name); + } + else + { + const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT); + switch_to_section (text_section); + if (align > 0) + ASM_OUTPUT_ALIGN (asm_out_file, align); + ASM_OUTPUT_LABEL (asm_out_file, name); + } + +#ifdef DWARF2_UNWIND_INFO + do_cfi = dwarf2out_do_cfi_asm (); + if (do_cfi) + fprintf (asm_out_file, "\t.cfi_startproc\n"); +#endif + if (flag_delayed_branch) + fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n", + reg_name, reg_name); + else + fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n", + reg_name, reg_name); +#ifdef DWARF2_UNWIND_INFO + if (do_cfi) + fprintf (asm_out_file, "\t.cfi_endproc\n"); +#endif + } + + if (NEED_INDICATE_EXEC_STACK) + file_end_indicate_exec_stack (); +} + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +/* Implement TARGET_MANGLE_TYPE. */ + +static const char * +sparc_mangle_type (const_tree type) +{ + if (!TARGET_64BIT + && TYPE_MAIN_VARIANT (type) == long_double_type_node + && TARGET_LONG_DOUBLE_128) + return "g"; + + /* For all other types, use normal C++ mangling. */ + return NULL; +} +#endif + +/* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit + compare and swap on the word containing the byte or half-word. */ + +void +sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval) +{ + rtx addr1 = force_reg (Pmode, XEXP (mem, 0)); + rtx addr = gen_reg_rtx (Pmode); + rtx off = gen_reg_rtx (SImode); + rtx oldv = gen_reg_rtx (SImode); + rtx newv = gen_reg_rtx (SImode); + rtx oldvalue = gen_reg_rtx (SImode); + rtx newvalue = gen_reg_rtx (SImode); + rtx res = gen_reg_rtx (SImode); + rtx resv = gen_reg_rtx (SImode); + rtx memsi, val, mask, end_label, loop_label, cc; + + emit_insn (gen_rtx_SET (VOIDmode, addr, + gen_rtx_AND (Pmode, addr1, GEN_INT (-4)))); + + if (Pmode != SImode) + addr1 = gen_lowpart (SImode, addr1); + emit_insn (gen_rtx_SET (VOIDmode, off, + gen_rtx_AND (SImode, addr1, GEN_INT (3)))); + + memsi = gen_rtx_MEM (SImode, addr); + set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER); + MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem); + + val = force_reg (SImode, memsi); + + emit_insn (gen_rtx_SET (VOIDmode, off, + gen_rtx_XOR (SImode, off, + GEN_INT (GET_MODE (mem) == QImode + ? 
3 : 2)))); + + emit_insn (gen_rtx_SET (VOIDmode, off, + gen_rtx_ASHIFT (SImode, off, GEN_INT (3)))); + + if (GET_MODE (mem) == QImode) + mask = force_reg (SImode, GEN_INT (0xff)); + else + mask = force_reg (SImode, GEN_INT (0xffff)); + + emit_insn (gen_rtx_SET (VOIDmode, mask, + gen_rtx_ASHIFT (SImode, mask, off))); + + emit_insn (gen_rtx_SET (VOIDmode, val, + gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), + val))); + + oldval = gen_lowpart (SImode, oldval); + emit_insn (gen_rtx_SET (VOIDmode, oldv, + gen_rtx_ASHIFT (SImode, oldval, off))); + + newval = gen_lowpart_common (SImode, newval); + emit_insn (gen_rtx_SET (VOIDmode, newv, + gen_rtx_ASHIFT (SImode, newval, off))); + + emit_insn (gen_rtx_SET (VOIDmode, oldv, + gen_rtx_AND (SImode, oldv, mask))); + + emit_insn (gen_rtx_SET (VOIDmode, newv, + gen_rtx_AND (SImode, newv, mask))); + + end_label = gen_label_rtx (); + loop_label = gen_label_rtx (); + emit_label (loop_label); + + emit_insn (gen_rtx_SET (VOIDmode, oldvalue, + gen_rtx_IOR (SImode, oldv, val))); + + emit_insn (gen_rtx_SET (VOIDmode, newvalue, + gen_rtx_IOR (SImode, newv, val))); + + emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue)); + + emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label); + + emit_insn (gen_rtx_SET (VOIDmode, resv, + gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), + res))); + + cc = gen_compare_reg_1 (NE, resv, val); + emit_insn (gen_rtx_SET (VOIDmode, val, resv)); + + /* Use cbranchcc4 to separate the compare and branch! */ + emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx), + cc, const0_rtx, loop_label)); + + emit_label (end_label); + + emit_insn (gen_rtx_SET (VOIDmode, res, + gen_rtx_AND (SImode, res, mask))); + + emit_insn (gen_rtx_SET (VOIDmode, res, + gen_rtx_LSHIFTRT (SImode, res, off))); + + emit_move_insn (result, gen_lowpart (GET_MODE (result), res)); +} + +/* Implement TARGET_FRAME_POINTER_REQUIRED. */ + +bool +sparc_frame_pointer_required (void) +{ + return !(current_function_is_leaf && only_leaf_regs_used ()); +} + +/* The way this is structured, we can't eliminate SFP in favor of SP + if the frame pointer is required: we want to use the SFP->HFP elimination + in that case. But the test in update_eliminables doesn't know we are + assuming below that we only do the former elimination. */ + +bool +sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return (to == HARD_FRAME_POINTER_REGNUM + || !targetm.frame_pointer_required ()); +} + +/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that + they won't be allocated. */ + +static void +sparc_conditional_register_usage (void) +{ + if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) + { + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + } + /* If the user has passed -f{fixed,call-{used,saved}}-g5 */ + /* then honor it. */ + if (TARGET_ARCH32 && fixed_regs[5]) + fixed_regs[5] = 1; + else if (TARGET_ARCH64 && fixed_regs[5] == 2) + fixed_regs[5] = 0; + if (! TARGET_V9) + { + int regno; + for (regno = SPARC_FIRST_V9_FP_REG; + regno <= SPARC_LAST_V9_FP_REG; + regno++) + fixed_regs[regno] = 1; + /* %fcc0 is used by v8 and v9. */ + for (regno = SPARC_FIRST_V9_FCC_REG + 1; + regno <= SPARC_LAST_V9_FCC_REG; + regno++) + fixed_regs[regno] = 1; + } + if (! TARGET_FPU) + { + int regno; + for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++) + fixed_regs[regno] = 1; + } + /* If the user has passed -f{fixed,call-{used,saved}}-g2 */ + /* then honor it. 
Likewise with g3 and g4. */ + if (fixed_regs[2] == 2) + fixed_regs[2] = ! TARGET_APP_REGS; + if (fixed_regs[3] == 2) + fixed_regs[3] = ! TARGET_APP_REGS; + if (TARGET_ARCH32 && fixed_regs[4] == 2) + fixed_regs[4] = ! TARGET_APP_REGS; + else if (TARGET_CM_EMBMEDANY) + fixed_regs[4] = 1; + else if (fixed_regs[4] == 2) + fixed_regs[4] = 0; +} + +#include "gt-sparc.h" diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h new file mode 100644 index 000000000..31e6d123b --- /dev/null +++ b/gcc/config/sparc/sparc.h @@ -0,0 +1,2122 @@ +/* Definitions of target machine for GNU compiler, for Sun SPARC. + Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997, 1998, 1999 + 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Michael Tiemann (tiemann@cygnus.com). + 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans, + at Cygnus Support. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config/vxworks-dummy.h" + +/* Note that some other tm.h files include this one and then override + whatever definitions are necessary. */ + +/* Define the specific costs for a given cpu */ + +struct processor_costs { + /* Integer load */ + const int int_load; + + /* Integer signed load */ + const int int_sload; + + /* Integer zeroed load */ + const int int_zload; + + /* Float load */ + const int float_load; + + /* fmov, fneg, fabs */ + const int float_move; + + /* fadd, fsub */ + const int float_plusminus; + + /* fcmp */ + const int float_cmp; + + /* fmov, fmovr */ + const int float_cmove; + + /* fmul */ + const int float_mul; + + /* fdivs */ + const int float_div_sf; + + /* fdivd */ + const int float_div_df; + + /* fsqrts */ + const int float_sqrt_sf; + + /* fsqrtd */ + const int float_sqrt_df; + + /* umul/smul */ + const int int_mul; + + /* mulX */ + const int int_mulX; + + /* integer multiply cost for each bit set past the most + significant 3, so the formula for multiply cost becomes: + + if (rs1 < 0) + highest_bit = highest_clear_bit(rs1); + else + highest_bit = highest_set_bit(rs1); + if (highest_bit < 3) + highest_bit = 3; + cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor); + + A value of zero indicates that the multiply costs is fixed, + and not variable. */ + const int int_mul_bit_factor; + + /* udiv/sdiv */ + const int int_div; + + /* divX */ + const int int_divX; + + /* movcc, movr */ + const int int_cmove; + + /* penalty for shifts, due to scheduling rules etc. */ + const int shift_penalty; +}; + +extern const struct processor_costs *sparc_costs; + +/* Target CPU builtins. FIXME: Defining sparc is for the benefit of + Solaris only; otherwise just define __sparc__. Sadly the headers + are such a mess there is no Solaris-specific header. 
*/ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("sparc"); \ + if (TARGET_64BIT) \ + { \ + builtin_assert ("cpu=sparc64"); \ + builtin_assert ("machine=sparc64"); \ + } \ + else \ + { \ + builtin_assert ("cpu=sparc"); \ + builtin_assert ("machine=sparc"); \ + } \ + } \ + while (0) + +/* Specify this in a cover file to provide bi-architecture (32/64) support. */ +/* #define SPARC_BI_ARCH */ + +/* Macro used later in this file to determine default architecture. */ +#define DEFAULT_ARCH32_P ((TARGET_DEFAULT & MASK_64BIT) == 0) + +/* TARGET_ARCH{32,64} are the main macros to decide which of the two + architectures to compile for. We allow targets to choose compile time or + runtime selection. */ +#ifdef IN_LIBGCC2 +#if defined(__sparcv9) || defined(__arch64__) +#define TARGET_ARCH32 0 +#else +#define TARGET_ARCH32 1 +#endif /* sparc64 */ +#else +#ifdef SPARC_BI_ARCH +#define TARGET_ARCH32 (! TARGET_64BIT) +#else +#define TARGET_ARCH32 (DEFAULT_ARCH32_P) +#endif /* SPARC_BI_ARCH */ +#endif /* IN_LIBGCC2 */ +#define TARGET_ARCH64 (! TARGET_ARCH32) + +/* Code model selection in 64-bit environment. + + The machine mode used for addresses is 32-bit wide: + + TARGET_CM_32: 32-bit address space. + It is the code model used when generating 32-bit code. + + The machine mode used for addresses is 64-bit wide: + + TARGET_CM_MEDLOW: 32-bit address space. + The executable must be in the low 32 bits of memory. + This avoids generating %uhi and %ulo terms. Programs + can be statically or dynamically linked. + + TARGET_CM_MEDMID: 44-bit address space. + The executable must be in the low 44 bits of memory, + and the %[hml]44 terms are used. The text and data + segments have a maximum size of 2GB (31-bit span). + The maximum offset from any instruction to the label + _GLOBAL_OFFSET_TABLE_ is 2GB (31-bit span). + + TARGET_CM_MEDANY: 64-bit address space. + The text and data segments have a maximum size of 2GB + (31-bit span) and may be located anywhere in memory. + The maximum offset from any instruction to the label + _GLOBAL_OFFSET_TABLE_ is 2GB (31-bit span). + + TARGET_CM_EMBMEDANY: 64-bit address space. + The text and data segments have a maximum size of 2GB + (31-bit span) and may be located anywhere in memory. + The global register %g4 contains the start address of + the data segment. Programs are statically linked and + PIC is not supported. + + Different code models are not supported in 32-bit environment. */ + +enum cmodel { + CM_32, + CM_MEDLOW, + CM_MEDMID, + CM_MEDANY, + CM_EMBMEDANY +}; + +/* One of CM_FOO. */ +extern enum cmodel sparc_cmodel; + +/* V9 code model selection. */ +#define TARGET_CM_MEDLOW (sparc_cmodel == CM_MEDLOW) +#define TARGET_CM_MEDMID (sparc_cmodel == CM_MEDMID) +#define TARGET_CM_MEDANY (sparc_cmodel == CM_MEDANY) +#define TARGET_CM_EMBMEDANY (sparc_cmodel == CM_EMBMEDANY) + +#define SPARC_DEFAULT_CMODEL CM_32 + +/* The SPARC-V9 architecture defines a relaxed memory ordering model (RMO) + which requires the following macro to be true if enabled. Prior to V9, + there are no instructions to even talk about memory synchronization. + Note that the UltraSPARC III processors don't implement RMO, unlike the + UltraSPARC II processors. Niagara and Niagara-2 do not implement RMO + either. + + Default to false; for example, Solaris never enables RMO, only ever uses + total memory ordering (TMO). */ +#define SPARC_RELAXED_ORDERING false + +/* Do not use the .note.GNU-stack convention by default. 
*/ +#define NEED_INDICATE_EXEC_STACK 0 + +/* This is call-clobbered in the normal ABI, but is reserved in the + home grown (aka upward compatible) embedded ABI. */ +#define EMBMEDANY_BASE_REG "%g4" + +/* Values of TARGET_CPU_DEFAULT, set via -D in the Makefile, + and specified by the user via --with-cpu=foo. + This specifies the cpu implementation, not the architecture size. */ +/* Note that TARGET_CPU_v9 is assumed to start the list of 64-bit + capable cpu's. */ +#define TARGET_CPU_sparc 0 +#define TARGET_CPU_v7 0 /* alias */ +#define TARGET_CPU_cypress 0 /* alias */ +#define TARGET_CPU_v8 1 /* generic v8 implementation */ +#define TARGET_CPU_supersparc 2 +#define TARGET_CPU_hypersparc 3 +#define TARGET_CPU_leon 4 +#define TARGET_CPU_sparclite 5 +#define TARGET_CPU_f930 5 /* alias */ +#define TARGET_CPU_f934 5 /* alias */ +#define TARGET_CPU_sparclite86x 6 +#define TARGET_CPU_sparclet 7 +#define TARGET_CPU_tsc701 7 /* alias */ +#define TARGET_CPU_v9 8 /* generic v9 implementation */ +#define TARGET_CPU_sparcv9 8 /* alias */ +#define TARGET_CPU_sparc64 8 /* alias */ +#define TARGET_CPU_ultrasparc 9 +#define TARGET_CPU_ultrasparc3 10 +#define TARGET_CPU_niagara 11 +#define TARGET_CPU_niagara2 12 + +#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \ + || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \ + || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3 \ + || TARGET_CPU_DEFAULT == TARGET_CPU_niagara \ + || TARGET_CPU_DEFAULT == TARGET_CPU_niagara2 + +#define CPP_CPU32_DEFAULT_SPEC "" +#define ASM_CPU32_DEFAULT_SPEC "" + +#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 +/* ??? What does Sun's CC pass? */ +#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__" +/* ??? It's not clear how other assemblers will handle this, so by default + use GAS. Sun's Solaris assembler recognizes -xarch=v8plus, but this case + is handled in sol2.h. 
*/ +#define ASM_CPU64_DEFAULT_SPEC "-Av9" +#endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc +#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__" +#define ASM_CPU64_DEFAULT_SPEC "-Av9a" +#endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3 +#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__" +#define ASM_CPU64_DEFAULT_SPEC "-Av9b" +#endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara +#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__" +#define ASM_CPU64_DEFAULT_SPEC "-Av9b" +#endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara2 +#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__" +#define ASM_CPU64_DEFAULT_SPEC "-Av9b" +#endif + +#else + +#define CPP_CPU64_DEFAULT_SPEC "" +#define ASM_CPU64_DEFAULT_SPEC "" + +#if TARGET_CPU_DEFAULT == TARGET_CPU_sparc \ + || TARGET_CPU_DEFAULT == TARGET_CPU_v8 +#define CPP_CPU32_DEFAULT_SPEC "" +#define ASM_CPU32_DEFAULT_SPEC "" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_sparclet +#define CPP_CPU32_DEFAULT_SPEC "-D__sparclet__" +#define ASM_CPU32_DEFAULT_SPEC "-Asparclet" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_sparclite +#define CPP_CPU32_DEFAULT_SPEC "-D__sparclite__" +#define ASM_CPU32_DEFAULT_SPEC "-Asparclite" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_sparclite86x +#define CPP_CPU32_DEFAULT_SPEC "-D__sparclite86x__" +#define ASM_CPU32_DEFAULT_SPEC "-Asparclite" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_supersparc +#define CPP_CPU32_DEFAULT_SPEC "-D__supersparc__ -D__sparc_v8__" +#define ASM_CPU32_DEFAULT_SPEC "" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_hypersparc +#define CPP_CPU32_DEFAULT_SPEC "-D__hypersparc__ -D__sparc_v8__" +#define ASM_CPU32_DEFAULT_SPEC "" +#endif + +#if TARGET_CPU_DEFAULT == TARGET_CPU_leon +#define CPP_CPU32_DEFAULT_SPEC "-D__leon__ -D__sparc_v8__" +#define ASM_CPU32_DEFAULT_SPEC "" +#endif + +#endif + +#if !defined(CPP_CPU32_DEFAULT_SPEC) || !defined(CPP_CPU64_DEFAULT_SPEC) + #error Unrecognized value in TARGET_CPU_DEFAULT. +#endif + +#ifdef SPARC_BI_ARCH + +#define CPP_CPU_DEFAULT_SPEC \ +(DEFAULT_ARCH32_P ? "\ +%{m64:" CPP_CPU64_DEFAULT_SPEC "} \ +%{!m64:" CPP_CPU32_DEFAULT_SPEC "} \ +" : "\ +%{m32:" CPP_CPU32_DEFAULT_SPEC "} \ +%{!m32:" CPP_CPU64_DEFAULT_SPEC "} \ +") +#define ASM_CPU_DEFAULT_SPEC \ +(DEFAULT_ARCH32_P ? "\ +%{m64:" ASM_CPU64_DEFAULT_SPEC "} \ +%{!m64:" ASM_CPU32_DEFAULT_SPEC "} \ +" : "\ +%{m32:" ASM_CPU32_DEFAULT_SPEC "} \ +%{!m32:" ASM_CPU64_DEFAULT_SPEC "} \ +") + +#else /* !SPARC_BI_ARCH */ + +#define CPP_CPU_DEFAULT_SPEC (DEFAULT_ARCH32_P ? CPP_CPU32_DEFAULT_SPEC : CPP_CPU64_DEFAULT_SPEC) +#define ASM_CPU_DEFAULT_SPEC (DEFAULT_ARCH32_P ? ASM_CPU32_DEFAULT_SPEC : ASM_CPU64_DEFAULT_SPEC) + +#endif /* !SPARC_BI_ARCH */ + +/* Define macros to distinguish architectures. */ + +/* Common CPP definitions used by CPP_SPEC amongst the various targets + for handling -mcpu=xxx switches. 
*/ +#define CPP_CPU_SPEC "\ +%{msoft-float:-D_SOFT_FLOAT} \ +%{mcpu=sparclet:-D__sparclet__} %{mcpu=tsc701:-D__sparclet__} \ +%{mcpu=sparclite:-D__sparclite__} \ +%{mcpu=f930:-D__sparclite__} %{mcpu=f934:-D__sparclite__} \ +%{mcpu=sparclite86x:-D__sparclite86x__} \ +%{mcpu=v8:-D__sparc_v8__} \ +%{mcpu=supersparc:-D__supersparc__ -D__sparc_v8__} \ +%{mcpu=hypersparc:-D__hypersparc__ -D__sparc_v8__} \ +%{mcpu=leon:-D__leon__ -D__sparc_v8__} \ +%{mcpu=v9:-D__sparc_v9__} \ +%{mcpu=ultrasparc:-D__sparc_v9__} \ +%{mcpu=ultrasparc3:-D__sparc_v9__} \ +%{mcpu=niagara:-D__sparc_v9__} \ +%{mcpu=niagara2:-D__sparc_v9__} \ +%{!mcpu*:%(cpp_cpu_default)} \ +" +#define CPP_ARCH32_SPEC "" +#define CPP_ARCH64_SPEC "-D__arch64__" + +#define CPP_ARCH_DEFAULT_SPEC \ +(DEFAULT_ARCH32_P ? CPP_ARCH32_SPEC : CPP_ARCH64_SPEC) + +#define CPP_ARCH_SPEC "\ +%{m32:%(cpp_arch32)} \ +%{m64:%(cpp_arch64)} \ +%{!m32:%{!m64:%(cpp_arch_default)}} \ +" + +/* Macro to distinguish endianness. */ +#define CPP_ENDIAN_SPEC "\ +%{mlittle-endian:-D__LITTLE_ENDIAN__}" + +/* Macros to distinguish the particular subtarget. */ +#define CPP_SUBTARGET_SPEC "" + +#define CPP_SPEC "%(cpp_cpu) %(cpp_arch) %(cpp_endian) %(cpp_subtarget)" + +/* This used to translate -dalign to -malign, but that is no good + because it can't turn off the usual meaning of making debugging dumps. */ + +#define CC1_SPEC "" + +/* Override in target specific files. */ +#define ASM_CPU_SPEC "\ +%{mcpu=sparclet:-Asparclet} %{mcpu=tsc701:-Asparclet} \ +%{mcpu=sparclite:-Asparclite} \ +%{mcpu=sparclite86x:-Asparclite} \ +%{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \ +%{mcpu=v8:-Av8} \ +%{mcpu=supersparc:-Av8} \ +%{mcpu=hypersparc:-Av8} \ +%{mcpu=leon:-Av8} \ +%{mv8plus:-Av8plus} \ +%{mcpu=v9:-Av9} \ +%{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \ +%{mcpu=ultrasparc3:%{!mv8plus:-Av9b}} \ +%{mcpu=niagara:%{!mv8plus:-Av9b}} \ +%{mcpu=niagara2:%{!mv8plus:-Av9b}} \ +%{!mcpu*:%(asm_cpu_default)} \ +" + +/* Word size selection, among other things. + This is what GAS uses. Add %(asm_arch) to ASM_SPEC to enable. */ + +#define ASM_ARCH32_SPEC "-32" +#ifdef HAVE_AS_REGISTER_PSEUDO_OP +#define ASM_ARCH64_SPEC "-64 -no-undeclared-regs" +#else +#define ASM_ARCH64_SPEC "-64" +#endif +#define ASM_ARCH_DEFAULT_SPEC \ +(DEFAULT_ARCH32_P ? ASM_ARCH32_SPEC : ASM_ARCH64_SPEC) + +#define ASM_ARCH_SPEC "\ +%{m32:%(asm_arch32)} \ +%{m64:%(asm_arch64)} \ +%{!m32:%{!m64:%(asm_arch_default)}} \ +" + +#ifdef HAVE_AS_RELAX_OPTION +#define ASM_RELAX_SPEC "%{!mno-relax:-relax}" +#else +#define ASM_RELAX_SPEC "" +#endif + +/* Special flags to the Sun-4 assembler when using pipe for input. */ + +#define ASM_SPEC "\ +%{!pg:%{!p:%{fpic|fPIC|fpie|fPIE:-k}}} %{keep-local-as-symbols:-L} \ +%(asm_cpu) %(asm_relax)" + +/* This macro defines names of additional specifications to put in the specs + that can be used in various specifications like CC1_SPEC. Its definition + is an initializer with a subgrouping for each command option. + + Each subgrouping contains a string constant, that defines the + specification name, and a string constant that used by the GCC driver + program. + + Do not define this macro if it does not need to do anything. 
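+   (Editorial example, not part of the upstream comment: e.g. the "cpp_cpu"
+   entry below lets CPP_SPEC reference %(cpp_cpu); with -mcpu=niagara2 that
+   spec expands to -D__sparc_v9__ as listed in CPP_CPU_SPEC above.)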
*/ + +#define EXTRA_SPECS \ + { "cpp_cpu", CPP_CPU_SPEC }, \ + { "cpp_cpu_default", CPP_CPU_DEFAULT_SPEC }, \ + { "cpp_arch32", CPP_ARCH32_SPEC }, \ + { "cpp_arch64", CPP_ARCH64_SPEC }, \ + { "cpp_arch_default", CPP_ARCH_DEFAULT_SPEC },\ + { "cpp_arch", CPP_ARCH_SPEC }, \ + { "cpp_endian", CPP_ENDIAN_SPEC }, \ + { "cpp_subtarget", CPP_SUBTARGET_SPEC }, \ + { "asm_cpu", ASM_CPU_SPEC }, \ + { "asm_cpu_default", ASM_CPU_DEFAULT_SPEC }, \ + { "asm_arch32", ASM_ARCH32_SPEC }, \ + { "asm_arch64", ASM_ARCH64_SPEC }, \ + { "asm_relax", ASM_RELAX_SPEC }, \ + { "asm_arch_default", ASM_ARCH_DEFAULT_SPEC },\ + { "asm_arch", ASM_ARCH_SPEC }, \ + SUBTARGET_EXTRA_SPECS + +#define SUBTARGET_EXTRA_SPECS + +/* Because libgcc can generate references back to libc (via .umul etc.) we have + to list libc again after the second libgcc. */ +#define LINK_GCC_C_SEQUENCE_SPEC "%G %L %G %L" + + +#define PTRDIFF_TYPE (TARGET_ARCH64 ? "long int" : "int") +#define SIZE_TYPE (TARGET_ARCH64 ? "long unsigned int" : "unsigned int") + +/* ??? This should be 32 bits for v9 but what can we do? */ +#define WCHAR_TYPE "short unsigned int" +#define WCHAR_TYPE_SIZE 16 + +/* Mask of all CPU selection flags. */ +#define MASK_ISA \ +(MASK_V8 + MASK_SPARCLITE + MASK_SPARCLET + MASK_V9 + MASK_DEPRECATED_V8_INSNS) + +/* TARGET_HARD_MUL: Use hardware multiply instructions but not %y. + TARGET_HARD_MUL32: Use hardware multiply instructions with rd %y + to get high 32 bits. False in V8+ or V9 because multiply stores + a 64-bit result in a register. */ + +#define TARGET_HARD_MUL32 \ + ((TARGET_V8 || TARGET_SPARCLITE \ + || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS) \ + && ! TARGET_V8PLUS && TARGET_ARCH32) + +#define TARGET_HARD_MUL \ + (TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET \ + || TARGET_DEPRECATED_V8_INSNS || TARGET_V8PLUS) + +/* MASK_APP_REGS must always be the default because that's what + FIXED_REGISTERS is set to and -ffixed- is processed before + TARGET_CONDITIONAL_REGISTER_USAGE is called (where we process + -mno-app-regs). */ +#define TARGET_DEFAULT (MASK_APP_REGS + MASK_FPU) + +/* Processor type. + These must match the values for the cpu attribute in sparc.md. */ +enum processor_type { + PROCESSOR_V7, + PROCESSOR_CYPRESS, + PROCESSOR_V8, + PROCESSOR_SUPERSPARC, + PROCESSOR_HYPERSPARC, + PROCESSOR_LEON, + PROCESSOR_SPARCLITE, + PROCESSOR_F930, + PROCESSOR_F934, + PROCESSOR_SPARCLITE86X, + PROCESSOR_SPARCLET, + PROCESSOR_TSC701, + PROCESSOR_V9, + PROCESSOR_ULTRASPARC, + PROCESSOR_ULTRASPARC3, + PROCESSOR_NIAGARA, + PROCESSOR_NIAGARA2 +}; + +/* This is set from -m{cpu,tune}=xxx. */ +extern enum processor_type sparc_cpu; + +/* Recast the cpu class to be the cpu attribute. + Every file includes us, but not every file includes insn-attr.h. */ +#define sparc_cpu_attr ((enum attr_cpu) sparc_cpu) + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-cpu is ignored if -mcpu is specified. + --with-tune is ignored if -mtune is specified. + --with-float is ignored if -mhard-float, -msoft-float, -mfpu, or -mno-fpu + are specified. */ +#define OPTION_DEFAULT_SPECS \ + {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \ + {"float", "%{!msoft-float:%{!mhard-float:%{!mfpu:%{!mno-fpu:-m%(VALUE)-float}}}}" } + +/* sparc_select[0] is reserved for the default cpu. 
*/ +struct sparc_cpu_select +{ + const char *string; + const char *const name; + const int set_tune_p; + const int set_arch_p; +}; + +extern struct sparc_cpu_select sparc_select[]; + +/* target machine storage layout */ + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN 1 + +/* Define this if most significant byte of a word is the lowest numbered. */ +#define BYTES_BIG_ENDIAN 1 + +/* Define this if most significant word of a multiword number is the lowest + numbered. */ +#define WORDS_BIG_ENDIAN 1 + +#define MAX_BITS_PER_WORD 64 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD (TARGET_ARCH64 ? 8 : 4) +#ifdef IN_LIBGCC2 +#define MIN_UNITS_PER_WORD UNITS_PER_WORD +#else +#define MIN_UNITS_PER_WORD 4 +#endif + +/* Now define the sizes of the C data types. */ + +#define SHORT_TYPE_SIZE 16 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE (TARGET_ARCH64 ? 64 : 32) +#define LONG_LONG_TYPE_SIZE 64 +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 + +/* LONG_DOUBLE_TYPE_SIZE is defined per OS even though the + SPARC ABI says that it is 128-bit wide. */ +/* #define LONG_DOUBLE_TYPE_SIZE 128 */ + +/* The widest floating-point format really supported by the hardware. */ +#define WIDEST_HARDWARE_FP_SIZE 64 + +/* Width in bits of a pointer. This is the size of ptr_mode. */ +#define POINTER_SIZE (TARGET_PTR64 ? 64 : 32) + +/* This is the machine mode used for addresses. */ +#define Pmode (TARGET_ARCH64 ? DImode : SImode) + +/* If we have to extend pointers (only when TARGET_ARCH64 and not + TARGET_PTR64), we want to do it unsigned. This macro does nothing + if ptr_mode and Pmode are the same. */ +#define POINTERS_EXTEND_UNSIGNED 1 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY (TARGET_ARCH64 ? 64 : 32) + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +/* FIXME, this is wrong when TARGET_ARCH64 and TARGET_STACK_BIAS, because + then %sp+2047 is 128-bit aligned so %sp is really only byte-aligned. */ +#define STACK_BOUNDARY (TARGET_ARCH64 ? 128 : 64) +/* Temporary hack until the FIXME above is fixed. */ +#define SPARC_STACK_BOUNDARY_HACK (TARGET_ARCH64 && TARGET_STACK_BIAS) + +/* ALIGN FRAMES on double word boundaries */ + +#define SPARC_STACK_ALIGN(LOC) \ + (TARGET_ARCH64 ? (((LOC)+15) & ~15) : (((LOC)+7) & ~7)) + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY (TARGET_ARCH64 ? 64 : 32) + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bit-field declared as `int' forces `int' alignment for the struct. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT (TARGET_ARCH64 ? 128 : 64) + +/* The best alignment to use in cases where we have a choice. */ +#define FASTEST_ALIGNMENT 64 + +/* Define this macro as an expression for the alignment of a structure + (given by STRUCT as a tree node) if the alignment computed in the + usual way is COMPUTED and the alignment explicitly specified was + SPECIFIED. + + The default is to use SPECIFIED if it is larger; otherwise, use + the smaller of COMPUTED and `BIGGEST_ALIGNMENT' */ +#define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) \ + (TARGET_FASTER_STRUCTS ? 
\ + ((TREE_CODE (STRUCT) == RECORD_TYPE \ + || TREE_CODE (STRUCT) == UNION_TYPE \ + || TREE_CODE (STRUCT) == QUAL_UNION_TYPE) \ + && TYPE_FIELDS (STRUCT) != 0 \ + ? MAX (MAX ((COMPUTED), (SPECIFIED)), BIGGEST_ALIGNMENT) \ + : MAX ((COMPUTED), (SPECIFIED))) \ + : MAX ((COMPUTED), (SPECIFIED))) + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < FASTEST_ALIGNMENT) \ + ? FASTEST_ALIGNMENT : (ALIGN)) + +/* Make arrays of chars word-aligned for the same reasons. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN)) + +/* Make local arrays of chars word-aligned for the same reasons. */ +#define LOCAL_ALIGNMENT(TYPE, ALIGN) DATA_ALIGNMENT (TYPE, ALIGN) + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* Things that must be doubleword aligned cannot go in the text section, + because the linker fails to align the text section enough! + Put them in the data section. This macro is only used in this file. */ +#define MAX_TEXT_ALIGN 32 + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + SPARC has 32 integer registers and 32 floating point registers. + 64-bit SPARC has 32 additional fp regs, but the odd numbered ones are not + accessible. We still account for them to simplify register computations + (e.g.: in CLASS_MAX_NREGS). There are also 4 fp condition code registers, so + 32+32+32+4 == 100. + Register 100 is used as the integer condition code register. + Register 101 is used as the soft frame pointer register. */ + +#define FIRST_PSEUDO_REGISTER 102 + +#define SPARC_FIRST_FP_REG 32 +/* Additional V9 fp regs. */ +#define SPARC_FIRST_V9_FP_REG 64 +#define SPARC_LAST_V9_FP_REG 95 +/* V9 %fcc[0123]. V8 uses (figuratively) %fcc0. */ +#define SPARC_FIRST_V9_FCC_REG 96 +#define SPARC_LAST_V9_FCC_REG 99 +/* V8 fcc reg. */ +#define SPARC_FCC_REG 96 +/* Integer CC reg. We don't distinguish %icc from %xcc. */ +#define SPARC_ICC_REG 100 + +/* Nonzero if REGNO is an fp reg. */ +#define SPARC_FP_REG_P(REGNO) \ +((REGNO) >= SPARC_FIRST_FP_REG && (REGNO) <= SPARC_LAST_V9_FP_REG) + +/* Argument passing regs. */ +#define SPARC_OUTGOING_INT_ARG_FIRST 8 +#define SPARC_INCOMING_INT_ARG_FIRST 24 +#define SPARC_FP_ARG_FIRST 32 + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. + + On non-v9 systems: + g1 is free to use as temporary. + g2-g4 are reserved for applications. Gcc normally uses them as + temporaries, but this can be disabled via the -mno-app-regs option. + g5 through g7 are reserved for the operating system. + + On v9 systems: + g1,g5 are free to use as temporaries, and are free to use between calls + if the call is to an external function via the PLT. + g4 is free to use as a temporary in the non-embedded case. + g4 is reserved in the embedded case. + g2-g3 are reserved for applications. Gcc normally uses them as + temporaries, but this can be disabled via the -mno-app-regs option. 
+ g6-g7 are reserved for the operating system (or application in + embedded case). + ??? Register 1 is used as a temporary by the 64 bit sethi pattern, so must + currently be a fixed register until this pattern is rewritten. + Register 1 is also used when restoring call-preserved registers in large + stack frames. + + Registers fixed in arch32 and not arch64 (or vice-versa) are marked in + TARGET_CONDITIONAL_REGISTER_USAGE in order to properly handle -ffixed-. +*/ + +#define FIXED_REGISTERS \ + {1, 0, 2, 2, 2, 2, 1, 1, \ + 0, 0, 0, 0, 0, 0, 1, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 1, 1, \ + \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + \ + 0, 0, 0, 0, 0, 1} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ + +#define CALL_USED_REGISTERS \ + {1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 1, 1, \ + \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + \ + 1, 1, 1, 1, 1, 1} + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. + + On SPARC, ordinary registers hold 32 bits worth; + this means both integer and floating point registers. + On v9, integer regs hold 64 bits worth; floating point regs hold + 32 bits worth (this includes the new fp regs as even the odd ones are + included in the hard register count). */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + (TARGET_ARCH64 \ + ? ((REGNO) < 32 || (REGNO) == FRAME_POINTER_REGNUM \ + ? (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD \ + : (GET_MODE_SIZE (MODE) + 3) / 4) \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* Due to the ARCH64 discrepancy above we must override this next + macro too. */ +#define REGMODE_NATURAL_SIZE(MODE) \ + ((TARGET_ARCH64 && FLOAT_MODE_P (MODE)) ? 4 : UNITS_PER_WORD) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. + See sparc.c for how we initialize this. */ +extern const int *hard_regno_mode_classes; +extern int sparc_mode_class[]; + +/* ??? Because of the funny way we pass parameters we should allow certain + ??? types of float/complex values to be in integer registers during + ??? RTL generation. This only matters on arch32. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + ((hard_regno_mode_classes[REGNO] & sparc_mode_class[MODE]) != 0) + +/* Value is 1 if it is OK to rename a hard register FROM to another hard + register TO. We cannot rename %g1 as it may be used before the save + register window instruction in the prologue. */ +#define HARD_REGNO_RENAME_OK(FROM, TO) ((FROM) != 1) + +/* Value is 1 if it is a good idea to tie two pseudo registers + when one has mode MODE1 and one has mode MODE2. 
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, + for any hard reg, then this must be 0 for correct output. + + For V9: SFmode can't be combined with other float modes, because they can't + be allocated to the %d registers. Also, DFmode won't fit in odd %f + registers, but SFmode will. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ + ((MODE1) == (MODE2) \ + || (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2) \ + && (! TARGET_V9 \ + || (GET_MODE_CLASS (MODE1) != MODE_FLOAT \ + || (MODE1 != SFmode && MODE2 != SFmode))))) + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 14 + +/* The stack bias (amount by which the hardware register is offset by). */ +#define SPARC_STACK_BIAS ((TARGET_ARCH64 && TARGET_STACK_BIAS) ? 2047 : 0) + +/* Actual top-of-stack address is 92/176 greater than the contents of the + stack pointer register for !v9/v9. That is: + - !v9: 64 bytes for the in and local registers, 4 bytes for structure return + address, and 6*4 bytes for the 6 register parameters. + - v9: 128 bytes for the in and local registers + 6*8 bytes for the integer + parameter regs. */ +#define STACK_POINTER_OFFSET (FIRST_PARM_OFFSET(0) + SPARC_STACK_BIAS) + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM 30 + +/* The soft frame pointer does not have the stack bias applied. */ +#define FRAME_POINTER_REGNUM 101 + +/* Given the stack bias, the stack pointer isn't actually aligned. */ +#define INIT_EXPANDERS \ + do { \ + if (crtl->emit.regno_pointer_align && SPARC_STACK_BIAS) \ + { \ + REGNO_POINTER_ALIGN (STACK_POINTER_REGNUM) = BITS_PER_UNIT; \ + REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = BITS_PER_UNIT; \ + } \ + } while (0) + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM FRAME_POINTER_REGNUM + +/* Register in which static-chain is passed to a function. This must + not be a register used by the prologue. */ +#define STATIC_CHAIN_REGNUM (TARGET_ARCH64 ? 5 : 2) + +/* Register which holds the global offset table, if any. */ + +#define GLOBAL_OFFSET_TABLE_REGNUM 23 + +/* Register which holds offset table for position-independent + data references. */ + +#define PIC_OFFSET_TABLE_REGNUM \ + (flag_pic ? GLOBAL_OFFSET_TABLE_REGNUM : INVALID_REGNUM) + +/* Pick a default value we can notice from override_options: + !v9: Default is on. + v9: Default is off. + Originally it was -1, but later on the container of options changed to + unsigned byte, so we decided to pick 127 as default value, which does + reflect an undefined default value in case of 0/1. */ + +#define DEFAULT_PCC_STRUCT_RETURN 127 + +/* Functions which return large structures get the address + to place the wanted value at offset 64 from the frame. + Must reserve 64 bytes for the in and local registers. + v9: Functions which return large structures get the address to place the + wanted value from an invisible first argument. */ +#define STRUCT_VALUE_OFFSET 64 + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). 
This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + +/* The SPARC has various kinds of registers: general, floating point, + and condition codes [well, it has others as well, but none that we + care directly about]. + + For v9 we must distinguish between the upper and lower floating point + registers because the upper ones can't hold SFmode values. + HARD_REGNO_MODE_OK won't help here because reload assumes that register(s) + satisfying a group need for a class will also satisfy a single need for + that class. EXTRA_FP_REGS is a bit of a misnomer as it covers all 64 fp + regs. + + It is important that one class contains all the general and all the standard + fp regs. Otherwise find_reg() won't properly allocate int regs for moves, + because reg_class_record() will bias the selection in favor of fp regs, + because reg_class_subunion[GENERAL_REGS][FP_REGS] will yield FP_REGS, + because FP_REGS > GENERAL_REGS. + + It is also important that one class contain all the general and all + the fp regs. Otherwise when spilling a DFmode reg, it may be from + EXTRA_FP_REGS but find_reloads() may use class + GENERAL_OR_FP_REGS. This will cause allocate_reload_reg() to die + because the compiler thinks it doesn't have a spill reg when in + fact it does. + + v9 also has 4 floating point condition code registers. Since we don't + have a class that is the union of FPCC_REGS with either of the others, + it is important that it appear first. Otherwise the compiler will die + trying to compile _fixunsdfsi because fix_truncdfsi2 won't match its + constraints. + + It is important that SPARC_ICC_REG have class NO_REGS. Otherwise combine + may try to use it to hold an SImode value. See register_operand. + ??? Should %fcc[0123] be handled similarly? +*/ + +enum reg_class { NO_REGS, FPCC_REGS, I64_REGS, GENERAL_REGS, FP_REGS, + EXTRA_FP_REGS, GENERAL_OR_FP_REGS, GENERAL_OR_EXTRA_FP_REGS, + ALL_REGS, LIM_REG_CLASSES }; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ + +#define REG_CLASS_NAMES \ + { "NO_REGS", "FPCC_REGS", "I64_REGS", "GENERAL_REGS", "FP_REGS", \ + "EXTRA_FP_REGS", "GENERAL_OR_FP_REGS", "GENERAL_OR_EXTRA_FP_REGS", \ + "ALL_REGS" } + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ + {{0, 0, 0, 0}, /* NO_REGS */ \ + {0, 0, 0, 0xf}, /* FPCC_REGS */ \ + {0xffff, 0, 0, 0}, /* I64_REGS */ \ + {-1, 0, 0, 0x20}, /* GENERAL_REGS */ \ + {0, -1, 0, 0}, /* FP_REGS */ \ + {0, -1, -1, 0}, /* EXTRA_FP_REGS */ \ + {-1, -1, 0, 0x20}, /* GENERAL_OR_FP_REGS */ \ + {-1, -1, -1, 0x20}, /* GENERAL_OR_EXTRA_FP_REGS */ \ + {-1, -1, -1, 0x3f}} /* ALL_REGS */ + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. 
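+
+   For illustration (a stand-alone sketch, not used by the compiler; the
+   masks are copied by hand from REG_CLASS_CONTENTS above), each class is
+   four 32-bit words with one bit per hard register:
+
+      #include <assert.h>
+
+      // Word REGNO/32, bit REGNO%32, exactly as in the initializer above.
+      static const unsigned int fpcc_regs[4]    = { 0, 0, 0, 0xf };
+      static const unsigned int general_regs[4] = { 0xffffffff, 0, 0, 0x20 };
+
+      static int
+      in_class (const unsigned int *mask, int regno)
+      {
+        return (mask[regno / 32] >> (regno % 32)) & 1;
+      }
+
+      int
+      main (void)
+      {
+        assert (in_class (fpcc_regs, 96) && in_class (fpcc_regs, 99));  // %fcc0-3
+        assert (! in_class (fpcc_regs, 100));                           // %icc
+        assert (in_class (general_regs, 0) && in_class (general_regs, 31));
+        assert (in_class (general_regs, 101));                          // %sfp
+        assert (! in_class (general_regs, 32));                         // %f0
+        return 0;
+      }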
*/ + +extern enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER]; + +#define REGNO_REG_CLASS(REGNO) sparc_regno_reg_class[(REGNO)] + +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, EXTRA_FP_REGS, FPCC_REGS, LIM_REG_CLASSES \ +} + +/* Defines invalid mode changes. Borrowed from pa64-regs.h. + + SImode loads to floating-point registers are not zero-extended. + The definition for LOAD_EXTEND_OP specifies that integer loads + narrower than BITS_PER_WORD will be zero-extended. As a result, + we inhibit changes from SImode unless they are to a mode that is + identical in size. */ + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + (TARGET_ARCH64 \ + && (FROM) == SImode \ + && GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ + ? reg_classes_intersect_p (CLASS, FP_REGS) : 0) + +/* This is the order in which to allocate registers normally. + + We put %f0-%f7 last among the float registers, so as to make it more + likely that a pseudo-register which dies in the float return register + area will get allocated to the float return register, thus saving a move + instruction at the end of the function. + + Similarly for integer return value registers. + + We know in this case that we will not end up with a leaf function. + + The register allocator is given the global and out registers first + because these registers are call clobbered and thus less useful to + global register allocation. + + Next we list the local and in registers. They are not call clobbered + and thus very useful for global register allocation. We list the input + registers before the locals so that it is more likely the incoming + arguments received in those registers can just stay there and not be + reloaded. */ + +#define REG_ALLOC_ORDER \ +{ 1, 2, 3, 4, 5, 6, 7, /* %g1-%g7 */ \ + 13, 12, 11, 10, 9, 8, /* %o5-%o0 */ \ + 15, /* %o7 */ \ + 16, 17, 18, 19, 20, 21, 22, 23, /* %l0-%l7 */ \ + 29, 28, 27, 26, 25, 24, 31, /* %i5-%i0,%i7 */\ + 40, 41, 42, 43, 44, 45, 46, 47, /* %f8-%f15 */ \ + 48, 49, 50, 51, 52, 53, 54, 55, /* %f16-%f23 */ \ + 56, 57, 58, 59, 60, 61, 62, 63, /* %f24-%f31 */ \ + 64, 65, 66, 67, 68, 69, 70, 71, /* %f32-%f39 */ \ + 72, 73, 74, 75, 76, 77, 78, 79, /* %f40-%f47 */ \ + 80, 81, 82, 83, 84, 85, 86, 87, /* %f48-%f55 */ \ + 88, 89, 90, 91, 92, 93, 94, 95, /* %f56-%f63 */ \ + 39, 38, 37, 36, 35, 34, 33, 32, /* %f7-%f0 */ \ + 96, 97, 98, 99, /* %fcc0-3 */ \ + 100, 0, 14, 30, 101} /* %icc, %g0, %o6, %i6, %sfp */ + +/* This is the order in which to allocate registers for + leaf functions. If all registers can fit in the global and + output registers, then we have the possibility of having a leaf + function. + + The macro actually mentioned the input registers first, + because they get renumbered into the output registers once + we know really do have a leaf function. + + To be more precise, this register allocation order is used + when %o7 is found to not be clobbered right before register + allocation. Normally, the reason %o7 would be clobbered is + due to a call which could not be transformed into a sibling + call. 
+ + As a consequence, it is possible to use the leaf register + allocation order and not end up with a leaf function. We will + not get suboptimal register allocation in that case because by + definition of being potentially leaf, there were no function + calls. Therefore, allocation order within the local register + window is not critical like it is when we do have function calls. */ + +#define REG_LEAF_ALLOC_ORDER \ +{ 1, 2, 3, 4, 5, 6, 7, /* %g1-%g7 */ \ + 29, 28, 27, 26, 25, 24, /* %i5-%i0 */ \ + 15, /* %o7 */ \ + 13, 12, 11, 10, 9, 8, /* %o5-%o0 */ \ + 16, 17, 18, 19, 20, 21, 22, 23, /* %l0-%l7 */ \ + 40, 41, 42, 43, 44, 45, 46, 47, /* %f8-%f15 */ \ + 48, 49, 50, 51, 52, 53, 54, 55, /* %f16-%f23 */ \ + 56, 57, 58, 59, 60, 61, 62, 63, /* %f24-%f31 */ \ + 64, 65, 66, 67, 68, 69, 70, 71, /* %f32-%f39 */ \ + 72, 73, 74, 75, 76, 77, 78, 79, /* %f40-%f47 */ \ + 80, 81, 82, 83, 84, 85, 86, 87, /* %f48-%f55 */ \ + 88, 89, 90, 91, 92, 93, 94, 95, /* %f56-%f63 */ \ + 39, 38, 37, 36, 35, 34, 33, 32, /* %f7-%f0 */ \ + 96, 97, 98, 99, /* %fcc0-3 */ \ + 100, 0, 14, 30, 31, 101} /* %icc, %g0, %o6, %i6, %i7, %sfp */ + +#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc () + +extern char sparc_leaf_regs[]; +#define LEAF_REGISTERS sparc_leaf_regs + +extern char leaf_reg_remap[]; +#define LEAF_REG_REMAP(REGNO) (leaf_reg_remap[REGNO]) + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS GENERAL_REGS +#define BASE_REG_CLASS GENERAL_REGS + +/* Local macro to handle the two v9 classes of FP regs. */ +#define FP_REG_CLASS_P(CLASS) ((CLASS) == FP_REGS || (CLASS) == EXTRA_FP_REGS) + +/* Predicates for 10-bit, 11-bit and 13-bit signed constants. */ +#define SPARC_SIMM10_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x200 < 0x400) +#define SPARC_SIMM11_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x400 < 0x800) +#define SPARC_SIMM13_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x1000 < 0x2000) + +/* 10- and 11-bit immediates are only used for a few specific insns. + SMALL_INT is used throughout the port so we continue to use it. */ +#define SMALL_INT(X) (SPARC_SIMM13_P (INTVAL (X))) + +/* Predicate for constants that can be loaded with a sethi instruction. + This is the general, 64-bit aware, bitwise version that ensures that + only constants whose representation fits in the mask + + 0x00000000fffffc00 + + are accepted. It will reject, for example, negative SImode constants + on 64-bit hosts, so correct handling is to mask the value beforehand + according to the mode of the instruction. */ +#define SPARC_SETHI_P(X) \ + (((unsigned HOST_WIDE_INT) (X) \ + & ((unsigned HOST_WIDE_INT) 0x3ff - GET_MODE_MASK (SImode) - 1)) == 0) + +/* Version of the above predicate for SImode constants and below. */ +#define SPARC_SETHI32_P(X) \ + (SPARC_SETHI_P ((unsigned HOST_WIDE_INT) (X) & GET_MODE_MASK (SImode))) + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ +/* - We can't load constants into FP registers. + - We can't load FP constants into integer registers when soft-float, + because there is no soft-float pattern with a r/F constraint. + - We can't load FP constants into integer registers for TFmode unless + it is 0.0L, because there is no movtf pattern with a r/F constraint. 
+ - Try and reload integer constants (symbolic or otherwise) back into + registers directly, rather than having them dumped to memory. */ + +#define PREFERRED_RELOAD_CLASS(X,CLASS) \ + (CONSTANT_P (X) \ + ? ((FP_REG_CLASS_P (CLASS) \ + || (CLASS) == GENERAL_OR_FP_REGS \ + || (CLASS) == GENERAL_OR_EXTRA_FP_REGS \ + || (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT \ + && ! TARGET_FPU) \ + || (GET_MODE (X) == TFmode \ + && ! const_zero_operand (X, TFmode))) \ + ? NO_REGS \ + : (!FP_REG_CLASS_P (CLASS) \ + && GET_MODE_CLASS (GET_MODE (X)) == MODE_INT) \ + ? GENERAL_REGS \ + : (CLASS)) \ + : (CLASS)) + +/* Return the register class of a scratch register needed to load IN into + a register of class CLASS in MODE. + + We need a temporary when loading/storing a HImode/QImode value + between memory and the FPU registers. This can happen when combine puts + a paradoxical subreg in a float/fix conversion insn. + + We need a temporary when loading/storing a DFmode value between + unaligned memory and the upper FPU registers. */ + +#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, IN) \ + ((FP_REG_CLASS_P (CLASS) \ + && ((MODE) == HImode || (MODE) == QImode) \ + && (GET_CODE (IN) == MEM \ + || ((GET_CODE (IN) == REG || GET_CODE (IN) == SUBREG) \ + && true_regnum (IN) == -1))) \ + ? GENERAL_REGS \ + : ((CLASS) == EXTRA_FP_REGS && (MODE) == DFmode \ + && GET_CODE (IN) == MEM && TARGET_ARCH32 \ + && ! mem_min_alignment ((IN), 8)) \ + ? FP_REGS \ + : (((TARGET_CM_MEDANY \ + && symbolic_operand ((IN), (MODE))) \ + || (TARGET_CM_EMBMEDANY \ + && text_segment_operand ((IN), (MODE)))) \ + && !flag_pic) \ + ? GENERAL_REGS \ + : NO_REGS) + +#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, IN) \ + ((FP_REG_CLASS_P (CLASS) \ + && ((MODE) == HImode || (MODE) == QImode) \ + && (GET_CODE (IN) == MEM \ + || ((GET_CODE (IN) == REG || GET_CODE (IN) == SUBREG) \ + && true_regnum (IN) == -1))) \ + ? GENERAL_REGS \ + : ((CLASS) == EXTRA_FP_REGS && (MODE) == DFmode \ + && GET_CODE (IN) == MEM && TARGET_ARCH32 \ + && ! mem_min_alignment ((IN), 8)) \ + ? FP_REGS \ + : (((TARGET_CM_MEDANY \ + && symbolic_operand ((IN), (MODE))) \ + || (TARGET_CM_EMBMEDANY \ + && text_segment_operand ((IN), (MODE)))) \ + && !flag_pic) \ + ? GENERAL_REGS \ + : NO_REGS) + +/* On SPARC it is not possible to directly move data between + GENERAL_REGS and FP_REGS. */ +#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \ + (FP_REG_CLASS_P (CLASS1) != FP_REG_CLASS_P (CLASS2)) + +/* Get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9 + because the movsi and movsf patterns don't handle r/f moves. + For v8 we copy the default definition. */ +#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \ + (TARGET_ARCH64 \ + ? (GET_MODE_BITSIZE (MODE) < 32 \ + ? mode_for_size (32, GET_MODE_CLASS (MODE), 0) \ + : MODE) \ + : (GET_MODE_BITSIZE (MODE) < BITS_PER_WORD \ + ? mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (MODE), 0) \ + : MODE)) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ +/* On SPARC, this is the size of MODE in words. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + (FP_REG_CLASS_P (CLASS) ? (GET_MODE_SIZE (MODE) + 3) / 4 \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Stack layout; function entry, exit and calling. */ + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. 
*/ +#define STACK_GROWS_DOWNWARD + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* Offset of first parameter from the argument pointer register value. + !v9: This is 64 for the ins and locals, plus 4 for the struct-return reg + even if this function isn't going to use it. + v9: This is 128 for the ins and locals. */ +#define FIRST_PARM_OFFSET(FNDECL) \ + (TARGET_ARCH64 ? 16 * UNITS_PER_WORD : STRUCT_VALUE_OFFSET + UNITS_PER_WORD) + +/* Offset from the argument pointer register value to the CFA. + This is different from FIRST_PARM_OFFSET because the register window + comes between the CFA and the arguments. */ +#define ARG_POINTER_CFA_OFFSET(FNDECL) 0 + +/* When a parameter is passed in a register, stack space is still + allocated for it. + !v9: All 6 possible integer registers have backing store allocated. + v9: Only space for the arguments passed is allocated. */ +/* ??? Ideally, we'd use zero here (as the minimum), but zero has special + meaning to the backend. Further, we need to be able to detect if a + varargs/unprototyped function is called, as they may want to spill more + registers than we've provided space. Ugly, ugly. So for now we retain + all 6 slots even for v9. */ +#define REG_PARM_STACK_SPACE(DECL) (6 * UNITS_PER_WORD) + +/* Definitions for register elimination. */ + +#define ELIMINABLE_REGS \ + {{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM} } + +/* We always pretend that this is a leaf function because if it's not, + there's no point in trying to eliminate the frame pointer. If it + is a leaf function, we guessed right! */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + do { \ + if ((TO) == STACK_POINTER_REGNUM) \ + (OFFSET) = sparc_compute_frame_size (get_frame_size (), 1); \ + else \ + (OFFSET) = 0; \ + (OFFSET) += SPARC_STACK_BIAS; \ + } while (0) + +/* Keep the stack pointer constant throughout the function. + This is both an optimization and a necessity: longjmp + doesn't behave itself when the stack pointer moves within + the function! */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Define this macro if the target machine has "register windows". This + C expression returns the register number as seen by the called function + corresponding to register number OUT as seen by the calling function. + Return OUT if register number OUT is not an outbound register. */ + +#define INCOMING_REGNO(OUT) \ + (((OUT) < 8 || (OUT) > 15) ? (OUT) : (OUT) + 16) + +/* Define this macro if the target machine has "register windows". This + C expression returns the register number as seen by the calling function + corresponding to register number IN as seen by the called function. + Return IN if register number IN is not an inbound register. */ + +#define OUTGOING_REGNO(IN) \ + (((IN) < 24 || (IN) > 31) ? (IN) : (IN) - 16) + +/* Define this macro if the target machine has register windows. This + C expression returns true if the register is call-saved but is in the + register window. 
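+
+   As a stand-alone illustration of the window mapping above (the helper
+   names are invented for the sketch; the port itself only uses the
+   INCOMING_REGNO/OUTGOING_REGNO macros):
+
+      #include <assert.h>
+
+      // The caller's %o0-%o7 (regs 8-15) are the callee's %i0-%i7
+      // (regs 24-31); every other register is unchanged by the window.
+      static int incoming_regno (int out) { return (out < 8 || out > 15) ? out : out + 16; }
+      static int outgoing_regno (int in)  { return (in < 24 || in > 31) ? in : in - 16; }
+
+      int
+      main (void)
+      {
+        assert (incoming_regno (8) == 24);    // %o0 seen as %i0 by the callee
+        assert (incoming_regno (15) == 31);   // %o7 seen as %i7
+        assert (outgoing_regno (24) == 8);    // and back again
+        assert (incoming_regno (1) == 1);     // %g1 is not windowed
+        assert (incoming_regno (16) == 16);   // %l0 is not windowed
+        return 0;
+      }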
*/ + +#define LOCAL_REGNO(REGNO) \ + ((REGNO) >= 16 && (REGNO) <= 31) + +/* Define the size of space to allocate for the return value of an + untyped_call. */ + +#define APPLY_RESULT_SIZE (TARGET_ARCH64 ? 24 : 16) + +/* 1 if N is a possible register number for function argument passing. + On SPARC, these are the "output" registers. v9 also uses %f0-%f31. */ + +#define FUNCTION_ARG_REGNO_P(N) \ +(TARGET_ARCH64 \ + ? (((N) >= 8 && (N) <= 13) || ((N) >= 32 && (N) <= 63)) \ + : ((N) >= 8 && (N) <= 13)) + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On SPARC (!v9), this is a single integer, which is a number of words + of arguments scanned so far (including the invisible argument, + if any, which holds the structure-value-address). + Thus 7 or more means all following args should go on the stack. + + For v9, we also need to know whether a prototype is present. */ + +struct sparc_args { + int words; /* number of words passed so far */ + int prototype_p; /* nonzero if a prototype is present */ + int libcall_p; /* nonzero if a library call */ +}; +#define CUMULATIVE_ARGS struct sparc_args + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ +init_cumulative_args (& (CUM), (FNTYPE), (LIBNAME), (FNDECL)); + +/* If defined, a C expression which determines whether, and in which direction, + to pad out an argument with extra space. The value should be of type + `enum direction': either `upward' to pad above the argument, + `downward' to pad below, or `none' to inhibit padding. */ + +#define FUNCTION_ARG_PADDING(MODE, TYPE) \ +function_arg_padding ((MODE), (TYPE)) + + +/* Generate the special assembly code needed to tell the assembler whatever + it might need to know about the return value of a function. + + For SPARC assemblers, we need to output a .proc pseudo-op which conveys + information to the assembler relating to peephole optimization (done in + the assembler). */ + +#define ASM_DECLARE_RESULT(FILE, RESULT) \ + fprintf ((FILE), "\t.proc\t0%lo\n", sparc_type_code (TREE_TYPE (RESULT))) + +/* Output the special assembly code needed to tell the assembler some + register is used as global register variable. + + SPARC 64bit psABI declares registers %g2 and %g3 as application + registers and %g6 and %g7 as OS registers. Any object using them + should declare (for %g2/%g3 has to, for %g6/%g7 can) that it uses them + and how they are used (scratch or some global variable). + Linker will then refuse to link together objects which use those + registers incompatibly. + + Unless the registers are used for scratch, two different global + registers cannot be declared to the same name, so in the unlikely + case of a global register variable occupying more than one register + we prefix the second and following registers with .gnu.part1. etc. 
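+
+   For illustration only (the variable name and register are invented), a
+   global register variable `v' spanning two registers starting at %g2
+   would make the loop below emit approximately:
+
+        .register  %g2, v
+        .register  %g3, .gnu.part1.v
+
+   whereas a single-register variable in %g7 gets just one
+   `.register %g7, v' line.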
*/ + +extern GTY(()) char sparc_hard_reg_printed[8]; + +#ifdef HAVE_AS_REGISTER_PSEUDO_OP +#define ASM_DECLARE_REGISTER_GLOBAL(FILE, DECL, REGNO, NAME) \ +do { \ + if (TARGET_ARCH64) \ + { \ + int end = HARD_REGNO_NREGS ((REGNO), DECL_MODE (decl)) + (REGNO); \ + int reg; \ + for (reg = (REGNO); reg < 8 && reg < end; reg++) \ + if ((reg & ~1) == 2 || (reg & ~1) == 6) \ + { \ + if (reg == (REGNO)) \ + fprintf ((FILE), "\t.register\t%%g%d, %s\n", reg, (NAME)); \ + else \ + fprintf ((FILE), "\t.register\t%%g%d, .gnu.part%d.%s\n", \ + reg, reg - (REGNO), (NAME)); \ + sparc_hard_reg_printed[reg] = 1; \ + } \ + } \ +} while (0) +#endif + + +/* Emit rtl for profiling. */ +#define PROFILE_HOOK(LABEL) sparc_profile_hook (LABEL) + +/* All the work done in PROFILE_HOOK, but still required. */ +#define FUNCTION_PROFILER(FILE, LABELNO) do { } while (0) + +/* Set the name of the mcount function for the system. */ +#define MCOUNT_FUNCTION "*mcount" + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ + +#define EXIT_IGNORE_STACK \ + (get_frame_size () != 0 \ + || cfun->calls_alloca || crtl->outgoing_args_size) + +/* Define registers used by the epilogue and return instruction. */ +#define EPILOGUE_USES(REGNO) ((REGNO) == 31 \ + || (crtl->calls_eh_return && (REGNO) == 1)) + +/* Length in units of the trampoline for entering a nested function. */ + +#define TRAMPOLINE_SIZE (TARGET_ARCH64 ? 32 : 16) + +#define TRAMPOLINE_ALIGNMENT 128 /* 16 bytes */ + +/* Generate RTL to flush the register windows so as to make arbitrary frames + available. */ +#define SETUP_FRAME_ADDRESSES() \ + emit_insn (gen_flush_register_windows ()) + +/* Given an rtx for the address of a frame, + return an rtx for the address of the word in the frame + that holds the dynamic chain--the previous frame's address. */ +#define DYNAMIC_CHAIN_ADDRESS(frame) \ + plus_constant (frame, 14 * UNITS_PER_WORD + SPARC_STACK_BIAS) + +/* Given an rtx for the frame pointer, + return an rtx for the address of the frame. */ +#define FRAME_ADDR_RTX(frame) plus_constant (frame, SPARC_STACK_BIAS) + +/* The return address isn't on the stack, it is in a register, so we can't + access it from the current frame pointer. We can access it from the + previous frame pointer though by reading a value from the register window + save area. */ +#define RETURN_ADDR_IN_PREVIOUS_FRAME + +/* This is the offset of the return address to the true next instruction to be + executed for the current function. */ +#define RETURN_ADDR_OFFSET \ + (8 + 4 * (! TARGET_ARCH64 && cfun->returns_struct)) + +/* The current return address is in %i7. The return address of anything + farther back is in the register window save area at [%fp+60]. */ +/* ??? This ignores the fact that the actual return address is +8 for normal + returns, and +12 for structure returns. */ +#define RETURN_ADDR_RTX(count, frame) \ + ((count == -1) \ + ? gen_rtx_REG (Pmode, 31) \ + : gen_rtx_MEM (Pmode, \ + memory_address (Pmode, plus_constant (frame, \ + 15 * UNITS_PER_WORD \ + + SPARC_STACK_BIAS)))) + +/* Before the prologue, the return address is %o7 + 8. OK, sometimes it's + +12, but always using +8 is close enough for frame unwind purposes. + Actually, just using %o7 is close enough for unwinding, but %o7+8 + is something you can return to. 
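+
+   A stand-alone sketch of RETURN_ADDR_OFFSET above (the function name and
+   parameters are invented for the example):
+
+      #include <assert.h>
+
+      // %o7/%i7 hold the address of the call instruction itself, so the
+      // real return point is 8 bytes later (the call plus its delay slot),
+      // or 12 when a 32-bit function returns a structure and the caller
+      // has planted an extra word after the delay slot.
+      static int
+      return_addr_offset (int arch64, int returns_struct)
+      {
+        return 8 + 4 * (! arch64 && returns_struct);
+      }
+
+      int
+      main (void)
+      {
+        assert (return_addr_offset (0, 0) == 8);
+        assert (return_addr_offset (0, 1) == 12);   // 32-bit struct return
+        assert (return_addr_offset (1, 1) == 8);    // no extra word on 64-bit
+        return 0;
+      }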
*/ +#define INCOMING_RETURN_ADDR_RTX \ + plus_constant (gen_rtx_REG (word_mode, 15), 8) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (15) + +/* The offset from the incoming value of %sp to the top of the stack frame + for the current function. On sparc64, we have to account for the stack + bias if present. */ +#define INCOMING_FRAME_SP_OFFSET SPARC_STACK_BIAS + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 24 : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 1) /* %g1 */ +#define EH_RETURN_HANDLER_RTX gen_rtx_REG (Pmode, 31) /* %i7 */ + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + If assembler and linker properly support .uaword %r_disp32(foo), + then use PC relative 32-bit relocations instead of absolute relocs + for shared libraries. On sparc64, use pc relative 32-bit relocs even + for binaries, to save memory. + + binutils 2.12 would emit a R_SPARC_DISP32 dynamic relocation if the + symbol %r_disp32() is against was not local, but .hidden. In that + case, we have to use DW_EH_PE_absptr for pic personality. */ +#ifdef HAVE_AS_SPARC_UA_PCREL +#ifdef HAVE_AS_SPARC_UA_PCREL_HIDDEN +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (flag_pic \ + ? (GLOBAL ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4\ + : ((TARGET_ARCH64 && ! GLOBAL) \ + ? (DW_EH_PE_pcrel | DW_EH_PE_sdata4) \ + : DW_EH_PE_absptr)) +#else +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (flag_pic \ + ? (GLOBAL ? DW_EH_PE_absptr : (DW_EH_PE_pcrel | DW_EH_PE_sdata4)) \ + : ((TARGET_ARCH64 && ! GLOBAL) \ + ? (DW_EH_PE_pcrel | DW_EH_PE_sdata4) \ + : DW_EH_PE_absptr)) +#endif + +/* Emit a PC-relative relocation. */ +#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \ + do { \ + fputs (integer_asm_op (SIZE, FALSE), FILE); \ + fprintf (FILE, "%%r_disp%d(", SIZE * 8); \ + assemble_name (FILE, LABEL); \ + fputc (')', FILE); \ + } while (0) +#endif + +/* Addressing modes, and classification of registers for them. */ + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ + +#define REGNO_OK_FOR_INDEX_P(REGNO) \ +((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < (unsigned)32 \ + || (REGNO) == FRAME_POINTER_REGNUM \ + || reg_renumber[REGNO] == FRAME_POINTER_REGNUM) + +#define REGNO_OK_FOR_BASE_P(REGNO) REGNO_OK_FOR_INDEX_P (REGNO) + +#define REGNO_OK_FOR_FP_P(REGNO) \ + (((unsigned) (REGNO) - 32 < (TARGET_V9 ? (unsigned)64 : (unsigned)32)) \ + || ((unsigned) reg_renumber[REGNO] - 32 < (TARGET_V9 ? (unsigned)64 : (unsigned)32))) +#define REGNO_OK_FOR_CCFP_P(REGNO) \ + (TARGET_V9 \ + && (((unsigned) (REGNO) - 96 < (unsigned)4) \ + || ((unsigned) reg_renumber[REGNO] - 96 < (unsigned)4))) + +/* Now macros that check whether X is a register and also, + strictly, whether it is in a specified class. + + These macros are specific to the SPARC, and may be used only + in code for printing assembler insns and in conditions for + define_optimization. */ + +/* 1 if X is an fp register. 
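+
+   As a stand-alone sketch of the hard-register half of REGNO_OK_FOR_FP_P
+   and REGNO_OK_FOR_CCFP_P above (the reg_renumber half is omitted and the
+   helper names are invented):
+
+      #include <assert.h>
+
+      // %f0-%f31 are hard regs 32-63; the extra V9 %f32-%f63 are 64-95;
+      // the V9 %fcc0-%fcc3 are 96-99.
+      static int hard_fp_reg_p (int regno, int v9)
+      { return (unsigned) (regno - 32) < (v9 ? 64u : 32u); }
+      static int hard_ccfp_reg_p (int regno, int v9)
+      { return v9 && (unsigned) (regno - 96) < 4u; }
+
+      int
+      main (void)
+      {
+        assert (hard_fp_reg_p (32, 0) && hard_fp_reg_p (63, 0));
+        assert (! hard_fp_reg_p (64, 0) && hard_fp_reg_p (95, 1));
+        assert (! hard_fp_reg_p (96, 1));
+        assert (hard_ccfp_reg_p (96, 1) && ! hard_ccfp_reg_p (96, 0));
+        return 0;
+      }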
*/ + +#define FP_REG_P(X) (REG_P (X) && REGNO_OK_FOR_FP_P (REGNO (X))) + +/* Is X, a REG, an in or global register? i.e. is regno 0..7 or 24..31 */ +#define IN_OR_GLOBAL_P(X) (REGNO (X) < 8 || (REGNO (X) >= 24 && REGNO (X) <= 31)) + +/* Maximum number of registers that can appear in a valid memory address. */ + +#define MAX_REGS_PER_ADDRESS 2 + +/* Recognize any constant value that is a valid address. + When PIC, we do not accept an address that would require a scratch reg + to load into a register. */ + +#define CONSTANT_ADDRESS_P(X) constant_address_p (X) + +/* Define this, so that when PIC, reload won't try to reload invalid + addresses which require two reload registers. */ + +#define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X) + +/* Nonzero if the constant value X is a legitimate general operand. + Anything can be made to work except floating point constants. + If TARGET_VIS, 0.0 can be made to work as well. */ + +#define LEGITIMATE_CONSTANT_P(X) legitimate_constant_p (X) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. + + Most source files want to accept pseudo regs in the hope that + they will get allocated to the class that the insn wants them to be in. + Source files for reload pass need to be strict. + After reload, it makes no difference, since pseudo regs have + been eliminated by then. */ + +#ifndef REG_OK_STRICT + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. */ +#define REG_OK_FOR_INDEX_P(X) \ + (REGNO (X) < 32 \ + || REGNO (X) == FRAME_POINTER_REGNUM \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_INDEX_P (X) + +#else + +/* Nonzero if X is a hard reg that can be used as an index. */ +#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X)) +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +#endif + +/* Should gcc use [%reg+%lo(xx)+offset] addresses? */ + +#ifdef HAVE_AS_OFFSETABLE_LO10 +#define USE_AS_OFFSETABLE_LO10 1 +#else +#define USE_AS_OFFSETABLE_LO10 0 +#endif + +/* On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT + ordinarily. This changes a bit when generating PIC. The details are + in sparc.c's implementation of TARGET_LEGITIMATE_ADDRESS_P. */ + +#define SYMBOLIC_CONST(X) symbolic_operand (X, VOIDmode) + +#define RTX_OK_FOR_BASE_P(X) \ + ((GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + || (GET_CODE (X) == SUBREG \ + && GET_CODE (SUBREG_REG (X)) == REG \ + && REG_OK_FOR_BASE_P (SUBREG_REG (X)))) + +#define RTX_OK_FOR_INDEX_P(X) \ + ((GET_CODE (X) == REG && REG_OK_FOR_INDEX_P (X)) \ + || (GET_CODE (X) == SUBREG \ + && GET_CODE (SUBREG_REG (X)) == REG \ + && REG_OK_FOR_INDEX_P (SUBREG_REG (X)))) + +#define RTX_OK_FOR_OFFSET_P(X) \ + (GET_CODE (X) == CONST_INT && INTVAL (X) >= -0x1000 && INTVAL (X) < 0x1000 - 8) + +#define RTX_OK_FOR_OLO10_P(X) \ + (GET_CODE (X) == CONST_INT && INTVAL (X) >= -0x1000 && INTVAL (X) < 0xc00 - 8) + + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. 
This + macro is used in only one place: `find_reloads_address' in reload.c. */ +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \ +do { \ + int win; \ + (X) = sparc_legitimize_reload_address ((X), (MODE), (OPNUM), \ + (int)(TYPE), (IND_LEVELS), &win); \ + if (win) \ + goto WIN; \ +} while (0) + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +/* If we ever implement any of the full models (such as CM_FULLANY), + this has to be DImode in that case */ +#ifdef HAVE_GAS_SUBSECTION_ORDERING +#define CASE_VECTOR_MODE \ +(! TARGET_PTR64 ? SImode : flag_pic ? SImode : TARGET_CM_MEDLOW ? SImode : DImode) +#else +/* If assembler does not have working .subsection -1, we use DImode for pic, as otherwise + we have to sign extend which slows things down. */ +#define CASE_VECTOR_MODE \ +(! TARGET_PTR64 ? SImode : flag_pic ? DImode : TARGET_CM_MEDLOW ? SImode : DImode) +#endif + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 8 + +/* If a memory-to-memory move would take MOVE_RATIO or more simple + move-instruction pairs, we will do a movmem or libcall instead. */ + +#define MOVE_RATIO(speed) ((speed) ? 8 : 3) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* Nonzero if access to memory by bytes is slow and undesirable. + For RISC chips, it means that access to memory by bytes is no + better than access by words when possible, so grab a whole word + and maybe make use of that. */ +#define SLOW_BYTE_ACCESS 1 + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. For floating-point, + CCFP[E]mode is used. CC_NOOVmode should be used when the first operand + is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special + processing is needed. */ +#define SELECT_CC_MODE(OP,X,Y) select_cc_mode ((OP), (X), (Y)) + +/* Return nonzero if MODE implies a floating point inequality can be + reversed. For SPARC this is always true because we have a full + compliment of ordered and unordered comparisons, but until generic + code knows how to reverse it correctly we keep the old definition. */ +#define REVERSIBLE_CC_MODE(MODE) ((MODE) != CCFPEmode && (MODE) != CCFPmode) + +/* A function address in a call instruction for indexing purposes. */ +#define FUNCTION_MODE Pmode + +/* Define this if addresses of constant functions + shouldn't be put through pseudo regs where they can be cse'd. + Desirable on machines where ordinary constants are expensive + but a CALL with constant address is cheap. 
*/ +#define NO_FUNCTION_CSE + +/* alloca should avoid clobbering the old register save area. */ +#define SETJMP_VIA_SAVE_AREA + +/* The _Q_* comparison libcalls return booleans. */ +#define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) ((MODE) == TFmode) + +/* Assume by default that the _Qp_* 64-bit libcalls are implemented such + that the inputs are fully consumed before the output memory is clobbered. */ + +#define TARGET_BUGGY_QP_LIB 0 + +/* Assume by default that we do not have the Solaris-specific conversion + routines nor 64-bit integer multiply and divide routines. */ + +#define SUN_CONVERSION_LIBFUNCS 0 +#define DITF_CONVERSION_LIBFUNCS 0 +#define SUN_INTEGER_MULTIPLY_64 0 + +/* Compute extra cost of moving data between one register class + and another. */ +#define GENERAL_OR_I64(C) ((C) == GENERAL_REGS || (C) == I64_REGS) +#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \ + (((FP_REG_CLASS_P (CLASS1) && GENERAL_OR_I64 (CLASS2)) \ + || (GENERAL_OR_I64 (CLASS1) && FP_REG_CLASS_P (CLASS2)) \ + || (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS) \ + ? ((sparc_cpu == PROCESSOR_ULTRASPARC \ + || sparc_cpu == PROCESSOR_ULTRASPARC3 \ + || sparc_cpu == PROCESSOR_NIAGARA \ + || sparc_cpu == PROCESSOR_NIAGARA2) ? 12 : 6) : 2) + +/* Provide the cost of a branch. For pre-v9 processors we use + a value of 3 to take into account the potential annulling of + the delay slot (which ends up being a bubble in the pipeline slot) + plus a cycle to take into consideration the instruction cache + effects. + + On v9 and later, which have branch prediction facilities, we set + it to the depth of the pipeline as that is the cost of a + mispredicted branch. + + On Niagara, normal branches insert 3 bubbles into the pipe + and annulled branches insert 4 bubbles. + + On Niagara-2, a not-taken branch costs 1 cycle whereas a taken + branch costs 6 cycles. */ + +#define BRANCH_COST(speed_p, predictable_p) \ + ((sparc_cpu == PROCESSOR_V9 \ + || sparc_cpu == PROCESSOR_ULTRASPARC) \ + ? 7 \ + : (sparc_cpu == PROCESSOR_ULTRASPARC3 \ + ? 9 \ + : (sparc_cpu == PROCESSOR_NIAGARA \ + ? 4 \ + : (sparc_cpu == PROCESSOR_NIAGARA2 \ + ? 5 \ + : 3)))) + +/* Control the assembler format that we output. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will end at + the end of the line. */ + +#define ASM_COMMENT_START "!" + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ + +#define ASM_APP_ON "" + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ + +#define ASM_APP_OFF "" + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). 
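+
+   As a stand-alone illustration of that indexing (the helper is invented
+   for the example and simply mirrors the layout of the table below):
+
+      #include <assert.h>
+      #include <stdio.h>
+      #include <string.h>
+
+      // 0-31 are %g/%o/%l/%i in groups of eight (14 and 30 print as %sp
+      // and %fp), 32-95 are %f0-%f63, 96-99 are %fcc0-3, then %icc, %sfp.
+      static void
+      reg_name (int regno, char *buf)
+      {
+        static const char group[4] = { 'g', 'o', 'l', 'i' };
+        if (regno == 14)       strcpy (buf, "%sp");
+        else if (regno == 30)  strcpy (buf, "%fp");
+        else if (regno < 32)   sprintf (buf, "%%%c%d", group[regno / 8], regno % 8);
+        else if (regno < 96)   sprintf (buf, "%%f%d", regno - 32);
+        else if (regno < 100)  sprintf (buf, "%%fcc%d", regno - 96);
+        else                   strcpy (buf, regno == 100 ? "%icc" : "%sfp");
+      }
+
+      int
+      main (void)
+      {
+        char buf[8];
+        reg_name (8, buf);    assert (strcmp (buf, "%o0") == 0);
+        reg_name (24, buf);   assert (strcmp (buf, "%i0") == 0);
+        reg_name (63, buf);   assert (strcmp (buf, "%f31") == 0);
+        reg_name (101, buf);  assert (strcmp (buf, "%sfp") == 0);
+        return 0;
+      }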
*/ + +#define REGISTER_NAMES \ +{"%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7", \ + "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7", \ + "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7", \ + "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7", \ + "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7", \ + "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15", \ + "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23", \ + "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31", \ + "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39", \ + "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47", \ + "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55", \ + "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63", \ + "%fcc0", "%fcc1", "%fcc2", "%fcc3", "%icc", "%sfp" } + +/* Define additional names for use in asm clobbers and asm declarations. */ + +#define ADDITIONAL_REGISTER_NAMES \ +{{"ccr", SPARC_ICC_REG}, {"cc", SPARC_ICC_REG}} + +/* On Sun 4, this limit is 2048. We use 1000 to be safe, since the length + can run past this up to a continuation point. Once we used 1500, but + a single entry in C++ can run more than 500 bytes, due to the length of + mangled symbol names. dbxout.c should really be fixed to do + continuations when they are actually needed instead of trying to + guess... */ +#define DBX_CONTIN_LENGTH 1000 + +/* This is how to output a command to make the user-level label named NAME + defined for reference from other files. */ + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global " + +/* The prefix to add to user-visible assembler symbols. */ + +#define USER_LABEL_PREFIX "_" + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf ((LABEL), "*%s%ld", (PREFIX), (long)(NUM)) + +/* This is how we hook in and defer the case-vector until the end of + the function. */ +#define ASM_OUTPUT_ADDR_VEC(LAB,VEC) \ + sparc_defer_case_vector ((LAB),(VEC), 0) + +#define ASM_OUTPUT_ADDR_DIFF_VEC(LAB,VEC) \ + sparc_defer_case_vector ((LAB),(VEC), 1) + +/* This is how to output an element of a case-vector that is absolute. */ + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { \ + char label[30]; \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \ + if (CASE_VECTOR_MODE == SImode) \ + fprintf (FILE, "\t.word\t"); \ + else \ + fprintf (FILE, "\t.xword\t"); \ + assemble_name (FILE, label); \ + fputc ('\n', FILE); \ +} while (0) + +/* This is how to output an element of a case-vector that is relative. + (SPARC uses such vectors only when generating PIC.) */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ +do { \ + char label[30]; \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", (VALUE)); \ + if (CASE_VECTOR_MODE == SImode) \ + fprintf (FILE, "\t.word\t"); \ + else \ + fprintf (FILE, "\t.xword\t"); \ + assemble_name (FILE, label); \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", (REL)); \ + fputc ('-', FILE); \ + assemble_name (FILE, label); \ + fputc ('\n', FILE); \ +} while (0) + +/* This is what to output before and after case-vector (both + relative and absolute). If .subsection -1 works, we put case-vectors + at the beginning of the current section. 
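+
+   As a rough illustration (label numbers invented), a three-entry PIC
+   jump table goes through ASM_OUTPUT_ADDR_DIFF_ELT above and, when
+   .subsection works, is bracketed by the directives defined below,
+   coming out approximately as:
+
+        .subsection     -1
+        .word   L10-L9
+        .word   L11-L9
+        .word   L12-L9
+        .previous
+
+   with .xword instead of .word whenever CASE_VECTOR_MODE is DImode.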
*/ + +#ifdef HAVE_GAS_SUBSECTION_ORDERING + +#define ASM_OUTPUT_ADDR_VEC_START(FILE) \ + fprintf(FILE, "\t.subsection\t-1\n") + +#define ASM_OUTPUT_ADDR_VEC_END(FILE) \ + fprintf(FILE, "\t.previous\n") + +#endif + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf (FILE, "\t.align %d\n", (1<<(LOG))) + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf (FILE, "\t.skip "HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE)) + +/* This says how to output an assembler line + to define a global common symbol. */ + +#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \ +( fputs ("\t.common ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",\"bss\"\n", (SIZE))) + +/* This says how to output an assembler line to define a local common + symbol. */ + +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGNED) \ +( fputs ("\t.reserve ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",\"bss\",%u\n", \ + (SIZE), ((ALIGNED) / BITS_PER_UNIT))) + +/* A C statement (sans semicolon) to output to the stdio stream + FILE the assembler definition of uninitialized global DECL named + NAME whose size is SIZE bytes and alignment is ALIGN bytes. + Try to use asm_output_aligned_bss to implement this macro. */ + +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + do { \ + ASM_OUTPUT_ALIGNED_LOCAL (FILE, NAME, SIZE, ALIGN); \ + } while (0) + +#define IDENT_ASM_OP "\t.ident\t" + +/* Output #ident as a .ident. */ + +#define ASM_OUTPUT_IDENT(FILE, NAME) \ + fprintf (FILE, "%s\"%s\"\n", IDENT_ASM_OP, NAME); + +/* Prettify the assembly. */ + +extern int sparc_indent_opcode; + +#define ASM_OUTPUT_OPCODE(FILE, PTR) \ + do { \ + if (sparc_indent_opcode) \ + { \ + putc (' ', FILE); \ + sparc_indent_opcode = 0; \ + } \ + } while (0) + +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \ + ((CHAR) == '#' || (CHAR) == '*' || (CHAR) == '(' \ + || (CHAR) == ')' || (CHAR) == '_' || (CHAR) == '&') + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE) + +/* Print a memory address as an operand to reference that memory location. */ + +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ +{ register rtx base, index = 0; \ + int offset = 0; \ + register rtx addr = ADDR; \ + if (GET_CODE (addr) == REG) \ + fputs (reg_names[REGNO (addr)], FILE); \ + else if (GET_CODE (addr) == PLUS) \ + { \ + if (GET_CODE (XEXP (addr, 0)) == CONST_INT) \ + offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);\ + else if (GET_CODE (XEXP (addr, 1)) == CONST_INT) \ + offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);\ + else \ + base = XEXP (addr, 0), index = XEXP (addr, 1); \ + if (GET_CODE (base) == LO_SUM) \ + { \ + gcc_assert (USE_AS_OFFSETABLE_LO10 \ + && TARGET_ARCH64 \ + && ! 
TARGET_CM_MEDMID); \ + output_operand (XEXP (base, 0), 0); \ + fputs ("+%lo(", FILE); \ + output_address (XEXP (base, 1)); \ + fprintf (FILE, ")+%d", offset); \ + } \ + else \ + { \ + fputs (reg_names[REGNO (base)], FILE); \ + if (index == 0) \ + fprintf (FILE, "%+d", offset); \ + else if (GET_CODE (index) == REG) \ + fprintf (FILE, "+%s", reg_names[REGNO (index)]); \ + else if (GET_CODE (index) == SYMBOL_REF \ + || GET_CODE (index) == LABEL_REF \ + || GET_CODE (index) == CONST) \ + fputc ('+', FILE), output_addr_const (FILE, index); \ + else gcc_unreachable (); \ + } \ + } \ + else if (GET_CODE (addr) == MINUS \ + && GET_CODE (XEXP (addr, 1)) == LABEL_REF) \ + { \ + output_addr_const (FILE, XEXP (addr, 0)); \ + fputs ("-(", FILE); \ + output_addr_const (FILE, XEXP (addr, 1)); \ + fputs ("-.)", FILE); \ + } \ + else if (GET_CODE (addr) == LO_SUM) \ + { \ + output_operand (XEXP (addr, 0), 0); \ + if (TARGET_CM_MEDMID) \ + fputs ("+%l44(", FILE); \ + else \ + fputs ("+%lo(", FILE); \ + output_address (XEXP (addr, 1)); \ + fputc (')', FILE); \ + } \ + else if (flag_pic && GET_CODE (addr) == CONST \ + && GET_CODE (XEXP (addr, 0)) == MINUS \ + && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST \ + && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS \ + && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx) \ + { \ + addr = XEXP (addr, 0); \ + output_addr_const (FILE, XEXP (addr, 0)); \ + /* Group the args of the second CONST in parenthesis. */ \ + fputs ("-(", FILE); \ + /* Skip past the second CONST--it does nothing for us. */\ + output_addr_const (FILE, XEXP (XEXP (addr, 1), 0)); \ + /* Close the parenthesis. */ \ + fputc (')', FILE); \ + } \ + else \ + { \ + output_addr_const (FILE, addr); \ + } \ +} + +/* TLS support defaulting to original Sun flavor. GNU extensions + must be activated in separate configuration files. */ +#ifdef HAVE_AS_TLS +#define TARGET_TLS 1 +#else +#define TARGET_TLS 0 +#endif + +#define TARGET_SUN_TLS TARGET_TLS +#define TARGET_GNU_TLS 0 + +/* The number of Pmode words for the setjmp buffer. */ +#define JMP_BUF_SIZE 12 + +/* We use gcc _mcount for profiling. */ +#define NO_PROFILE_COUNTERS 0 diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md new file mode 100644 index 000000000..06b34e908 --- /dev/null +++ b/gcc/config/sparc/sparc.md @@ -0,0 +1,7828 @@ +;; Machine description for SPARC chip for GCC +;; Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, +;; 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 +;; Free Software Foundation, Inc. +;; Contributed by Michael Tiemann (tiemann@cygnus.com) +;; 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans, +;; at Cygnus Support. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. 
+ +(define_constants + [(UNSPEC_MOVE_PIC 0) + (UNSPEC_UPDATE_RETURN 1) + (UNSPEC_LOAD_PCREL_SYM 2) + (UNSPEC_FRAME_BLOCKAGE 3) + (UNSPEC_MOVE_PIC_LABEL 5) + (UNSPEC_SETH44 6) + (UNSPEC_SETM44 7) + (UNSPEC_SETHH 9) + (UNSPEC_SETLM 10) + (UNSPEC_EMB_HISUM 11) + (UNSPEC_EMB_TEXTUHI 13) + (UNSPEC_EMB_TEXTHI 14) + (UNSPEC_EMB_TEXTULO 15) + (UNSPEC_EMB_SETHM 18) + (UNSPEC_MOVE_GOTDATA 19) + + (UNSPEC_MEMBAR 20) + + (UNSPEC_TLSGD 30) + (UNSPEC_TLSLDM 31) + (UNSPEC_TLSLDO 32) + (UNSPEC_TLSIE 33) + (UNSPEC_TLSLE 34) + (UNSPEC_TLSLD_BASE 35) + + (UNSPEC_FPACK16 40) + (UNSPEC_FPACK32 41) + (UNSPEC_FPACKFIX 42) + (UNSPEC_FEXPAND 43) + (UNSPEC_FPMERGE 44) + (UNSPEC_MUL16AL 45) + (UNSPEC_MUL8UL 46) + (UNSPEC_MULDUL 47) + (UNSPEC_ALIGNDATA 48) + (UNSPEC_ALIGNADDR 49) + (UNSPEC_PDIST 50) + + (UNSPEC_SP_SET 60) + (UNSPEC_SP_TEST 61) + ]) + +(define_constants + [(UNSPECV_BLOCKAGE 0) + (UNSPECV_FLUSHW 1) + (UNSPECV_GOTO 2) + (UNSPECV_FLUSH 4) + (UNSPECV_SETJMP 5) + (UNSPECV_SAVEW 6) + (UNSPECV_CAS 8) + (UNSPECV_SWAP 9) + (UNSPECV_LDSTUB 10) + (UNSPECV_PROBE_STACK_RANGE 11) + ]) + + +(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) +(define_mode_iterator I [QI HI SI DI]) +(define_mode_iterator F [SF DF TF]) + +;; We don't define V1SI because SI should work just fine. +(define_mode_iterator V32 [SF V2HI V4QI]) +(define_mode_iterator V32I [SI V2HI V4QI]) + +(define_mode_iterator V64 [DF V2SI V4HI V8QI]) +(define_mode_iterator V64I [DI V2SI V4HI V8QI]) + +;; The upper 32 fp regs on the v9 can't hold SFmode values. To deal with this +;; a second register class, EXTRA_FP_REGS, exists for the v9 chip. The name +;; is a bit of a misnomer as it covers all 64 fp regs. The corresponding +;; constraint letter is 'e'. To avoid any confusion, 'e' is used instead of +;; 'f' for all DF/TFmode values, including those that are specific to the v8. + + +;; Attribute for cpu type. +;; These must match the values for enum processor_type in sparc.h. +(define_attr "cpu" + "v7, + cypress, + v8, + supersparc, + hypersparc, + leon, + sparclite, + f930, + f934, + sparclite86x, + sparclet, + tsc701, + v9, + ultrasparc, + ultrasparc3, + niagara, + niagara2" + (const (symbol_ref "sparc_cpu_attr"))) + +;; Attribute for the instruction set. +;; At present we only need to distinguish v9/!v9, but for clarity we +;; test TARGET_V8 too. +(define_attr "isa" "v7,v8,v9,sparclet" + (const + (cond [(symbol_ref "TARGET_V9") (const_string "v9") + (symbol_ref "TARGET_V8") (const_string "v8") + (symbol_ref "TARGET_SPARCLET") (const_string "sparclet")] + (const_string "v7")))) + +;; Insn type. +(define_attr "type" + "ialu,compare,shift, + load,sload,store, + uncond_branch,branch,call,sibcall,call_no_delay_slot,return, + imul,idiv, + fpload,fpstore, + fp,fpmove, + fpcmove,fpcrmove, + fpcmp, + fpmul,fpdivs,fpdivd, + fpsqrts,fpsqrtd, + fga,fgm_pack,fgm_mul,fgm_pdist,fgm_cmp, + cmove, + ialuX, + multi,savew,flushw,iflush,trap" + (const_string "ialu")) + +;; True if branch/call has empty delay slot and will emit a nop in it +(define_attr "empty_delay_slot" "false,true" + (symbol_ref "(empty_delay_slot (insn) + ? EMPTY_DELAY_SLOT_TRUE : EMPTY_DELAY_SLOT_FALSE)")) + +(define_attr "branch_type" "none,icc,fcc,reg" + (const_string "none")) + +(define_attr "pic" "false,true" + (symbol_ref "(flag_pic != 0 ? PIC_TRUE : PIC_FALSE)")) + +(define_attr "calls_alloca" "false,true" + (symbol_ref "(cfun->calls_alloca != 0 + ? 
CALLS_ALLOCA_TRUE : CALLS_ALLOCA_FALSE)")) + +(define_attr "calls_eh_return" "false,true" + (symbol_ref "(crtl->calls_eh_return != 0 + ? CALLS_EH_RETURN_TRUE : CALLS_EH_RETURN_FALSE)")) + +(define_attr "leaf_function" "false,true" + (symbol_ref "(current_function_uses_only_leaf_regs != 0 + ? LEAF_FUNCTION_TRUE : LEAF_FUNCTION_FALSE)")) + +(define_attr "delayed_branch" "false,true" + (symbol_ref "(flag_delayed_branch != 0 + ? DELAYED_BRANCH_TRUE : DELAYED_BRANCH_FALSE)")) + +;; Length (in # of insns). +;; Beware that setting a length greater or equal to 3 for conditional branches +;; has a side-effect (see output_cbranch and output_v9branch). +(define_attr "length" "" + (cond [(eq_attr "type" "uncond_branch,call") + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 2) + (const_int 1)) + (eq_attr "type" "sibcall") + (if_then_else (eq_attr "leaf_function" "true") + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 3) + (const_int 2)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 2) + (const_int 1))) + (eq_attr "branch_type" "icc") + (if_then_else (match_operand 0 "noov_compare64_operator" "") + (if_then_else (lt (pc) (match_dup 1)) + (if_then_else (lt (minus (match_dup 1) (pc)) (const_int 260000)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 2) + (const_int 1)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 4) + (const_int 3))) + (if_then_else (lt (minus (pc) (match_dup 1)) (const_int 260000)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 2) + (const_int 1)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 4) + (const_int 3)))) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 2) + (const_int 1))) + (eq_attr "branch_type" "fcc") + (if_then_else (match_operand 0 "fcc0_register_operand" "") + (if_then_else (eq_attr "empty_delay_slot" "true") + (if_then_else (eq (symbol_ref "TARGET_V9") (const_int 0)) + (const_int 3) + (const_int 2)) + (if_then_else (eq (symbol_ref "TARGET_V9") (const_int 0)) + (const_int 2) + (const_int 1))) + (if_then_else (lt (pc) (match_dup 2)) + (if_then_else (lt (minus (match_dup 2) (pc)) (const_int 260000)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 2) + (const_int 1)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 4) + (const_int 3))) + (if_then_else (lt (minus (pc) (match_dup 2)) (const_int 260000)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 2) + (const_int 1)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 4) + (const_int 3))))) + (eq_attr "branch_type" "reg") + (if_then_else (lt (pc) (match_dup 2)) + (if_then_else (lt (minus (match_dup 2) (pc)) (const_int 32000)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 2) + (const_int 1)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 4) + (const_int 3))) + (if_then_else (lt (minus (pc) (match_dup 2)) (const_int 32000)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 2) + (const_int 1)) + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 4) + (const_int 3)))) + ] (const_int 1))) + +;; FP precision. +(define_attr "fptype" "single,double" + (const_string "single")) + +;; UltraSPARC-III integer load type. 
+(define_attr "us3load_type" "2cycle,3cycle" + (const_string "2cycle")) + +(define_asm_attributes + [(set_attr "length" "2") + (set_attr "type" "multi")]) + +;; Attributes for instruction and branch scheduling +(define_attr "tls_call_delay" "false,true" + (symbol_ref "(tls_call_delay (insn) + ? TLS_CALL_DELAY_TRUE : TLS_CALL_DELAY_FALSE)")) + +(define_attr "in_call_delay" "false,true" + (cond [(eq_attr "type" "uncond_branch,branch,call,sibcall,call_no_delay_slot,multi") + (const_string "false") + (eq_attr "type" "load,fpload,store,fpstore") + (if_then_else (eq_attr "length" "1") + (const_string "true") + (const_string "false"))] + (if_then_else (and (eq_attr "length" "1") + (eq_attr "tls_call_delay" "true")) + (const_string "true") + (const_string "false")))) + +(define_attr "eligible_for_sibcall_delay" "false,true" + (symbol_ref "(eligible_for_sibcall_delay (insn) + ? ELIGIBLE_FOR_SIBCALL_DELAY_TRUE + : ELIGIBLE_FOR_SIBCALL_DELAY_FALSE)")) + +(define_attr "eligible_for_return_delay" "false,true" + (symbol_ref "(eligible_for_return_delay (insn) + ? ELIGIBLE_FOR_RETURN_DELAY_TRUE + : ELIGIBLE_FOR_RETURN_DELAY_FALSE)")) + +;; ??? !v9: Should implement the notion of predelay slots for floating-point +;; branches. This would allow us to remove the nop always inserted before +;; a floating point branch. + +;; ??? It is OK for fill_simple_delay_slots to put load/store instructions +;; in a delay slot, but it is not OK for fill_eager_delay_slots to do so. +;; This is because doing so will add several pipeline stalls to the path +;; that the load/store did not come from. Unfortunately, there is no way +;; to prevent fill_eager_delay_slots from using load/store without completely +;; disabling them. For the SPEC benchmark set, this is a serious lose, +;; because it prevents us from moving back the final store of inner loops. + +(define_attr "in_branch_delay" "false,true" + (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi") + (eq_attr "length" "1")) + (const_string "true") + (const_string "false"))) + +(define_attr "in_uncond_branch_delay" "false,true" + (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi") + (eq_attr "length" "1")) + (const_string "true") + (const_string "false"))) + +(define_attr "in_annul_branch_delay" "false,true" + (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi") + (eq_attr "length" "1")) + (const_string "true") + (const_string "false"))) + +(define_delay (eq_attr "type" "call") + [(eq_attr "in_call_delay" "true") (nil) (nil)]) + +(define_delay (eq_attr "type" "sibcall") + [(eq_attr "eligible_for_sibcall_delay" "true") (nil) (nil)]) + +(define_delay (eq_attr "type" "branch") + [(eq_attr "in_branch_delay" "true") + (nil) (eq_attr "in_annul_branch_delay" "true")]) + +(define_delay (eq_attr "type" "uncond_branch") + [(eq_attr "in_uncond_branch_delay" "true") + (nil) (nil)]) + +(define_delay (eq_attr "type" "return") + [(eq_attr "eligible_for_return_delay" "true") (nil) (nil)]) + + +;; Include SPARC DFA schedulers + +(include "cypress.md") +(include "supersparc.md") +(include "hypersparc.md") +(include "leon.md") +(include "sparclet.md") +(include "ultra1_2.md") +(include "ultra3.md") +(include "niagara.md") +(include "niagara2.md") + + +;; Operand and operator predicates and constraints + +(include "predicates.md") +(include "constraints.md") + + +;; Compare instructions. 
+ +;; These are just the DEFINE_INSNs to match the patterns and the +;; DEFINE_SPLITs for some of the scc insns that actually require +;; more than one machine instruction. DEFINE_EXPANDs are further down. + +;; The compare DEFINE_INSNs. + +(define_insn "*cmpsi_insn" + [(set (reg:CC 100) + (compare:CC (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "arith_operand" "rI")))] + "" + "cmp\t%0, %1" + [(set_attr "type" "compare")]) + +(define_insn "*cmpdi_sp64" + [(set (reg:CCX 100) + (compare:CCX (match_operand:DI 0 "register_operand" "r") + (match_operand:DI 1 "arith_operand" "rI")))] + "TARGET_ARCH64" + "cmp\t%0, %1" + [(set_attr "type" "compare")]) + +(define_insn "*cmpsf_fpe" + [(set (match_operand:CCFPE 0 "fcc_register_operand" "=c") + (compare:CCFPE (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_FPU" +{ + if (TARGET_V9) + return "fcmpes\t%0, %1, %2"; + return "fcmpes\t%1, %2"; +} + [(set_attr "type" "fpcmp")]) + +(define_insn "*cmpdf_fpe" + [(set (match_operand:CCFPE 0 "fcc_register_operand" "=c") + (compare:CCFPE (match_operand:DF 1 "register_operand" "e") + (match_operand:DF 2 "register_operand" "e")))] + "TARGET_FPU" +{ + if (TARGET_V9) + return "fcmped\t%0, %1, %2"; + return "fcmped\t%1, %2"; +} + [(set_attr "type" "fpcmp") + (set_attr "fptype" "double")]) + +(define_insn "*cmptf_fpe" + [(set (match_operand:CCFPE 0 "fcc_register_operand" "=c") + (compare:CCFPE (match_operand:TF 1 "register_operand" "e") + (match_operand:TF 2 "register_operand" "e")))] + "TARGET_FPU && TARGET_HARD_QUAD" +{ + if (TARGET_V9) + return "fcmpeq\t%0, %1, %2"; + return "fcmpeq\t%1, %2"; +} + [(set_attr "type" "fpcmp")]) + +(define_insn "*cmpsf_fp" + [(set (match_operand:CCFP 0 "fcc_register_operand" "=c") + (compare:CCFP (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_FPU" +{ + if (TARGET_V9) + return "fcmps\t%0, %1, %2"; + return "fcmps\t%1, %2"; +} + [(set_attr "type" "fpcmp")]) + +(define_insn "*cmpdf_fp" + [(set (match_operand:CCFP 0 "fcc_register_operand" "=c") + (compare:CCFP (match_operand:DF 1 "register_operand" "e") + (match_operand:DF 2 "register_operand" "e")))] + "TARGET_FPU" +{ + if (TARGET_V9) + return "fcmpd\t%0, %1, %2"; + return "fcmpd\t%1, %2"; +} + [(set_attr "type" "fpcmp") + (set_attr "fptype" "double")]) + +(define_insn "*cmptf_fp" + [(set (match_operand:CCFP 0 "fcc_register_operand" "=c") + (compare:CCFP (match_operand:TF 1 "register_operand" "e") + (match_operand:TF 2 "register_operand" "e")))] + "TARGET_FPU && TARGET_HARD_QUAD" +{ + if (TARGET_V9) + return "fcmpq\t%0, %1, %2"; + return "fcmpq\t%1, %2"; +} + [(set_attr "type" "fpcmp")]) + +;; Next come the scc insns. 
+ +(define_expand "cstoresi4" + [(use (match_operator 1 "comparison_operator" + [(match_operand:SI 2 "compare_operand" "") + (match_operand:SI 3 "arith_operand" "")])) + (clobber (match_operand:SI 0 "register_operand"))] + "" +{ + if (GET_CODE (operands[2]) == ZERO_EXTRACT && operands[3] != const0_rtx) + operands[2] = force_reg (SImode, operands[2]); + if (emit_scc_insn (operands)) DONE; else FAIL; +}) + +(define_expand "cstoredi4" + [(use (match_operator 1 "comparison_operator" + [(match_operand:DI 2 "compare_operand" "") + (match_operand:DI 3 "arith_operand" "")])) + (clobber (match_operand:SI 0 "register_operand"))] + "TARGET_ARCH64" +{ + if (GET_CODE (operands[2]) == ZERO_EXTRACT && operands[3] != const0_rtx) + operands[2] = force_reg (DImode, operands[2]); + if (emit_scc_insn (operands)) DONE; else FAIL; +}) + +(define_expand "cstore4" + [(use (match_operator 1 "comparison_operator" + [(match_operand:F 2 "register_operand" "") + (match_operand:F 3 "register_operand" "")])) + (clobber (match_operand:SI 0 "register_operand"))] + "TARGET_FPU" + { if (emit_scc_insn (operands)) DONE; else FAIL; }) + + + +;; Seq_special[_xxx] and sne_special[_xxx] clobber the CC reg, because they +;; generate addcc/subcc instructions. + +(define_expand "seqsi_special" + [(set (match_dup 3) + (xor:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (parallel [(set (match_operand:SI 0 "register_operand" "") + (eq:SI (match_dup 3) (const_int 0))) + (clobber (reg:CC 100))])] + "" + { operands[3] = gen_reg_rtx (SImode); }) + +(define_expand "seqdi_special" + [(set (match_dup 3) + (xor:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") + (eq:SI (match_dup 3) (const_int 0)))] + "TARGET_ARCH64" + { operands[3] = gen_reg_rtx (DImode); }) + +(define_expand "snesi_special" + [(set (match_dup 3) + (xor:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (parallel [(set (match_operand:SI 0 "register_operand" "") + (ne:SI (match_dup 3) (const_int 0))) + (clobber (reg:CC 100))])] + "" + { operands[3] = gen_reg_rtx (SImode); }) + +(define_expand "snedi_special" + [(set (match_dup 3) + (xor:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") + (ne:SI (match_dup 3) (const_int 0)))] + "TARGET_ARCH64" + { operands[3] = gen_reg_rtx (DImode); }) + + +;; Now the DEFINE_INSNs for the scc cases. + +;; The SEQ and SNE patterns are special because they can be done +;; without any branching and do not involve a COMPARE. We want +;; them to always use the splits below so the results can be +;; scheduled. 
+ +(define_insn_and_split "*snesi_zero" + [(set (match_operand:SI 0 "register_operand" "=r") + (ne:SI (match_operand:SI 1 "register_operand" "r") + (const_int 0))) + (clobber (reg:CC 100))] + "" + "#" + "" + [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1)) + (const_int 0))) + (set (match_dup 0) (ltu:SI (reg:CC 100) (const_int 0)))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*neg_snesi_zero" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (ne:SI (match_operand:SI 1 "register_operand" "r") + (const_int 0)))) + (clobber (reg:CC 100))] + "" + "#" + "" + [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1)) + (const_int 0))) + (set (match_dup 0) (neg:SI (ltu:SI (reg:CC 100) (const_int 0))))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*snesi_zero_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (ne:DI (match_operand:SI 1 "register_operand" "r") + (const_int 0))) + (clobber (reg:CC 100))] + "TARGET_ARCH64" + "#" + "&& 1" + [(set (reg:CC_NOOV 100) (compare:CC_NOOV (minus:SI (const_int 0) + (match_dup 1)) + (const_int 0))) + (set (match_dup 0) (zero_extend:DI (plus:SI (plus:SI (const_int 0) + (const_int 0)) + (ltu:SI (reg:CC_NOOV 100) + (const_int 0)))))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*snedi_zero" + [(set (match_operand:DI 0 "register_operand" "=&r") + (ne:DI (match_operand:DI 1 "register_operand" "r") + (const_int 0)))] + "TARGET_ARCH64" + "#" + "&& ! reg_overlap_mentioned_p (operands[1], operands[0])" + [(set (match_dup 0) (const_int 0)) + (set (match_dup 0) (if_then_else:DI (ne:DI (match_dup 1) + (const_int 0)) + (const_int 1) + (match_dup 0)))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*neg_snedi_zero" + [(set (match_operand:DI 0 "register_operand" "=&r") + (neg:DI (ne:DI (match_operand:DI 1 "register_operand" "r") + (const_int 0))))] + "TARGET_ARCH64" + "#" + "&& ! reg_overlap_mentioned_p (operands[1], operands[0])" + [(set (match_dup 0) (const_int 0)) + (set (match_dup 0) (if_then_else:DI (ne:DI (match_dup 1) + (const_int 0)) + (const_int -1) + (match_dup 0)))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*snedi_zero_trunc" + [(set (match_operand:SI 0 "register_operand" "=&r") + (ne:SI (match_operand:DI 1 "register_operand" "r") + (const_int 0)))] + "TARGET_ARCH64" + "#" + "&& ! 
reg_overlap_mentioned_p (operands[1], operands[0])" + [(set (match_dup 0) (const_int 0)) + (set (match_dup 0) (if_then_else:SI (ne:DI (match_dup 1) + (const_int 0)) + (const_int 1) + (match_dup 0)))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*seqsi_zero" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (match_operand:SI 1 "register_operand" "r") + (const_int 0))) + (clobber (reg:CC 100))] + "" + "#" + "" + [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1)) + (const_int 0))) + (set (match_dup 0) (geu:SI (reg:CC 100) (const_int 0)))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*neg_seqsi_zero" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (eq:SI (match_operand:SI 1 "register_operand" "r") + (const_int 0)))) + (clobber (reg:CC 100))] + "" + "#" + "" + [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1)) + (const_int 0))) + (set (match_dup 0) (neg:SI (geu:SI (reg:CC 100) (const_int 0))))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*seqsi_zero_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (eq:DI (match_operand:SI 1 "register_operand" "r") + (const_int 0))) + (clobber (reg:CC 100))] + "TARGET_ARCH64" + "#" + "&& 1" + [(set (reg:CC_NOOV 100) (compare:CC_NOOV (minus:SI (const_int 0) + (match_dup 1)) + (const_int 0))) + (set (match_dup 0) (zero_extend:DI (minus:SI (minus:SI (const_int 0) + (const_int -1)) + (ltu:SI (reg:CC_NOOV 100) + (const_int 0)))))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*seqdi_zero" + [(set (match_operand:DI 0 "register_operand" "=&r") + (eq:DI (match_operand:DI 1 "register_operand" "r") + (const_int 0)))] + "TARGET_ARCH64" + "#" + "&& ! reg_overlap_mentioned_p (operands[1], operands[0])" + [(set (match_dup 0) (const_int 0)) + (set (match_dup 0) (if_then_else:DI (eq:DI (match_dup 1) + (const_int 0)) + (const_int 1) + (match_dup 0)))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*neg_seqdi_zero" + [(set (match_operand:DI 0 "register_operand" "=&r") + (neg:DI (eq:DI (match_operand:DI 1 "register_operand" "r") + (const_int 0))))] + "TARGET_ARCH64" + "#" + "&& ! reg_overlap_mentioned_p (operands[1], operands[0])" + [(set (match_dup 0) (const_int 0)) + (set (match_dup 0) (if_then_else:DI (eq:DI (match_dup 1) + (const_int 0)) + (const_int -1) + (match_dup 0)))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*seqdi_zero_trunc" + [(set (match_operand:SI 0 "register_operand" "=&r") + (eq:SI (match_operand:DI 1 "register_operand" "r") + (const_int 0)))] + "TARGET_ARCH64" + "#" + "&& ! reg_overlap_mentioned_p (operands[1], operands[0])" + [(set (match_dup 0) (const_int 0)) + (set (match_dup 0) (if_then_else:SI (eq:DI (match_dup 1) + (const_int 0)) + (const_int 1) + (match_dup 0)))] + "" + [(set_attr "length" "2")]) + +;; We can also do (x + (i == 0)) and related, so put them in. +;; ??? The addx/subx insns use the 32 bit carry flag so there are no DImode +;; versions for v9. 
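;; For instance, y + (i != 0) can then be computed without a branch,
;; roughly as
;;
;;     subcc   %g0, %o1, %g0     ! carry = (i != 0), with i in %o1
;;     addx    %g0, %o2, %o0     ! %o0 = y + carry, with y in %o2
;;
;; the register choices above being purely illustrative.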
+ +(define_insn_and_split "*x_plus_i_ne_0" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (ne:SI (match_operand:SI 1 "register_operand" "r") + (const_int 0)) + (match_operand:SI 2 "register_operand" "r"))) + (clobber (reg:CC 100))] + "" + "#" + "" + [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1)) + (const_int 0))) + (set (match_dup 0) (plus:SI (ltu:SI (reg:CC 100) (const_int 0)) + (match_dup 2)))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*x_minus_i_ne_0" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 2 "register_operand" "r") + (ne:SI (match_operand:SI 1 "register_operand" "r") + (const_int 0)))) + (clobber (reg:CC 100))] + "" + "#" + "" + [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1)) + (const_int 0))) + (set (match_dup 0) (minus:SI (match_dup 2) + (ltu:SI (reg:CC 100) (const_int 0))))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*x_plus_i_eq_0" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (eq:SI (match_operand:SI 1 "register_operand" "r") + (const_int 0)) + (match_operand:SI 2 "register_operand" "r"))) + (clobber (reg:CC 100))] + "" + "#" + "" + [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1)) + (const_int 0))) + (set (match_dup 0) (plus:SI (geu:SI (reg:CC 100) (const_int 0)) + (match_dup 2)))] + "" + [(set_attr "length" "2")]) + +(define_insn_and_split "*x_minus_i_eq_0" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 2 "register_operand" "r") + (eq:SI (match_operand:SI 1 "register_operand" "r") + (const_int 0)))) + (clobber (reg:CC 100))] + "" + "#" + "" + [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1)) + (const_int 0))) + (set (match_dup 0) (minus:SI (match_dup 2) + (geu:SI (reg:CC 100) (const_int 0))))] + "" + [(set_attr "length" "2")]) + +;; We can also do GEU and LTU directly, but these operate after a compare. +;; ??? The addx/subx insns use the 32 bit carry flag so there are no DImode +;; versions for v9. + +(define_insn "*sltu_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (ltu:SI (reg:CC 100) (const_int 0)))] + "" + "addx\t%%g0, 0, %0" + [(set_attr "type" "ialuX")]) + +(define_insn "*neg_sltu_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (ltu:SI (reg:CC 100) (const_int 0))))] + "" + "subx\t%%g0, 0, %0" + [(set_attr "type" "ialuX")]) + +;; ??? Combine should canonicalize these next two to the same pattern. +(define_insn "*neg_sltu_minus_x" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (neg:SI (ltu:SI (reg:CC 100) (const_int 0))) + (match_operand:SI 1 "arith_operand" "rI")))] + "" + "subx\t%%g0, %1, %0" + [(set_attr "type" "ialuX")]) + +(define_insn "*neg_sltu_plus_x" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (plus:SI (ltu:SI (reg:CC 100) (const_int 0)) + (match_operand:SI 1 "arith_operand" "rI"))))] + "" + "subx\t%%g0, %1, %0" + [(set_attr "type" "ialuX")]) + +(define_insn "*sgeu_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (geu:SI (reg:CC 100) (const_int 0)))] + "" + "subx\t%%g0, -1, %0" + [(set_attr "type" "ialuX")]) + +(define_insn "*neg_sgeu_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (geu:SI (reg:CC 100) (const_int 0))))] + "" + "addx\t%%g0, -1, %0" + [(set_attr "type" "ialuX")]) + +;; We can also do (x + ((unsigned) i >= 0)) and related, so put them in. +;; ??? The addx/subx insns use the 32 bit carry flag so there are no DImode +;; versions for v9. 
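;; Likewise, after a real comparison the carry can be reused directly;
;; e.g. for unsigned a and b, x + (a < b) should come out roughly as
;;
;;     cmp     %o1, %o2          ! carry set iff a < b (unsigned)
;;     addx    %g0, %o3, %o0     ! %o0 = x + carry
;;
;; again with purely illustrative register choices.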
+ +(define_insn "*sltu_plus_x" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (ltu:SI (reg:CC 100) (const_int 0)) + (match_operand:SI 1 "arith_operand" "rI")))] + "" + "addx\t%%g0, %1, %0" + [(set_attr "type" "ialuX")]) + +(define_insn "*sltu_plus_x_plus_y" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (ltu:SI (reg:CC 100) (const_int 0)) + (plus:SI (match_operand:SI 1 "arith_operand" "%r") + (match_operand:SI 2 "arith_operand" "rI"))))] + "" + "addx\t%1, %2, %0" + [(set_attr "type" "ialuX")]) + +(define_insn "*x_minus_sltu" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "r") + (ltu:SI (reg:CC 100) (const_int 0))))] + "" + "subx\t%1, 0, %0" + [(set_attr "type" "ialuX")]) + +;; ??? Combine should canonicalize these next two to the same pattern. +(define_insn "*x_minus_y_minus_sltu" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ") + (match_operand:SI 2 "arith_operand" "rI")) + (ltu:SI (reg:CC 100) (const_int 0))))] + "" + "subx\t%r1, %2, %0" + [(set_attr "type" "ialuX")]) + +(define_insn "*x_minus_sltu_plus_y" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ") + (plus:SI (ltu:SI (reg:CC 100) (const_int 0)) + (match_operand:SI 2 "arith_operand" "rI"))))] + "" + "subx\t%r1, %2, %0" + [(set_attr "type" "ialuX")]) + +(define_insn "*sgeu_plus_x" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (geu:SI (reg:CC 100) (const_int 0)) + (match_operand:SI 1 "register_operand" "r")))] + "" + "subx\t%1, -1, %0" + [(set_attr "type" "ialuX")]) + +(define_insn "*x_minus_sgeu" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "r") + (geu:SI (reg:CC 100) (const_int 0))))] + "" + "addx\t%1, -1, %0" + [(set_attr "type" "ialuX")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 2 "noov_compare_operator" + [(match_operand 1 "icc_or_fcc_register_operand" "") + (const_int 0)]))] + "TARGET_V9 + && REGNO (operands[1]) == SPARC_ICC_REG + && (GET_MODE (operands[1]) == CCXmode + /* 32-bit LTU/GEU are better implemented using addx/subx. 
*/ + || (GET_CODE (operands[2]) != LTU && GET_CODE (operands[2]) != GEU))" + [(set (match_dup 0) (const_int 0)) + (set (match_dup 0) + (if_then_else:SI (match_op_dup:SI 2 [(match_dup 1) (const_int 0)]) + (const_int 1) + (match_dup 0)))] + "") + + +;; These control RTL generation for conditional jump insns + +(define_expand "cbranchcc4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "compare_operand" "") + (match_operand 2 "const_zero_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + "") + +(define_expand "cbranchsi4" + [(use (match_operator 0 "comparison_operator" + [(match_operand:SI 1 "compare_operand" "") + (match_operand:SI 2 "arith_operand" "")])) + (use (match_operand 3 ""))] + "" +{ + if (GET_CODE (operands[1]) == ZERO_EXTRACT && operands[2] != const0_rtx) + operands[1] = force_reg (SImode, operands[1]); + emit_conditional_branch_insn (operands); + DONE; +}) + +(define_expand "cbranchdi4" + [(use (match_operator 0 "comparison_operator" + [(match_operand:DI 1 "compare_operand" "") + (match_operand:DI 2 "arith_operand" "")])) + (use (match_operand 3 ""))] + "TARGET_ARCH64" +{ + if (GET_CODE (operands[1]) == ZERO_EXTRACT && operands[2] != const0_rtx) + operands[1] = force_reg (DImode, operands[1]); + emit_conditional_branch_insn (operands); + DONE; +}) + +(define_expand "cbranch4" + [(use (match_operator 0 "comparison_operator" + [(match_operand:F 1 "register_operand" "") + (match_operand:F 2 "register_operand" "")])) + (use (match_operand 3 ""))] + "TARGET_FPU" + { emit_conditional_branch_insn (operands); DONE; }) + + +;; Now match both normal and inverted jump. + +;; XXX fpcmp nop braindamage +(define_insn "*normal_branch" + [(set (pc) + (if_then_else (match_operator 0 "noov_compare_operator" + [(reg 100) (const_int 0)]) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" +{ + return output_cbranch (operands[0], operands[1], 1, 0, + final_sequence && INSN_ANNULLED_BRANCH_P (insn), + insn); +} + [(set_attr "type" "branch") + (set_attr "branch_type" "icc")]) + +;; XXX fpcmp nop braindamage +(define_insn "*inverted_branch" + [(set (pc) + (if_then_else (match_operator 0 "noov_compare_operator" + [(reg 100) (const_int 0)]) + (pc) + (label_ref (match_operand 1 "" ""))))] + "" +{ + return output_cbranch (operands[0], operands[1], 1, 1, + final_sequence && INSN_ANNULLED_BRANCH_P (insn), + insn); +} + [(set_attr "type" "branch") + (set_attr "branch_type" "icc")]) + +;; XXX fpcmp nop braindamage +(define_insn "*normal_fp_branch" + [(set (pc) + (if_then_else (match_operator 1 "comparison_operator" + [(match_operand:CCFP 0 "fcc_register_operand" "c") + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +{ + return output_cbranch (operands[1], operands[2], 2, 0, + final_sequence && INSN_ANNULLED_BRANCH_P (insn), + insn); +} + [(set_attr "type" "branch") + (set_attr "branch_type" "fcc")]) + +;; XXX fpcmp nop braindamage +(define_insn "*inverted_fp_branch" + [(set (pc) + (if_then_else (match_operator 1 "comparison_operator" + [(match_operand:CCFP 0 "fcc_register_operand" "c") + (const_int 0)]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" +{ + return output_cbranch (operands[1], operands[2], 2, 1, + final_sequence && INSN_ANNULLED_BRANCH_P (insn), + insn); +} + [(set_attr "type" "branch") + (set_attr "branch_type" "fcc")]) + +;; XXX fpcmp nop braindamage +(define_insn "*normal_fpe_branch" + [(set (pc) + (if_then_else (match_operator 1 "comparison_operator" + [(match_operand:CCFPE 0 "fcc_register_operand" "c") + 
(const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +{ + return output_cbranch (operands[1], operands[2], 2, 0, + final_sequence && INSN_ANNULLED_BRANCH_P (insn), + insn); +} + [(set_attr "type" "branch") + (set_attr "branch_type" "fcc")]) + +;; XXX fpcmp nop braindamage +(define_insn "*inverted_fpe_branch" + [(set (pc) + (if_then_else (match_operator 1 "comparison_operator" + [(match_operand:CCFPE 0 "fcc_register_operand" "c") + (const_int 0)]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" +{ + return output_cbranch (operands[1], operands[2], 2, 1, + final_sequence && INSN_ANNULLED_BRANCH_P (insn), + insn); +} + [(set_attr "type" "branch") + (set_attr "branch_type" "fcc")]) + +;; SPARC V9-specific jump insns. None of these are guaranteed to be +;; in the architecture. + +;; There are no 32 bit brreg insns. + +;; XXX +(define_insn "*normal_int_branch_sp64" + [(set (pc) + (if_then_else (match_operator 0 "v9_register_compare_operator" + [(match_operand:DI 1 "register_operand" "r") + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_ARCH64" +{ + return output_v9branch (operands[0], operands[2], 1, 2, 0, + final_sequence && INSN_ANNULLED_BRANCH_P (insn), + insn); +} + [(set_attr "type" "branch") + (set_attr "branch_type" "reg")]) + +;; XXX +(define_insn "*inverted_int_branch_sp64" + [(set (pc) + (if_then_else (match_operator 0 "v9_register_compare_operator" + [(match_operand:DI 1 "register_operand" "r") + (const_int 0)]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "TARGET_ARCH64" +{ + return output_v9branch (operands[0], operands[2], 1, 2, 1, + final_sequence && INSN_ANNULLED_BRANCH_P (insn), + insn); +} + [(set_attr "type" "branch") + (set_attr "branch_type" "reg")]) + + +;; Load in operand 0 the (absolute) address of operand 1, which is a symbolic +;; value subject to a PC-relative relocation. Operand 2 is a helper function +;; that adds the PC value at the call point to register #(operand 3). 
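;; A typical use is loading the GOT address when setting up the PIC
;; register; with a delay slot available this is expected to look
;; roughly like
;;
;;     sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
;;     call    __sparc_get_pc_thunk.l7
;;      add    %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7
;;
;; where the -4/+4 biases account for the sethi and add sitting one
;; word before and one word after the call whose PC the helper adds in.
;; The thunk name shown is the conventional one for %l7; the exact
;; symbol varies with the register actually used.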
+ +(define_insn "load_pcrel_sym" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(match_operand:P 1 "symbolic_operand" "") + (match_operand:P 2 "call_address_operand" "") + (match_operand:P 3 "const_int_operand" "")] UNSPEC_LOAD_PCREL_SYM)) + (clobber (reg:P 15))] + "REGNO (operands[0]) == INTVAL (operands[3])" +{ + if (flag_delayed_branch) + return "sethi\t%%hi(%a1-4), %0\n\tcall\t%a2\n\t add\t%0, %%lo(%a1+4), %0"; + else + return "sethi\t%%hi(%a1-8), %0\n\tadd\t%0, %%lo(%a1-4), %0\n\tcall\t%a2\n\t nop"; +} + [(set (attr "type") (const_string "multi")) + (set (attr "length") + (if_then_else (eq_attr "delayed_branch" "true") + (const_int 3) + (const_int 4)))]) + + +;; Integer move instructions + +(define_expand "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" +{ + if (sparc_expand_move (QImode, operands)) + DONE; +}) + +(define_insn "*movqi_insn" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,m") + (match_operand:QI 1 "input_operand" "rI,m,rJ"))] + "(register_operand (operands[0], QImode) + || register_or_zero_operand (operands[1], QImode))" + "@ + mov\t%1, %0 + ldub\t%1, %0 + stb\t%r1, %0" + [(set_attr "type" "*,load,store") + (set_attr "us3load_type" "*,3cycle,*")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" +{ + if (sparc_expand_move (HImode, operands)) + DONE; +}) + +(define_insn "*movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "input_operand" "rI,K,m,rJ"))] + "(register_operand (operands[0], HImode) + || register_or_zero_operand (operands[1], HImode))" + "@ + mov\t%1, %0 + sethi\t%%hi(%a1), %0 + lduh\t%1, %0 + sth\t%r1, %0" + [(set_attr "type" "*,*,load,store") + (set_attr "us3load_type" "*,*,3cycle,*")]) + +;; We always work with constants here. +(define_insn "*movhi_lo_sum" + [(set (match_operand:HI 0 "register_operand" "=r") + (ior:HI (match_operand:HI 1 "register_operand" "%r") + (match_operand:HI 2 "small_int_operand" "I")))] + "" + "or\t%1, %2, %0") + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" +{ + if (sparc_expand_move (SImode, operands)) + DONE; +}) + +(define_insn "*movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,m,!f,!f,!m,d") + (match_operand:SI 1 "input_operand" "rI,K,m,rJ,f,m,f,J"))] + "(register_operand (operands[0], SImode) + || register_or_zero_operand (operands[1], SImode))" + "@ + mov\t%1, %0 + sethi\t%%hi(%a1), %0 + ld\t%1, %0 + st\t%r1, %0 + fmovs\t%1, %0 + ld\t%1, %0 + st\t%1, %0 + fzeros\t%0" + [(set_attr "type" "*,*,load,store,fpmove,fpload,fpstore,fga")]) + +(define_insn "*movsi_lo_sum" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "in")))] + "" + "or\t%1, %%lo(%a2), %0") + +(define_insn "*movsi_high" + [(set (match_operand:SI 0 "register_operand" "=r") + (high:SI (match_operand:SI 1 "immediate_operand" "in")))] + "" + "sethi\t%%hi(%a1), %0") + +;; The next two patterns must wrap the SYMBOL_REF in an UNSPEC +;; so that CSE won't optimize the address computation away. 
+(define_insn "movsi_lo_sum_pic" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand:SI 2 "immediate_operand" "in")] UNSPEC_MOVE_PIC)))] + "flag_pic" +{ +#ifdef HAVE_AS_SPARC_GOTDATA_OP + return "xor\t%1, %%gdop_lox10(%a2), %0"; +#else + return "or\t%1, %%lo(%a2), %0"; +#endif +}) + +(define_insn "movsi_high_pic" + [(set (match_operand:SI 0 "register_operand" "=r") + (high:SI (unspec:SI [(match_operand 1 "" "")] UNSPEC_MOVE_PIC)))] + "flag_pic && check_pic (1)" +{ +#ifdef HAVE_AS_SPARC_GOTDATA_OP + return "sethi\t%%gdop_hix22(%a1), %0"; +#else + return "sethi\t%%hi(%a1), %0"; +#endif +}) + +(define_insn "movsi_pic_gotdata_op" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r") + (match_operand 3 "symbolic_operand" "")] UNSPEC_MOVE_GOTDATA))] + "flag_pic && check_pic (1)" +{ +#ifdef HAVE_AS_SPARC_GOTDATA_OP + return "ld\t[%1 + %2], %0, %%gdop(%a3)"; +#else + return "ld\t[%1 + %2], %0"; +#endif +} + [(set_attr "type" "load")]) + +(define_expand "movsi_pic_label_ref" + [(set (match_dup 3) (high:SI + (unspec:SI [(match_operand:SI 1 "label_ref_operand" "") + (match_dup 2)] UNSPEC_MOVE_PIC_LABEL))) + (set (match_dup 4) (lo_sum:SI (match_dup 3) + (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_MOVE_PIC_LABEL))) + (set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_dup 5) (match_dup 4)))] + "flag_pic" +{ + crtl->uses_pic_offset_table = 1; + operands[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + if (!can_create_pseudo_p ()) + { + operands[3] = operands[0]; + operands[4] = operands[0]; + } + else + { + operands[3] = gen_reg_rtx (SImode); + operands[4] = gen_reg_rtx (SImode); + } + operands[5] = pic_offset_table_rtx; +}) + +(define_insn "*movsi_high_pic_label_ref" + [(set (match_operand:SI 0 "register_operand" "=r") + (high:SI + (unspec:SI [(match_operand:SI 1 "label_ref_operand" "") + (match_operand:SI 2 "" "")] UNSPEC_MOVE_PIC_LABEL)))] + "flag_pic" + "sethi\t%%hi(%a2-(%a1-.)), %0") + +(define_insn "*movsi_lo_sum_pic_label_ref" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand:SI 2 "label_ref_operand" "") + (match_operand:SI 3 "" "")] UNSPEC_MOVE_PIC_LABEL)))] + "flag_pic" + "or\t%1, %%lo(%a3-(%a2-.)), %0") + +;; Set up the PIC register for VxWorks. + +(define_expand "vxworks_load_got" + [(set (match_dup 0) + (high:SI (match_dup 1))) + (set (match_dup 0) + (mem:SI (lo_sum:SI (match_dup 0) (match_dup 1)))) + (set (match_dup 0) + (mem:SI (lo_sum:SI (match_dup 0) (match_dup 2))))] + "TARGET_VXWORKS_RTP" +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_SYMBOL_REF (SImode, VXWORKS_GOTT_BASE); + operands[2] = gen_rtx_SYMBOL_REF (SImode, VXWORKS_GOTT_INDEX); +}) + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" +{ + if (sparc_expand_move (DImode, operands)) + DONE; +}) + +;; Be careful, fmovd does not exist when !v9. +;; We match MEM moves directly when we have correct even +;; numbered registers, but fall into splits otherwise. 
+;; The constraint ordering here is really important to +;; avoid insane problems in reload, especially for patterns +;; of the form: +;; +;; (set (mem:DI (plus:SI (reg:SI 30 %fp) +;; (const_int -5016))) +;; (reg:DI 2 %g2)) +;; + +(define_insn "*movdi_insn_sp32" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=o,T,U,o,r,r,r,?T,?f,?f,?o,?f") + (match_operand:DI 1 "input_operand" + " J,U,T,r,o,i,r, f, T, o, f, f"))] + "! TARGET_V9 + && (register_operand (operands[0], DImode) + || register_or_zero_operand (operands[1], DImode))" + "@ + # + std\t%1, %0 + ldd\t%1, %0 + # + # + # + # + std\t%1, %0 + ldd\t%1, %0 + # + # + #" + [(set_attr "type" "store,store,load,*,*,*,*,fpstore,fpload,*,*,*") + (set_attr "length" "2,*,*,2,2,2,2,*,*,2,2,2")]) + +(define_insn "*movdi_insn_sp32_v9" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=T,o,T,U,o,r,r,r,?T,?f,?f,?o,?e,?e,?W") + (match_operand:DI 1 "input_operand" + " J,J,U,T,r,o,i,r, f, T, o, f, e, W, e"))] + "! TARGET_ARCH64 + && TARGET_V9 + && (register_operand (operands[0], DImode) + || register_or_zero_operand (operands[1], DImode))" + "@ + stx\t%%g0, %0 + # + std\t%1, %0 + ldd\t%1, %0 + # + # + # + # + std\t%1, %0 + ldd\t%1, %0 + # + # + fmovd\\t%1, %0 + ldd\\t%1, %0 + std\\t%1, %0" + [(set_attr "type" "store,store,store,load,*,*,*,*,fpstore,fpload,*,*,fpmove,fpload,fpstore") + (set_attr "length" "*,2,*,*,2,2,2,2,*,*,2,2,*,*,*") + (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*")]) + +(define_insn "*movdi_insn_sp64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,m,?e,?e,?W,b") + (match_operand:DI 1 "input_operand" "rI,N,m,rJ,e,W,e,J"))] + "TARGET_ARCH64 + && (register_operand (operands[0], DImode) + || register_or_zero_operand (operands[1], DImode))" + "@ + mov\t%1, %0 + sethi\t%%hi(%a1), %0 + ldx\t%1, %0 + stx\t%r1, %0 + fmovd\t%1, %0 + ldd\t%1, %0 + std\t%1, %0 + fzero\t%0" + [(set_attr "type" "*,*,load,store,fpmove,fpload,fpstore,fga") + (set_attr "fptype" "*,*,*,*,double,*,*,double")]) + +(define_expand "movdi_pic_label_ref" + [(set (match_dup 3) (high:DI + (unspec:DI [(match_operand:DI 1 "label_ref_operand" "") + (match_dup 2)] UNSPEC_MOVE_PIC_LABEL))) + (set (match_dup 4) (lo_sum:DI (match_dup 3) + (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_MOVE_PIC_LABEL))) + (set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_dup 5) (match_dup 4)))] + "TARGET_ARCH64 && flag_pic" +{ + crtl->uses_pic_offset_table = 1; + operands[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + if (!can_create_pseudo_p ()) + { + operands[3] = operands[0]; + operands[4] = operands[0]; + } + else + { + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + } + operands[5] = pic_offset_table_rtx; +}) + +(define_insn "*movdi_high_pic_label_ref" + [(set (match_operand:DI 0 "register_operand" "=r") + (high:DI + (unspec:DI [(match_operand:DI 1 "label_ref_operand" "") + (match_operand:DI 2 "" "")] UNSPEC_MOVE_PIC_LABEL)))] + "TARGET_ARCH64 && flag_pic" + "sethi\t%%hi(%a2-(%a1-.)), %0") + +(define_insn "*movdi_lo_sum_pic_label_ref" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand:DI 2 "label_ref_operand" "") + (match_operand:DI 3 "" "")] UNSPEC_MOVE_PIC_LABEL)))] + "TARGET_ARCH64 && flag_pic" + "or\t%1, %%lo(%a3-(%a2-.)), %0") + +;; SPARC-v9 code model support insns. See sparc_emit_set_symbolic_const64 +;; in sparc.c to see what is going on here... PIC stuff comes first. 
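;; As a concrete (non-PIC) example, the medium/middle code model
;; patterns further down (seth44/setm44/setl44) build a 44-bit
;; absolute address roughly as
;;
;;     sethi   %h44(sym), %tmp       ! bits 43-22
;;     or      %tmp, %m44(sym), %tmp ! bits 21-12
;;     sllx    %tmp, 12, %tmp
;;     or      %tmp, %l44(sym), %reg ! bits 11-0
;;
;; with %tmp and %reg standing for whatever registers are used; the
;; other code models rely on the analogous %hh/%hm/%lm and %uhi/%ulo
;; operators.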
+ +(define_insn "movdi_lo_sum_pic" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand:DI 2 "immediate_operand" "in")] UNSPEC_MOVE_PIC)))] + "TARGET_ARCH64 && flag_pic" +{ +#ifdef HAVE_AS_SPARC_GOTDATA_OP + return "xor\t%1, %%gdop_lox10(%a2), %0"; +#else + return "or\t%1, %%lo(%a2), %0"; +#endif +}) + +(define_insn "movdi_high_pic" + [(set (match_operand:DI 0 "register_operand" "=r") + (high:DI (unspec:DI [(match_operand 1 "" "")] UNSPEC_MOVE_PIC)))] + "TARGET_ARCH64 && flag_pic && check_pic (1)" +{ +#ifdef HAVE_AS_SPARC_GOTDATA_OP + return "sethi\t%%gdop_hix22(%a1), %0"; +#else + return "sethi\t%%hi(%a1), %0"; +#endif +}) + +(define_insn "movdi_pic_gotdata_op" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r") + (match_operand 3 "symbolic_operand" "")] UNSPEC_MOVE_GOTDATA))] + "TARGET_ARCH64 && flag_pic && check_pic (1)" +{ +#ifdef HAVE_AS_SPARC_GOTDATA_OP + return "ldx\t[%1 + %2], %0, %%gdop(%a3)"; +#else + return "ldx\t[%1 + %2], %0"; +#endif +} + [(set_attr "type" "load")]) + +(define_insn "*sethi_di_medlow_embmedany_pic" + [(set (match_operand:DI 0 "register_operand" "=r") + (high:DI (match_operand:DI 1 "medium_pic_operand" "")))] + "(TARGET_CM_MEDLOW || TARGET_CM_EMBMEDANY) && check_pic (1)" + "sethi\t%%hi(%a1), %0") + +(define_insn "*sethi_di_medlow" + [(set (match_operand:DI 0 "register_operand" "=r") + (high:DI (match_operand:DI 1 "symbolic_operand" "")))] + "TARGET_CM_MEDLOW && check_pic (1)" + "sethi\t%%hi(%a1), %0") + +(define_insn "*losum_di_medlow" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand" "")))] + "TARGET_CM_MEDLOW" + "or\t%1, %%lo(%a2), %0") + +(define_insn "seth44" + [(set (match_operand:DI 0 "register_operand" "=r") + (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")] UNSPEC_SETH44)))] + "TARGET_CM_MEDMID" + "sethi\t%%h44(%a1), %0") + +(define_insn "setm44" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")] UNSPEC_SETM44)))] + "TARGET_CM_MEDMID" + "or\t%1, %%m44(%a2), %0") + +(define_insn "setl44" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand" "")))] + "TARGET_CM_MEDMID" + "or\t%1, %%l44(%a2), %0") + +(define_insn "sethh" + [(set (match_operand:DI 0 "register_operand" "=r") + (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")] UNSPEC_SETHH)))] + "TARGET_CM_MEDANY" + "sethi\t%%hh(%a1), %0") + +(define_insn "setlm" + [(set (match_operand:DI 0 "register_operand" "=r") + (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")] UNSPEC_SETLM)))] + "TARGET_CM_MEDANY" + "sethi\t%%lm(%a1), %0") + +(define_insn "sethm" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")] UNSPEC_EMB_SETHM)))] + "TARGET_CM_MEDANY" + "or\t%1, %%hm(%a2), %0") + +(define_insn "setlo" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand" "")))] + "TARGET_CM_MEDANY" + "or\t%1, %%lo(%a2), %0") + +(define_insn "embmedany_sethi" + [(set 
(match_operand:DI 0 "register_operand" "=r") + (high:DI (unspec:DI [(match_operand:DI 1 "data_segment_operand" "")] UNSPEC_EMB_HISUM)))] + "TARGET_CM_EMBMEDANY && check_pic (1)" + "sethi\t%%hi(%a1), %0") + +(define_insn "embmedany_losum" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "data_segment_operand" "")))] + "TARGET_CM_EMBMEDANY" + "add\t%1, %%lo(%a2), %0") + +(define_insn "embmedany_brsum" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_EMB_HISUM))] + "TARGET_CM_EMBMEDANY" + "add\t%1, %_, %0") + +(define_insn "embmedany_textuhi" + [(set (match_operand:DI 0 "register_operand" "=r") + (high:DI (unspec:DI [(match_operand:DI 1 "text_segment_operand" "")] UNSPEC_EMB_TEXTUHI)))] + "TARGET_CM_EMBMEDANY && check_pic (1)" + "sethi\t%%uhi(%a1), %0") + +(define_insn "embmedany_texthi" + [(set (match_operand:DI 0 "register_operand" "=r") + (high:DI (unspec:DI [(match_operand:DI 1 "text_segment_operand" "")] UNSPEC_EMB_TEXTHI)))] + "TARGET_CM_EMBMEDANY && check_pic (1)" + "sethi\t%%hi(%a1), %0") + +(define_insn "embmedany_textulo" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand:DI 2 "text_segment_operand" "")] UNSPEC_EMB_TEXTULO)))] + "TARGET_CM_EMBMEDANY" + "or\t%1, %%ulo(%a2), %0") + +(define_insn "embmedany_textlo" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "text_segment_operand" "")))] + "TARGET_CM_EMBMEDANY" + "or\t%1, %%lo(%a2), %0") + +;; Now some patterns to help reload out a bit. +(define_expand "reload_indi" + [(parallel [(match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "immediate_operand" "") + (match_operand:TI 2 "register_operand" "=&r")])] + "(TARGET_CM_MEDANY + || TARGET_CM_EMBMEDANY) + && ! flag_pic" +{ + sparc_emit_set_symbolic_const64 (operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "reload_outdi" + [(parallel [(match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "immediate_operand" "") + (match_operand:TI 2 "register_operand" "=&r")])] + "(TARGET_CM_MEDANY + || TARGET_CM_EMBMEDANY) + && ! flag_pic" +{ + sparc_emit_set_symbolic_const64 (operands[0], operands[1], operands[2]); + DONE; +}) + +;; Split up putting CONSTs and REGs into DI regs when !arch64 +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "const_int_operand" ""))] + "! TARGET_ARCH64 && reload_completed" + [(clobber (const_int 0))] +{ +#if HOST_BITS_PER_WIDE_INT == 32 + emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), + (INTVAL (operands[1]) < 0) ? + constm1_rtx : + const0_rtx)); + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), + operands[1])); +#else + unsigned int low, high; + + low = trunc_int_for_mode (INTVAL (operands[1]), SImode); + high = trunc_int_for_mode (INTVAL (operands[1]) >> 32, SImode); + emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), GEN_INT (high))); + + /* Slick... but this trick loses if this subreg constant part + can be done in one insn. */ + if (low == high + && ! SPARC_SETHI32_P (high) + && ! 
SPARC_SIMM13_P (high)) + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), + gen_highpart (SImode, operands[0]))); + else + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), GEN_INT (low))); +#endif + DONE; +}) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "const_double_operand" ""))] + "reload_completed + && (! TARGET_V9 + || (! TARGET_ARCH64 + && ((GET_CODE (operands[0]) == REG + && REGNO (operands[0]) < 32) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) < 32))))" + [(clobber (const_int 0))] +{ + emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), + GEN_INT (CONST_DOUBLE_HIGH (operands[1])))); + + /* Slick... but this trick loses if this subreg constant part + can be done in one insn. */ + if (CONST_DOUBLE_LOW (operands[1]) == CONST_DOUBLE_HIGH (operands[1]) + && ! SPARC_SETHI32_P (CONST_DOUBLE_HIGH (operands[1])) + && ! SPARC_SIMM13_P (CONST_DOUBLE_HIGH (operands[1]))) + { + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), + gen_highpart (SImode, operands[0]))); + } + else + { + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), + GEN_INT (CONST_DOUBLE_LOW (operands[1])))); + } + DONE; +}) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" ""))] + "reload_completed + && (! TARGET_V9 + || (! TARGET_ARCH64 + && ((GET_CODE (operands[0]) == REG + && REGNO (operands[0]) < 32) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) < 32))))" + [(clobber (const_int 0))] +{ + rtx set_dest = operands[0]; + rtx set_src = operands[1]; + rtx dest1, dest2; + rtx src1, src2; + + dest1 = gen_highpart (SImode, set_dest); + dest2 = gen_lowpart (SImode, set_dest); + src1 = gen_highpart (SImode, set_src); + src2 = gen_lowpart (SImode, set_src); + + /* Now emit using the real source and destination we found, swapping + the order if we detect overlap. */ + if (reg_overlap_mentioned_p (dest1, src2)) + { + emit_insn (gen_movsi (dest2, src2)); + emit_insn (gen_movsi (dest1, src1)); + } + else + { + emit_insn (gen_movsi (dest1, src1)); + emit_insn (gen_movsi (dest2, src2)); + } + DONE; +}) + +;; Now handle the cases of memory moves from/to non-even +;; DI mode register pairs. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "memory_operand" ""))] + "(! TARGET_ARCH64 + && reload_completed + && sparc_splitdi_legitimate (operands[0], operands[1]))" + [(clobber (const_int 0))] +{ + rtx word0 = adjust_address (operands[1], SImode, 0); + rtx word1 = adjust_address (operands[1], SImode, 4); + rtx high_part = gen_highpart (SImode, operands[0]); + rtx low_part = gen_lowpart (SImode, operands[0]); + + if (reg_overlap_mentioned_p (high_part, word1)) + { + emit_insn (gen_movsi (low_part, word1)); + emit_insn (gen_movsi (high_part, word0)); + } + else + { + emit_insn (gen_movsi (high_part, word0)); + emit_insn (gen_movsi (low_part, word1)); + } + DONE; +}) + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (match_operand:DI 1 "register_operand" ""))] + "(! 
TARGET_ARCH64 + && reload_completed + && sparc_splitdi_legitimate (operands[1], operands[0]))" + [(clobber (const_int 0))] +{ + emit_insn (gen_movsi (adjust_address (operands[0], SImode, 0), + gen_highpart (SImode, operands[1]))); + emit_insn (gen_movsi (adjust_address (operands[0], SImode, 4), + gen_lowpart (SImode, operands[1]))); + DONE; +}) + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (match_operand:DI 1 "const_zero_operand" ""))] + "reload_completed + && (! TARGET_V9 + || (! TARGET_ARCH64 + && ! mem_min_alignment (operands[0], 8))) + && offsettable_memref_p (operands[0])" + [(clobber (const_int 0))] +{ + emit_insn (gen_movsi (adjust_address (operands[0], SImode, 0), const0_rtx)); + emit_insn (gen_movsi (adjust_address (operands[0], SImode, 4), const0_rtx)); + DONE; +}) + + +;; Floating point and vector move instructions + +;; Yes, you guessed it right, the former movsf expander. +(define_expand "mov" + [(set (match_operand:V32 0 "nonimmediate_operand" "") + (match_operand:V32 1 "general_operand" ""))] + "mode == SFmode || TARGET_VIS" +{ + if (sparc_expand_move (mode, operands)) + DONE; +}) + +(define_insn "*movsf_insn" + [(set (match_operand:V32 0 "nonimmediate_operand" "=d,f, *r,*r,*r,f,*r,m, m") + (match_operand:V32 1 "input_operand" "GY,f,*rRY, Q, S,m, m,f,*rGY"))] + "TARGET_FPU + && (register_operand (operands[0], mode) + || register_or_zero_operand (operands[1], mode))" +{ + if (GET_CODE (operands[1]) == CONST_DOUBLE + && (which_alternative == 2 + || which_alternative == 3 + || which_alternative == 4)) + { + REAL_VALUE_TYPE r; + long i; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (r, i); + operands[1] = GEN_INT (i); + } + + switch (which_alternative) + { + case 0: + return "fzeros\t%0"; + case 1: + return "fmovs\t%1, %0"; + case 2: + return "mov\t%1, %0"; + case 3: + return "sethi\t%%hi(%a1), %0"; + case 4: + return "#"; + case 5: + case 6: + return "ld\t%1, %0"; + case 7: + case 8: + return "st\t%r1, %0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fga,fpmove,*,*,*,fpload,load,fpstore,store")]) + +;; Exactly the same as above, except that all `f' cases are deleted. +;; This is necessary to prevent reload from ever trying to use a `f' reg +;; when -mno-fpu. + +(define_insn "*movsf_insn_no_fpu" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,r,r, m") + (match_operand:SF 1 "input_operand" "rR,Q,S,m,rG"))] + "! TARGET_FPU + && (register_operand (operands[0], SFmode) + || register_or_zero_operand (operands[1], SFmode))" +{ + if (GET_CODE (operands[1]) == CONST_DOUBLE + && (which_alternative == 0 + || which_alternative == 1 + || which_alternative == 2)) + { + REAL_VALUE_TYPE r; + long i; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (r, i); + operands[1] = GEN_INT (i); + } + + switch (which_alternative) + { + case 0: + return "mov\t%1, %0"; + case 1: + return "sethi\t%%hi(%a1), %0"; + case 2: + return "#"; + case 3: + return "ld\t%1, %0"; + case 4: + return "st\t%r1, %0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "*,*,*,load,store")]) + +;; The following 3 patterns build SFmode constants in integer registers. 
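;; For instance, 1.5f has the bit pattern 0x3fc00000, whose low ten
;; bits are clear, so it can be built with a single
;;
;;     sethi   %hi(0x3fc00000), %o0  ! register choice illustrative
;;
;; whereas a constant that also needs low-order bits is split below
;; into the usual sethi/or (high/lo_sum) pair.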
+ +(define_insn "*movsf_lo_sum" + [(set (match_operand:SF 0 "register_operand" "=r") + (lo_sum:SF (match_operand:SF 1 "register_operand" "r") + (match_operand:SF 2 "fp_const_high_losum_operand" "S")))] + "" +{ + REAL_VALUE_TYPE r; + long i; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[2]); + REAL_VALUE_TO_TARGET_SINGLE (r, i); + operands[2] = GEN_INT (i); + return "or\t%1, %%lo(%a2), %0"; +}) + +(define_insn "*movsf_high" + [(set (match_operand:SF 0 "register_operand" "=r") + (high:SF (match_operand:SF 1 "fp_const_high_losum_operand" "S")))] + "" +{ + REAL_VALUE_TYPE r; + long i; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (r, i); + operands[1] = GEN_INT (i); + return "sethi\t%%hi(%1), %0"; +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "fp_const_high_losum_operand" ""))] + "REG_P (operands[0]) && REGNO (operands[0]) < 32" + [(set (match_dup 0) (high:SF (match_dup 1))) + (set (match_dup 0) (lo_sum:SF (match_dup 0) (match_dup 1)))]) + +;; Yes, you again guessed it right, the former movdf expander. +(define_expand "mov" + [(set (match_operand:V64 0 "nonimmediate_operand" "") + (match_operand:V64 1 "general_operand" ""))] + "mode == DFmode || TARGET_VIS" +{ + if (sparc_expand_move (mode, operands)) + DONE; +}) + +;; Be careful, fmovd does not exist when !v9. +(define_insn "*movdf_insn_sp32" + [(set (match_operand:DF 0 "nonimmediate_operand" "= e,W,U,T,o,e, *r, o, e,o") + (match_operand:DF 1 "input_operand" "W#F,e,T,U,G,e,*rFo,*r,o#F,e"))] + "TARGET_FPU + && ! TARGET_V9 + && (register_operand (operands[0], DFmode) + || register_or_zero_operand (operands[1], DFmode))" + "@ + ldd\t%1, %0 + std\t%1, %0 + ldd\t%1, %0 + std\t%1, %0 + # + # + # + # + # + #" + [(set_attr "type" "fpload,fpstore,load,store,*,*,*,*,*,*") + (set_attr "length" "*,*,*,*,2,2,2,2,2,2")]) + +(define_insn "*movdf_insn_sp32_no_fpu" + [(set (match_operand:DF 0 "nonimmediate_operand" "=U,T,o, r,o") + (match_operand:DF 1 "input_operand" " T,U,G,ro,r"))] + "! TARGET_FPU + && ! TARGET_V9 + && (register_operand (operands[0], DFmode) + || register_or_zero_operand (operands[1], DFmode))" + "@ + ldd\t%1, %0 + std\t%1, %0 + # + # + #" + [(set_attr "type" "load,store,*,*,*") + (set_attr "length" "*,*,2,2,2")]) + +;; We have available v9 double floats but not 64-bit integer registers. +(define_insn "*movdf_insn_sp32_v9" + [(set (match_operand:V64 0 "nonimmediate_operand" "=b,e, e, T,W,U,T, f, *r, o") + (match_operand:V64 1 "input_operand" "GY,e,W#F,GY,e,T,U,o#F,*roFD,*rGYf"))] + "TARGET_FPU + && TARGET_V9 + && ! TARGET_ARCH64 + && (register_operand (operands[0], mode) + || register_or_zero_operand (operands[1], mode))" + "@ + fzero\t%0 + fmovd\t%1, %0 + ldd\t%1, %0 + stx\t%r1, %0 + std\t%1, %0 + ldd\t%1, %0 + std\t%1, %0 + # + # + #" + [(set_attr "type" "fga,fpmove,load,store,store,load,store,*,*,*") + (set_attr "length" "*,*,*,*,*,*,*,2,2,2") + (set_attr "fptype" "double,double,*,*,*,*,*,*,*,*")]) + +(define_insn "*movdf_insn_sp32_v9_no_fpu" + [(set (match_operand:DF 0 "nonimmediate_operand" "=U,T,T, r, o") + (match_operand:DF 1 "input_operand" " T,U,G,ro,rG"))] + "! TARGET_FPU + && TARGET_V9 + && ! TARGET_ARCH64 + && (register_operand (operands[0], DFmode) + || register_or_zero_operand (operands[1], DFmode))" + "@ + ldd\t%1, %0 + std\t%1, %0 + stx\t%r1, %0 + # + #" + [(set_attr "type" "load,store,store,*,*") + (set_attr "length" "*,*,*,2,2")]) + +;; We have available both v9 double floats and 64-bit integer registers. 
+(define_insn "*movdf_insn_sp64" + [(set (match_operand:V64 0 "nonimmediate_operand" "=b,e, e,W, *r,*r, m,*r") + (match_operand:V64 1 "input_operand" "GY,e,W#F,e,*rGY, m,*rGY,FD"))] + "TARGET_FPU + && TARGET_ARCH64 + && (register_operand (operands[0], mode) + || register_or_zero_operand (operands[1], mode))" + "@ + fzero\t%0 + fmovd\t%1, %0 + ldd\t%1, %0 + std\t%1, %0 + mov\t%r1, %0 + ldx\t%1, %0 + stx\t%r1, %0 + #" + [(set_attr "type" "fga,fpmove,load,store,*,load,store,*") + (set_attr "length" "*,*,*,*,*,*,*,2") + (set_attr "fptype" "double,double,*,*,*,*,*,*")]) + +(define_insn "*movdf_insn_sp64_no_fpu" + [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r, m") + (match_operand:DF 1 "input_operand" "r,m,rG"))] + "! TARGET_FPU + && TARGET_ARCH64 + && (register_operand (operands[0], DFmode) + || register_or_zero_operand (operands[1], DFmode))" + "@ + mov\t%1, %0 + ldx\t%1, %0 + stx\t%r1, %0" + [(set_attr "type" "*,load,store")]) + +;; This pattern builds V64mode constants in integer registers. +(define_split + [(set (match_operand:V64 0 "register_operand" "") + (match_operand:V64 1 "const_double_or_vector_operand" ""))] + "TARGET_FPU + && (GET_CODE (operands[0]) == REG + && REGNO (operands[0]) < 32) + && ! const_zero_operand (operands[1], GET_MODE (operands[0])) + && reload_completed" + [(clobber (const_int 0))] +{ + operands[0] = gen_rtx_raw_REG (DImode, REGNO (operands[0])); + + if (TARGET_ARCH64) + { +#if HOST_BITS_PER_WIDE_INT == 32 + gcc_unreachable (); +#else + enum machine_mode mode = GET_MODE (operands[1]); + rtx tem = simplify_subreg (DImode, operands[1], mode, 0); + emit_insn (gen_movdi (operands[0], tem)); +#endif + } + else + { + enum machine_mode mode = GET_MODE (operands[1]); + rtx hi = simplify_subreg (SImode, operands[1], mode, 0); + rtx lo = simplify_subreg (SImode, operands[1], mode, 4); + + gcc_assert (GET_CODE (hi) == CONST_INT); + gcc_assert (GET_CODE (lo) == CONST_INT); + + emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), hi)); + + /* Slick... but this trick loses if this subreg constant part + can be done in one insn. */ + if (lo == hi + && ! SPARC_SETHI32_P (INTVAL (hi)) + && ! SPARC_SIMM13_P (INTVAL (hi))) + { + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), + gen_highpart (SImode, operands[0]))); + } + else + { + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lo)); + } + } + DONE; +}) + +;; Ok, now the splits to handle all the multi insn and +;; mis-aligned memory address cases. +;; In these splits please take note that we must be +;; careful when V9 but not ARCH64 because the integer +;; register DFmode cases must be handled. +(define_split + [(set (match_operand:V64 0 "register_operand" "") + (match_operand:V64 1 "register_operand" ""))] + "(! TARGET_V9 + || (! TARGET_ARCH64 + && ((GET_CODE (operands[0]) == REG + && REGNO (operands[0]) < 32) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) < 32)))) + && reload_completed" + [(clobber (const_int 0))] +{ + rtx set_dest = operands[0]; + rtx set_src = operands[1]; + rtx dest1, dest2; + rtx src1, src2; + enum machine_mode half_mode; + + /* We can be expanded for DFmode or integral vector modes. 
*/ + if (mode == DFmode) + half_mode = SFmode; + else + half_mode = SImode; + + dest1 = gen_highpart (half_mode, set_dest); + dest2 = gen_lowpart (half_mode, set_dest); + src1 = gen_highpart (half_mode, set_src); + src2 = gen_lowpart (half_mode, set_src); + + /* Now emit using the real source and destination we found, swapping + the order if we detect overlap. */ + if (reg_overlap_mentioned_p (dest1, src2)) + { + emit_move_insn_1 (dest2, src2); + emit_move_insn_1 (dest1, src1); + } + else + { + emit_move_insn_1 (dest1, src1); + emit_move_insn_1 (dest2, src2); + } + DONE; +}) + +(define_split + [(set (match_operand:V64 0 "register_operand" "") + (match_operand:V64 1 "memory_operand" ""))] + "reload_completed + && ! TARGET_ARCH64 + && (((REGNO (operands[0]) % 2) != 0) + || ! mem_min_alignment (operands[1], 8)) + && offsettable_memref_p (operands[1])" + [(clobber (const_int 0))] +{ + enum machine_mode half_mode; + rtx word0, word1; + + /* We can be expanded for DFmode or integral vector modes. */ + if (mode == DFmode) + half_mode = SFmode; + else + half_mode = SImode; + + word0 = adjust_address (operands[1], half_mode, 0); + word1 = adjust_address (operands[1], half_mode, 4); + + if (reg_overlap_mentioned_p (gen_highpart (half_mode, operands[0]), word1)) + { + emit_move_insn_1 (gen_lowpart (half_mode, operands[0]), word1); + emit_move_insn_1 (gen_highpart (half_mode, operands[0]), word0); + } + else + { + emit_move_insn_1 (gen_highpart (half_mode, operands[0]), word0); + emit_move_insn_1 (gen_lowpart (half_mode, operands[0]), word1); + } + DONE; +}) + +(define_split + [(set (match_operand:V64 0 "memory_operand" "") + (match_operand:V64 1 "register_operand" ""))] + "reload_completed + && ! TARGET_ARCH64 + && (((REGNO (operands[1]) % 2) != 0) + || ! mem_min_alignment (operands[0], 8)) + && offsettable_memref_p (operands[0])" + [(clobber (const_int 0))] +{ + enum machine_mode half_mode; + rtx word0, word1; + + /* We can be expanded for DFmode or integral vector modes. */ + if (mode == DFmode) + half_mode = SFmode; + else + half_mode = SImode; + + word0 = adjust_address (operands[0], half_mode, 0); + word1 = adjust_address (operands[0], half_mode, 4); + + emit_move_insn_1 (word0, gen_highpart (half_mode, operands[1])); + emit_move_insn_1 (word1, gen_lowpart (half_mode, operands[1])); + DONE; +}) + +(define_split + [(set (match_operand:V64 0 "memory_operand" "") + (match_operand:V64 1 "const_zero_operand" ""))] + "reload_completed + && (! TARGET_V9 + || (! TARGET_ARCH64 + && ! mem_min_alignment (operands[0], 8))) + && offsettable_memref_p (operands[0])" + [(clobber (const_int 0))] +{ + enum machine_mode half_mode; + rtx dest1, dest2; + + /* We can be expanded for DFmode or integral vector modes. */ + if (mode == DFmode) + half_mode = SFmode; + else + half_mode = SImode; + + dest1 = adjust_address (operands[0], half_mode, 0); + dest2 = adjust_address (operands[0], half_mode, 4); + + emit_move_insn_1 (dest1, CONST0_RTX (half_mode)); + emit_move_insn_1 (dest2, CONST0_RTX (half_mode)); + DONE; +}) + +(define_split + [(set (match_operand:V64 0 "register_operand" "") + (match_operand:V64 1 "const_zero_operand" ""))] + "reload_completed + && ! 
TARGET_ARCH64 + && ((GET_CODE (operands[0]) == REG + && REGNO (operands[0]) < 32) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) < 32))" + [(clobber (const_int 0))] +{ + enum machine_mode half_mode; + rtx set_dest = operands[0]; + rtx dest1, dest2; + + /* We can be expanded for DFmode or integral vector modes. */ + if (mode == DFmode) + half_mode = SFmode; + else + half_mode = SImode; + + dest1 = gen_highpart (half_mode, set_dest); + dest2 = gen_lowpart (half_mode, set_dest); + emit_move_insn_1 (dest1, CONST0_RTX (half_mode)); + emit_move_insn_1 (dest2, CONST0_RTX (half_mode)); + DONE; +}) + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "" +{ + if (sparc_expand_move (TFmode, operands)) + DONE; +}) + +(define_insn "*movtf_insn_sp32" + [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e, o,U, r") + (match_operand:TF 1 "input_operand" " G,oe,GeUr,o,roG"))] + "TARGET_FPU + && ! TARGET_ARCH64 + && (register_operand (operands[0], TFmode) + || register_or_zero_operand (operands[1], TFmode))" + "#" + [(set_attr "length" "4")]) + +;; Exactly the same as above, except that all `e' cases are deleted. +;; This is necessary to prevent reload from ever trying to use a `e' reg +;; when -mno-fpu. + +(define_insn "*movtf_insn_sp32_no_fpu" + [(set (match_operand:TF 0 "nonimmediate_operand" "=o,U,o, r,o") + (match_operand:TF 1 "input_operand" " G,o,U,roG,r"))] + "! TARGET_FPU + && ! TARGET_ARCH64 + && (register_operand (operands[0], TFmode) + || register_or_zero_operand (operands[1], TFmode))" + "#" + [(set_attr "length" "4")]) + +(define_insn "*movtf_insn_sp64" + [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e, o, r") + (match_operand:TF 1 "input_operand" "G,oe,Ger,roG"))] + "TARGET_FPU + && TARGET_ARCH64 + && ! TARGET_HARD_QUAD + && (register_operand (operands[0], TFmode) + || register_or_zero_operand (operands[1], TFmode))" + "#" + [(set_attr "length" "2")]) + +(define_insn "*movtf_insn_sp64_hq" + [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,e,m, o, r") + (match_operand:TF 1 "input_operand" "G,e,m,e,rG,roG"))] + "TARGET_FPU + && TARGET_ARCH64 + && TARGET_HARD_QUAD + && (register_operand (operands[0], TFmode) + || register_or_zero_operand (operands[1], TFmode))" + "@ + # + fmovq\t%1, %0 + ldq\t%1, %0 + stq\t%1, %0 + # + #" + [(set_attr "type" "*,fpmove,fpload,fpstore,*,*") + (set_attr "length" "2,*,*,*,2,2")]) + +(define_insn "*movtf_insn_sp64_no_fpu" + [(set (match_operand:TF 0 "nonimmediate_operand" "= r, o") + (match_operand:TF 1 "input_operand" "orG,rG"))] + "! TARGET_FPU + && TARGET_ARCH64 + && (register_operand (operands[0], TFmode) + || register_or_zero_operand (operands[1], TFmode))" + "#" + [(set_attr "length" "2")]) + +;; Now all the splits to handle multi-insn TF mode moves. +(define_split + [(set (match_operand:TF 0 "register_operand" "") + (match_operand:TF 1 "register_operand" ""))] + "reload_completed + && (! TARGET_ARCH64 + || (TARGET_FPU + && ! TARGET_HARD_QUAD) + || ! fp_register_operand (operands[0], TFmode))" + [(clobber (const_int 0))] +{ + rtx set_dest = operands[0]; + rtx set_src = operands[1]; + rtx dest1, dest2; + rtx src1, src2; + + dest1 = gen_df_reg (set_dest, 0); + dest2 = gen_df_reg (set_dest, 1); + src1 = gen_df_reg (set_src, 0); + src2 = gen_df_reg (set_src, 1); + + /* Now emit using the real source and destination we found, swapping + the order if we detect overlap. 
*/ + if (reg_overlap_mentioned_p (dest1, src2)) + { + emit_insn (gen_movdf (dest2, src2)); + emit_insn (gen_movdf (dest1, src1)); + } + else + { + emit_insn (gen_movdf (dest1, src1)); + emit_insn (gen_movdf (dest2, src2)); + } + DONE; +}) + +(define_split + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "const_zero_operand" ""))] + "reload_completed" + [(clobber (const_int 0))] +{ + rtx set_dest = operands[0]; + rtx dest1, dest2; + + switch (GET_CODE (set_dest)) + { + case REG: + dest1 = gen_df_reg (set_dest, 0); + dest2 = gen_df_reg (set_dest, 1); + break; + case MEM: + dest1 = adjust_address (set_dest, DFmode, 0); + dest2 = adjust_address (set_dest, DFmode, 8); + break; + default: + gcc_unreachable (); + } + + emit_insn (gen_movdf (dest1, CONST0_RTX (DFmode))); + emit_insn (gen_movdf (dest2, CONST0_RTX (DFmode))); + DONE; +}) + +(define_split + [(set (match_operand:TF 0 "register_operand" "") + (match_operand:TF 1 "memory_operand" ""))] + "(reload_completed + && offsettable_memref_p (operands[1]) + && (! TARGET_ARCH64 + || ! TARGET_HARD_QUAD + || ! fp_register_operand (operands[0], TFmode)))" + [(clobber (const_int 0))] +{ + rtx word0 = adjust_address (operands[1], DFmode, 0); + rtx word1 = adjust_address (operands[1], DFmode, 8); + rtx set_dest, dest1, dest2; + + set_dest = operands[0]; + + dest1 = gen_df_reg (set_dest, 0); + dest2 = gen_df_reg (set_dest, 1); + + /* Now output, ordering such that we don't clobber any registers + mentioned in the address. */ + if (reg_overlap_mentioned_p (dest1, word1)) + + { + emit_insn (gen_movdf (dest2, word1)); + emit_insn (gen_movdf (dest1, word0)); + } + else + { + emit_insn (gen_movdf (dest1, word0)); + emit_insn (gen_movdf (dest2, word1)); + } + DONE; +}) + +(define_split + [(set (match_operand:TF 0 "memory_operand" "") + (match_operand:TF 1 "register_operand" ""))] + "(reload_completed + && offsettable_memref_p (operands[0]) + && (! TARGET_ARCH64 + || ! TARGET_HARD_QUAD + || ! fp_register_operand (operands[1], TFmode)))" + [(clobber (const_int 0))] +{ + rtx set_src = operands[1]; + + emit_insn (gen_movdf (adjust_address (operands[0], DFmode, 0), + gen_df_reg (set_src, 0))); + emit_insn (gen_movdf (adjust_address (operands[0], DFmode, 8), + gen_df_reg (set_src, 1))); + DONE; +}) + + +;; SPARC-V9 conditional move instructions + +;; We can handle larger constants here for some flavors, but for now we keep +;; it simple and only allow those constants supported by all flavors. +;; Note that emit_conditional_move canonicalizes operands 2,3 so that operand +;; 3 contains the constant if one is present, but we handle either for +;; generality (sparc.c puts a constant in operand 2). 
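+;;
+;; Two forms follow: the *_cc_v9 patterns test a condition-code register
+;; (%icc, %xcc or one of the %fcc registers) with mov<cond>/fmov<cond>,
+;; while the *_cc_reg_* patterns test the value of an integer register
+;; directly with the V9 movr/fmovr family.  Roughly, for
+;; "x = (a != 0) ? b : c" with a 64-bit `a' and `x' already holding `c',
+;; the register form boils down to a single
+;;
+;;         movrnz  %a, %b, %x
+;;
+;; (register names are placeholders) instead of a compare and branch.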
+ +(define_expand "movcc" + [(set (match_operand:I 0 "register_operand" "") + (if_then_else:I (match_operand 1 "comparison_operator" "") + (match_operand:I 2 "arith10_operand" "") + (match_operand:I 3 "arith10_operand" "")))] + "TARGET_V9 && !(mode == DImode && TARGET_ARCH32)" +{ + rtx cc_reg; + + if (GET_MODE (XEXP (operands[1], 0)) == DImode && !TARGET_ARCH64) + FAIL; + + if (GET_MODE (XEXP (operands[1], 0)) == TFmode && !TARGET_HARD_QUAD) + operands[1] + = sparc_emit_float_lib_cmp (XEXP (operands[1], 0), XEXP (operands[1], 1), + GET_CODE (operands[1])); + + if (XEXP (operands[1], 1) == const0_rtx + && GET_CODE (XEXP (operands[1], 0)) == REG + && GET_MODE (XEXP (operands[1], 0)) == DImode + && v9_regcmp_p (GET_CODE (operands[1]))) + cc_reg = XEXP (operands[1], 0); + else + cc_reg = gen_compare_reg (operands[1]); + + operands[1] + = gen_rtx_fmt_ee (GET_CODE (operands[1]), GET_MODE (cc_reg), cc_reg, + const0_rtx); +}) + +(define_expand "movcc" + [(set (match_operand:F 0 "register_operand" "") + (if_then_else:F (match_operand 1 "comparison_operator" "") + (match_operand:F 2 "register_operand" "") + (match_operand:F 3 "register_operand" "")))] + "TARGET_V9 && TARGET_FPU" +{ + rtx cc_reg; + + if (GET_MODE (XEXP (operands[1], 0)) == DImode && !TARGET_ARCH64) + FAIL; + + if (GET_MODE (XEXP (operands[1], 0)) == TFmode && !TARGET_HARD_QUAD) + operands[1] + = sparc_emit_float_lib_cmp (XEXP (operands[1], 0), XEXP (operands[1], 1), + GET_CODE (operands[1])); + + if (XEXP (operands[1], 1) == const0_rtx + && GET_CODE (XEXP (operands[1], 0)) == REG + && GET_MODE (XEXP (operands[1], 0)) == DImode + && v9_regcmp_p (GET_CODE (operands[1]))) + cc_reg = XEXP (operands[1], 0); + else + cc_reg = gen_compare_reg (operands[1]); + + operands[1] + = gen_rtx_fmt_ee (GET_CODE (operands[1]), GET_MODE (cc_reg), cc_reg, + const0_rtx); +}) + +;; Conditional move define_insns + +(define_insn "*mov_cc_v9" + [(set (match_operand:I 0 "register_operand" "=r,r") + (if_then_else:I (match_operator 1 "comparison_operator" + [(match_operand 2 "icc_or_fcc_register_operand" "X,X") + (const_int 0)]) + (match_operand:I 3 "arith11_operand" "rL,0") + (match_operand:I 4 "arith11_operand" "0,rL")))] + "TARGET_V9 && !(mode == DImode && TARGET_ARCH32)" + "@ + mov%C1\t%x2, %3, %0 + mov%c1\t%x2, %4, %0" + [(set_attr "type" "cmove")]) + +(define_insn "*mov_cc_reg_sp64" + [(set (match_operand:I 0 "register_operand" "=r,r") + (if_then_else:I (match_operator 1 "v9_register_compare_operator" + [(match_operand:DI 2 "register_operand" "r,r") + (const_int 0)]) + (match_operand:I 3 "arith10_operand" "rM,0") + (match_operand:I 4 "arith10_operand" "0,rM")))] + "TARGET_ARCH64" + "@ + movr%D1\t%2, %r3, %0 + movr%d1\t%2, %r4, %0" + [(set_attr "type" "cmove")]) + +(define_insn "*movsf_cc_v9" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (if_then_else:SF (match_operator 1 "comparison_operator" + [(match_operand 2 "icc_or_fcc_register_operand" "X,X") + (const_int 0)]) + (match_operand:SF 3 "register_operand" "f,0") + (match_operand:SF 4 "register_operand" "0,f")))] + "TARGET_V9 && TARGET_FPU" + "@ + fmovs%C1\t%x2, %3, %0 + fmovs%c1\t%x2, %4, %0" + [(set_attr "type" "fpcmove")]) + +(define_insn "*movsf_cc_reg_sp64" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (if_then_else:SF (match_operator 1 "v9_register_compare_operator" + [(match_operand:DI 2 "register_operand" "r,r") + (const_int 0)]) + (match_operand:SF 3 "register_operand" "f,0") + (match_operand:SF 4 "register_operand" "0,f")))] + "TARGET_ARCH64 && TARGET_FPU" + "@ + 
fmovrs%D1\t%2, %3, %0 + fmovrs%d1\t%2, %4, %0" + [(set_attr "type" "fpcrmove")]) + +;; Named because invoked by movtf_cc_v9 +(define_insn "movdf_cc_v9" + [(set (match_operand:DF 0 "register_operand" "=e,e") + (if_then_else:DF (match_operator 1 "comparison_operator" + [(match_operand 2 "icc_or_fcc_register_operand" "X,X") + (const_int 0)]) + (match_operand:DF 3 "register_operand" "e,0") + (match_operand:DF 4 "register_operand" "0,e")))] + "TARGET_V9 && TARGET_FPU" + "@ + fmovd%C1\t%x2, %3, %0 + fmovd%c1\t%x2, %4, %0" + [(set_attr "type" "fpcmove") + (set_attr "fptype" "double")]) + +;; Named because invoked by movtf_cc_reg_sp64 +(define_insn "movdf_cc_reg_sp64" + [(set (match_operand:DF 0 "register_operand" "=e,e") + (if_then_else:DF (match_operator 1 "v9_register_compare_operator" + [(match_operand:DI 2 "register_operand" "r,r") + (const_int 0)]) + (match_operand:DF 3 "register_operand" "e,0") + (match_operand:DF 4 "register_operand" "0,e")))] + "TARGET_ARCH64 && TARGET_FPU" + "@ + fmovrd%D1\t%2, %3, %0 + fmovrd%d1\t%2, %4, %0" + [(set_attr "type" "fpcrmove") + (set_attr "fptype" "double")]) + +(define_insn "*movtf_cc_hq_v9" + [(set (match_operand:TF 0 "register_operand" "=e,e") + (if_then_else:TF (match_operator 1 "comparison_operator" + [(match_operand 2 "icc_or_fcc_register_operand" "X,X") + (const_int 0)]) + (match_operand:TF 3 "register_operand" "e,0") + (match_operand:TF 4 "register_operand" "0,e")))] + "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD" + "@ + fmovq%C1\t%x2, %3, %0 + fmovq%c1\t%x2, %4, %0" + [(set_attr "type" "fpcmove")]) + +(define_insn "*movtf_cc_reg_hq_sp64" + [(set (match_operand:TF 0 "register_operand" "=e,e") + (if_then_else:TF (match_operator 1 "v9_register_compare_operator" + [(match_operand:DI 2 "register_operand" "r,r") + (const_int 0)]) + (match_operand:TF 3 "register_operand" "e,0") + (match_operand:TF 4 "register_operand" "0,e")))] + "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD" + "@ + fmovrq%D1\t%2, %3, %0 + fmovrq%d1\t%2, %4, %0" + [(set_attr "type" "fpcrmove")]) + +(define_insn_and_split "*movtf_cc_v9" + [(set (match_operand:TF 0 "register_operand" "=e,e") + (if_then_else:TF (match_operator 1 "comparison_operator" + [(match_operand 2 "icc_or_fcc_register_operand" "X,X") + (const_int 0)]) + (match_operand:TF 3 "register_operand" "e,0") + (match_operand:TF 4 "register_operand" "0,e")))] + "TARGET_V9 && TARGET_FPU && !TARGET_HARD_QUAD" + "#" + "&& reload_completed" + [(clobber (const_int 0))] +{ + rtx set_dest = operands[0]; + rtx set_srca = operands[3]; + rtx set_srcb = operands[4]; + int third = rtx_equal_p (set_dest, set_srca); + rtx dest1, dest2; + rtx srca1, srca2, srcb1, srcb2; + + dest1 = gen_df_reg (set_dest, 0); + dest2 = gen_df_reg (set_dest, 1); + srca1 = gen_df_reg (set_srca, 0); + srca2 = gen_df_reg (set_srca, 1); + srcb1 = gen_df_reg (set_srcb, 0); + srcb2 = gen_df_reg (set_srcb, 1); + + /* Now emit using the real source and destination we found, swapping + the order if we detect overlap. 
*/ + if ((third && reg_overlap_mentioned_p (dest1, srcb2)) + || (!third && reg_overlap_mentioned_p (dest1, srca2))) + { + emit_insn (gen_movdf_cc_v9 (dest2, operands[1], operands[2], srca2, srcb2)); + emit_insn (gen_movdf_cc_v9 (dest1, operands[1], operands[2], srca1, srcb1)); + } + else + { + emit_insn (gen_movdf_cc_v9 (dest1, operands[1], operands[2], srca1, srcb1)); + emit_insn (gen_movdf_cc_v9 (dest2, operands[1], operands[2], srca2, srcb2)); + } + DONE; +} + [(set_attr "length" "2")]) + +(define_insn_and_split "*movtf_cc_reg_sp64" + [(set (match_operand:TF 0 "register_operand" "=e,e") + (if_then_else:TF (match_operator 1 "v9_register_compare_operator" + [(match_operand:DI 2 "register_operand" "r,r") + (const_int 0)]) + (match_operand:TF 3 "register_operand" "e,0") + (match_operand:TF 4 "register_operand" "0,e")))] + "TARGET_ARCH64 && TARGET_FPU && ! TARGET_HARD_QUAD" + "#" + "&& reload_completed" + [(clobber (const_int 0))] +{ + rtx set_dest = operands[0]; + rtx set_srca = operands[3]; + rtx set_srcb = operands[4]; + int third = rtx_equal_p (set_dest, set_srca); + rtx dest1, dest2; + rtx srca1, srca2, srcb1, srcb2; + + dest1 = gen_df_reg (set_dest, 0); + dest2 = gen_df_reg (set_dest, 1); + srca1 = gen_df_reg (set_srca, 0); + srca2 = gen_df_reg (set_srca, 1); + srcb1 = gen_df_reg (set_srcb, 0); + srcb2 = gen_df_reg (set_srcb, 1); + + /* Now emit using the real source and destination we found, swapping + the order if we detect overlap. */ + if ((third && reg_overlap_mentioned_p (dest1, srcb2)) + || (!third && reg_overlap_mentioned_p (dest1, srca2))) + { + emit_insn (gen_movdf_cc_reg_sp64 (dest2, operands[1], operands[2], srca2, srcb2)); + emit_insn (gen_movdf_cc_reg_sp64 (dest1, operands[1], operands[2], srca1, srcb1)); + } + else + { + emit_insn (gen_movdf_cc_reg_sp64 (dest1, operands[1], operands[2], srca1, srcb1)); + emit_insn (gen_movdf_cc_reg_sp64 (dest2, operands[1], operands[2], srca2, srcb2)); + } + DONE; +} + [(set_attr "length" "2")]) + + +;; Zero-extension instructions + +;; These patterns originally accepted general_operands, however, slightly +;; better code is generated by only accepting register_operands, and then +;; letting combine generate the ldu[hb] insns. 
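+;;
+;; Concretely, a HImode register source is widened with a shift pair while
+;; a memory source becomes a single load once combine has its way, roughly
+;; (register names are placeholders):
+;;
+;;         sll     %src, 16, %tmp          ! register source
+;;         srl     %tmp, 16, %dst
+;;
+;;         lduh    [%addr], %dst           ! memory source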
+ +(define_expand "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:HI 1 "register_operand" "")))] + "" +{ + rtx temp = gen_reg_rtx (SImode); + rtx shift_16 = GEN_INT (16); + int op1_subbyte = 0; + + if (GET_CODE (operand1) == SUBREG) + { + op1_subbyte = SUBREG_BYTE (operand1); + op1_subbyte /= GET_MODE_SIZE (SImode); + op1_subbyte *= GET_MODE_SIZE (SImode); + operand1 = XEXP (operand1, 0); + } + + emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte), + shift_16)); + emit_insn (gen_lshrsi3 (operand0, temp, shift_16)); + DONE; +}) + +(define_insn "*zero_extendhisi2_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "memory_operand" "m")))] + "" + "lduh\t%1, %0" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_expand "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "register_operand" "")))] + "" + "") + +(define_insn "*zero_extendqihi2_insn" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (zero_extend:HI (match_operand:QI 1 "input_operand" "r,m")))] + "GET_CODE (operands[1]) != CONST_INT" + "@ + and\t%1, 0xff, %0 + ldub\t%1, %0" + [(set_attr "type" "*,load") + (set_attr "us3load_type" "*,3cycle")]) + +(define_expand "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:QI 1 "register_operand" "")))] + "" + "") + +(define_insn "*zero_extendqisi2_insn" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "input_operand" "r,m")))] + "GET_CODE (operands[1]) != CONST_INT" + "@ + and\t%1, 0xff, %0 + ldub\t%1, %0" + [(set_attr "type" "*,load") + (set_attr "us3load_type" "*,3cycle")]) + +(define_expand "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:QI 1 "register_operand" "")))] + "TARGET_ARCH64" + "") + +(define_insn "*zero_extendqidi2_insn" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:QI 1 "input_operand" "r,m")))] + "TARGET_ARCH64 && GET_CODE (operands[1]) != CONST_INT" + "@ + and\t%1, 0xff, %0 + ldub\t%1, %0" + [(set_attr "type" "*,load") + (set_attr "us3load_type" "*,3cycle")]) + +(define_expand "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:HI 1 "register_operand" "")))] + "TARGET_ARCH64" +{ + rtx temp = gen_reg_rtx (DImode); + rtx shift_48 = GEN_INT (48); + int op1_subbyte = 0; + + if (GET_CODE (operand1) == SUBREG) + { + op1_subbyte = SUBREG_BYTE (operand1); + op1_subbyte /= GET_MODE_SIZE (DImode); + op1_subbyte *= GET_MODE_SIZE (DImode); + operand1 = XEXP (operand1, 0); + } + + emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte), + shift_48)); + emit_insn (gen_lshrdi3 (operand0, temp, shift_48)); + DONE; +}) + +(define_insn "*zero_extendhidi2_insn" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:HI 1 "memory_operand" "m")))] + "TARGET_ARCH64" + "lduh\t%1, %0" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +;; ??? Write truncdisi pattern using sra? 
+ +(define_expand "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:SI 1 "register_operand" "")))] + "" + "") + +(define_insn "*zero_extendsidi2_insn_sp64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:SI 1 "input_operand" "r,m")))] + "TARGET_ARCH64 && GET_CODE (operands[1]) != CONST_INT" + "@ + srl\t%1, 0, %0 + lduw\t%1, %0" + [(set_attr "type" "shift,load")]) + +(define_insn_and_split "*zero_extendsidi2_insn_sp32" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))] + "! TARGET_ARCH64" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + rtx dest1, dest2; + + dest1 = gen_highpart (SImode, operands[0]); + dest2 = gen_lowpart (SImode, operands[0]); + + /* Swap the order in case of overlap. */ + if (REGNO (dest1) == REGNO (operands[1])) + { + operands[2] = dest2; + operands[3] = operands[1]; + operands[4] = dest1; + operands[5] = const0_rtx; + } + else + { + operands[2] = dest1; + operands[3] = const0_rtx; + operands[4] = dest2; + operands[5] = operands[1]; + } +} + [(set_attr "length" "2")]) + +;; Simplify comparisons of extended values. + +(define_insn "*cmp_zero_extendqisi2" + [(set (reg:CC 100) + (compare:CC (zero_extend:SI (match_operand:QI 0 "register_operand" "r")) + (const_int 0)))] + "" + "andcc\t%0, 0xff, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_zero_qi" + [(set (reg:CC 100) + (compare:CC (match_operand:QI 0 "register_operand" "r") + (const_int 0)))] + "" + "andcc\t%0, 0xff, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_zero_extendqisi2_set" + [(set (reg:CC 100) + (compare:CC (zero_extend:SI (match_operand:QI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_dup 1)))] + "" + "andcc\t%1, 0xff, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_zero_extendqisi2_andcc_set" + [(set (reg:CC 100) + (compare:CC (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 255)) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (subreg:QI (match_dup 1) 0)))] + "" + "andcc\t%1, 0xff, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_zero_extendqidi2" + [(set (reg:CCX 100) + (compare:CCX (zero_extend:DI (match_operand:QI 0 "register_operand" "r")) + (const_int 0)))] + "TARGET_ARCH64" + "andcc\t%0, 0xff, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_zero_qi_sp64" + [(set (reg:CCX 100) + (compare:CCX (match_operand:QI 0 "register_operand" "r") + (const_int 0)))] + "TARGET_ARCH64" + "andcc\t%0, 0xff, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_zero_extendqidi2_set" + [(set (reg:CCX 100) + (compare:CCX (zero_extend:DI (match_operand:QI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_dup 1)))] + "TARGET_ARCH64" + "andcc\t%1, 0xff, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_zero_extendqidi2_andcc_set" + [(set (reg:CCX 100) + (compare:CCX (and:DI (match_operand:DI 1 "register_operand" "r") + (const_int 255)) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (subreg:QI (match_dup 1) 0)))] + "TARGET_ARCH64" + "andcc\t%1, 0xff, %0" + [(set_attr "type" "compare")]) + +;; Similarly, handle {SI,DI}->QI mode truncation followed by a compare. 
+ +(define_insn "*cmp_siqi_trunc" + [(set (reg:CC 100) + (compare:CC (subreg:QI (match_operand:SI 0 "register_operand" "r") 3) + (const_int 0)))] + "" + "andcc\t%0, 0xff, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_siqi_trunc_set" + [(set (reg:CC 100) + (compare:CC (subreg:QI (match_operand:SI 1 "register_operand" "r") 3) + (const_int 0))) + (set (match_operand:QI 0 "register_operand" "=r") + (subreg:QI (match_dup 1) 3))] + "" + "andcc\t%1, 0xff, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_diqi_trunc" + [(set (reg:CC 100) + (compare:CC (subreg:QI (match_operand:DI 0 "register_operand" "r") 7) + (const_int 0)))] + "TARGET_ARCH64" + "andcc\t%0, 0xff, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_diqi_trunc_set" + [(set (reg:CC 100) + (compare:CC (subreg:QI (match_operand:DI 1 "register_operand" "r") 7) + (const_int 0))) + (set (match_operand:QI 0 "register_operand" "=r") + (subreg:QI (match_dup 1) 7))] + "TARGET_ARCH64" + "andcc\t%1, 0xff, %0" + [(set_attr "type" "compare")]) + + +;; Sign-extension instructions + +;; These patterns originally accepted general_operands, however, slightly +;; better code is generated by only accepting register_operands, and then +;; letting combine generate the lds[hb] insns. + +(define_expand "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "register_operand" "")))] + "" +{ + rtx temp = gen_reg_rtx (SImode); + rtx shift_16 = GEN_INT (16); + int op1_subbyte = 0; + + if (GET_CODE (operand1) == SUBREG) + { + op1_subbyte = SUBREG_BYTE (operand1); + op1_subbyte /= GET_MODE_SIZE (SImode); + op1_subbyte *= GET_MODE_SIZE (SImode); + operand1 = XEXP (operand1, 0); + } + + emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte), + shift_16)); + emit_insn (gen_ashrsi3 (operand0, temp, shift_16)); + DONE; +}) + +(define_insn "*sign_extendhisi2_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:HI 1 "memory_operand" "m")))] + "" + "ldsh\t%1, %0" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_expand "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "") + (sign_extend:HI (match_operand:QI 1 "register_operand" "")))] + "" +{ + rtx temp = gen_reg_rtx (SImode); + rtx shift_24 = GEN_INT (24); + int op1_subbyte = 0; + int op0_subbyte = 0; + + if (GET_CODE (operand1) == SUBREG) + { + op1_subbyte = SUBREG_BYTE (operand1); + op1_subbyte /= GET_MODE_SIZE (SImode); + op1_subbyte *= GET_MODE_SIZE (SImode); + operand1 = XEXP (operand1, 0); + } + if (GET_CODE (operand0) == SUBREG) + { + op0_subbyte = SUBREG_BYTE (operand0); + op0_subbyte /= GET_MODE_SIZE (SImode); + op0_subbyte *= GET_MODE_SIZE (SImode); + operand0 = XEXP (operand0, 0); + } + emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte), + shift_24)); + if (GET_MODE (operand0) != SImode) + operand0 = gen_rtx_SUBREG (SImode, operand0, op0_subbyte); + emit_insn (gen_ashrsi3 (operand0, temp, shift_24)); + DONE; +}) + +(define_insn "*sign_extendqihi2_insn" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI (match_operand:QI 1 "memory_operand" "m")))] + "" + "ldsb\t%1, %0" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_expand "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "register_operand" "")))] + "" +{ + rtx temp = gen_reg_rtx (SImode); + rtx shift_24 = GEN_INT (24); + int op1_subbyte = 0; + + if 
(GET_CODE (operand1) == SUBREG) + { + op1_subbyte = SUBREG_BYTE (operand1); + op1_subbyte /= GET_MODE_SIZE (SImode); + op1_subbyte *= GET_MODE_SIZE (SImode); + operand1 = XEXP (operand1, 0); + } + + emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte), + shift_24)); + emit_insn (gen_ashrsi3 (operand0, temp, shift_24)); + DONE; +}) + +(define_insn "*sign_extendqisi2_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:QI 1 "memory_operand" "m")))] + "" + "ldsb\t%1, %0" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_expand "extendqidi2" + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:QI 1 "register_operand" "")))] + "TARGET_ARCH64" +{ + rtx temp = gen_reg_rtx (DImode); + rtx shift_56 = GEN_INT (56); + int op1_subbyte = 0; + + if (GET_CODE (operand1) == SUBREG) + { + op1_subbyte = SUBREG_BYTE (operand1); + op1_subbyte /= GET_MODE_SIZE (DImode); + op1_subbyte *= GET_MODE_SIZE (DImode); + operand1 = XEXP (operand1, 0); + } + + emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte), + shift_56)); + emit_insn (gen_ashrdi3 (operand0, temp, shift_56)); + DONE; +}) + +(define_insn "*sign_extendqidi2_insn" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:QI 1 "memory_operand" "m")))] + "TARGET_ARCH64" + "ldsb\t%1, %0" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_expand "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:HI 1 "register_operand" "")))] + "TARGET_ARCH64" +{ + rtx temp = gen_reg_rtx (DImode); + rtx shift_48 = GEN_INT (48); + int op1_subbyte = 0; + + if (GET_CODE (operand1) == SUBREG) + { + op1_subbyte = SUBREG_BYTE (operand1); + op1_subbyte /= GET_MODE_SIZE (DImode); + op1_subbyte *= GET_MODE_SIZE (DImode); + operand1 = XEXP (operand1, 0); + } + + emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte), + shift_48)); + emit_insn (gen_ashrdi3 (operand0, temp, shift_48)); + DONE; +}) + +(define_insn "*sign_extendhidi2_insn" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:HI 1 "memory_operand" "m")))] + "TARGET_ARCH64" + "ldsh\t%1, %0" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_expand "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:SI 1 "register_operand" "")))] + "TARGET_ARCH64" + "") + +(define_insn "*sign_extendsidi2_insn" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI (match_operand:SI 1 "input_operand" "r,m")))] + "TARGET_ARCH64" + "@ + sra\t%1, 0, %0 + ldsw\t%1, %0" + [(set_attr "type" "shift,sload") + (set_attr "us3load_type" "*,3cycle")]) + + +;; Special pattern for optimizing bit-field compares. This is needed +;; because combine uses this as a canonical form. 
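+;;
+;; The (zero_extract x len pos) test is rewritten as an andcc against the
+;; mask ((1 << len) - 1) << (32 - pos - len).  The "> 19" guard (and the
+;; "> 51" one in the DImode variant) keeps the highest mask bit at or
+;; below bit 11, so the mask is always a valid signed 13-bit immediate.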
+ +(define_insn "*cmp_zero_extract" + [(set (reg:CC 100) + (compare:CC + (zero_extract:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "small_int_operand" "I") + (match_operand:SI 2 "small_int_operand" "I")) + (const_int 0)))] + "INTVAL (operands[2]) > 19" +{ + int len = INTVAL (operands[1]); + int pos = 32 - INTVAL (operands[2]) - len; + HOST_WIDE_INT mask = ((1 << len) - 1) << pos; + operands[1] = GEN_INT (mask); + return "andcc\t%0, %1, %%g0"; +} + [(set_attr "type" "compare")]) + +(define_insn "*cmp_zero_extract_sp64" + [(set (reg:CCX 100) + (compare:CCX + (zero_extract:DI (match_operand:DI 0 "register_operand" "r") + (match_operand:SI 1 "small_int_operand" "I") + (match_operand:SI 2 "small_int_operand" "I")) + (const_int 0)))] + "TARGET_ARCH64 && INTVAL (operands[2]) > 51" +{ + int len = INTVAL (operands[1]); + int pos = 64 - INTVAL (operands[2]) - len; + HOST_WIDE_INT mask = (((unsigned HOST_WIDE_INT) 1 << len) - 1) << pos; + operands[1] = GEN_INT (mask); + return "andcc\t%0, %1, %%g0"; +} + [(set_attr "type" "compare")]) + + +;; Conversions between float, double and long double. + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=e") + (float_extend:DF + (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FPU" + "fstod\t%1, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_expand "extendsftf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (float_extend:TF + (match_operand:SF 1 "register_operand" "")))] + "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_cvt (FLOAT_EXTEND, operands); DONE;") + +(define_insn "*extendsftf2_hq" + [(set (match_operand:TF 0 "register_operand" "=e") + (float_extend:TF + (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FPU && TARGET_HARD_QUAD" + "fstoq\t%1, %0" + [(set_attr "type" "fp")]) + +(define_expand "extenddftf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (float_extend:TF + (match_operand:DF 1 "register_operand" "")))] + "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_cvt (FLOAT_EXTEND, operands); DONE;") + +(define_insn "*extenddftf2_hq" + [(set (match_operand:TF 0 "register_operand" "=e") + (float_extend:TF + (match_operand:DF 1 "register_operand" "e")))] + "TARGET_FPU && TARGET_HARD_QUAD" + "fdtoq\t%1, %0" + [(set_attr "type" "fp")]) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF + (match_operand:DF 1 "register_operand" "e")))] + "TARGET_FPU" + "fdtos\t%1, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_expand "trunctfsf2" + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:TF 1 "general_operand" "")))] + "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_cvt (FLOAT_TRUNCATE, operands); DONE;") + +(define_insn "*trunctfsf2_hq" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF + (match_operand:TF 1 "register_operand" "e")))] + "TARGET_FPU && TARGET_HARD_QUAD" + "fqtos\t%1, %0" + [(set_attr "type" "fp")]) + +(define_expand "trunctfdf2" + [(set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF + (match_operand:TF 1 "general_operand" "")))] + "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_cvt (FLOAT_TRUNCATE, operands); DONE;") + +(define_insn "*trunctfdf2_hq" + [(set (match_operand:DF 0 "register_operand" "=e") + (float_truncate:DF + (match_operand:TF 1 "register_operand" "e")))] + "TARGET_FPU && TARGET_HARD_QUAD" + 
"fqtod\t%1, %0" + [(set_attr "type" "fp")]) + + +;; Conversion between fixed point and floating point. + +(define_insn "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:SI 1 "register_operand" "f")))] + "TARGET_FPU" + "fitos\t%1, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_insn "floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=e") + (float:DF (match_operand:SI 1 "register_operand" "f")))] + "TARGET_FPU" + "fitod\t%1, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_expand "floatsitf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (float:TF (match_operand:SI 1 "register_operand" "")))] + "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_cvt (FLOAT, operands); DONE;") + +(define_insn "*floatsitf2_hq" + [(set (match_operand:TF 0 "register_operand" "=e") + (float:TF (match_operand:SI 1 "register_operand" "f")))] + "TARGET_FPU && TARGET_HARD_QUAD" + "fitoq\t%1, %0" + [(set_attr "type" "fp")]) + +(define_expand "floatunssitf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (unsigned_float:TF (match_operand:SI 1 "register_operand" "")))] + "TARGET_FPU && TARGET_ARCH64 && ! TARGET_HARD_QUAD" + "emit_tfmode_cvt (UNSIGNED_FLOAT, operands); DONE;") + +;; Now the same for 64 bit sources. + +(define_insn "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:DI 1 "register_operand" "e")))] + "TARGET_V9 && TARGET_FPU" + "fxtos\t%1, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_expand "floatunsdisf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:DI 1 "general_operand" ""))] + "TARGET_ARCH64 && TARGET_FPU" + "sparc_emit_floatunsdi (operands, SFmode); DONE;") + +(define_insn "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=e") + (float:DF (match_operand:DI 1 "register_operand" "e")))] + "TARGET_V9 && TARGET_FPU" + "fxtod\t%1, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_expand "floatunsdidf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DI 1 "general_operand" ""))] + "TARGET_ARCH64 && TARGET_FPU" + "sparc_emit_floatunsdi (operands, DFmode); DONE;") + +(define_expand "floatditf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (float:TF (match_operand:DI 1 "register_operand" "")))] + "TARGET_FPU && TARGET_V9 && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_cvt (FLOAT, operands); DONE;") + +(define_insn "*floatditf2_hq" + [(set (match_operand:TF 0 "register_operand" "=e") + (float:TF (match_operand:DI 1 "register_operand" "e")))] + "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD" + "fxtoq\t%1, %0" + [(set_attr "type" "fp")]) + +(define_expand "floatunsditf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (unsigned_float:TF (match_operand:DI 1 "register_operand" "")))] + "TARGET_FPU && TARGET_ARCH64 && ! TARGET_HARD_QUAD" + "emit_tfmode_cvt (UNSIGNED_FLOAT, operands); DONE;") + +;; Convert a float to an actual integer. +;; Truncation is performed as part of the conversion. 
+ +(define_insn "fix_truncsfsi2" + [(set (match_operand:SI 0 "register_operand" "=f") + (fix:SI (fix:SF (match_operand:SF 1 "register_operand" "f"))))] + "TARGET_FPU" + "fstoi\t%1, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_insn "fix_truncdfsi2" + [(set (match_operand:SI 0 "register_operand" "=f") + (fix:SI (fix:DF (match_operand:DF 1 "register_operand" "e"))))] + "TARGET_FPU" + "fdtoi\t%1, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_expand "fix_trunctfsi2" + [(set (match_operand:SI 0 "register_operand" "") + (fix:SI (match_operand:TF 1 "general_operand" "")))] + "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_cvt (FIX, operands); DONE;") + +(define_insn "*fix_trunctfsi2_hq" + [(set (match_operand:SI 0 "register_operand" "=f") + (fix:SI (match_operand:TF 1 "register_operand" "e")))] + "TARGET_FPU && TARGET_HARD_QUAD" + "fqtoi\t%1, %0" + [(set_attr "type" "fp")]) + +(define_expand "fixuns_trunctfsi2" + [(set (match_operand:SI 0 "register_operand" "") + (unsigned_fix:SI (match_operand:TF 1 "general_operand" "")))] + "TARGET_FPU && TARGET_ARCH64 && ! TARGET_HARD_QUAD" + "emit_tfmode_cvt (UNSIGNED_FIX, operands); DONE;") + +;; Now the same, for V9 targets + +(define_insn "fix_truncsfdi2" + [(set (match_operand:DI 0 "register_operand" "=e") + (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))] + "TARGET_V9 && TARGET_FPU" + "fstox\t%1, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_expand "fixuns_truncsfdi2" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:SF 1 "general_operand" ""))] + "TARGET_ARCH64 && TARGET_FPU" + "sparc_emit_fixunsdi (operands, SFmode); DONE;") + +(define_insn "fix_truncdfdi2" + [(set (match_operand:DI 0 "register_operand" "=e") + (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e"))))] + "TARGET_V9 && TARGET_FPU" + "fdtox\t%1, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_expand "fixuns_truncdfdi2" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DF 1 "general_operand" ""))] + "TARGET_ARCH64 && TARGET_FPU" + "sparc_emit_fixunsdi (operands, DFmode); DONE;") + +(define_expand "fix_trunctfdi2" + [(set (match_operand:DI 0 "register_operand" "") + (fix:DI (match_operand:TF 1 "general_operand" "")))] + "TARGET_V9 && TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_cvt (FIX, operands); DONE;") + +(define_insn "*fix_trunctfdi2_hq" + [(set (match_operand:DI 0 "register_operand" "=e") + (fix:DI (match_operand:TF 1 "register_operand" "e")))] + "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD" + "fqtox\t%1, %0" + [(set_attr "type" "fp")]) + +(define_expand "fixuns_trunctfdi2" + [(set (match_operand:DI 0 "register_operand" "") + (unsigned_fix:DI (match_operand:TF 1 "general_operand" "")))] + "TARGET_FPU && TARGET_ARCH64 && ! TARGET_HARD_QUAD" + "emit_tfmode_cvt (UNSIGNED_FIX, operands); DONE;") + + +;; Integer addition/subtraction instructions. + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "arith_double_add_operand" "")))] + "" +{ + if (! 
TARGET_ARCH64) + { + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, + gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_PLUS (DImode, operands[1], + operands[2])), + gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (CCmode, SPARC_ICC_REG))))); + DONE; + } +}) + +(define_insn_and_split "*adddi3_insn_sp32" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "arith_double_operand" "%r") + (match_operand:DI 2 "arith_double_operand" "rHI"))) + (clobber (reg:CC 100))] + "! TARGET_ARCH64" + "#" + "&& reload_completed" + [(parallel [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (plus:SI (match_dup 4) + (match_dup 5)) + (const_int 0))) + (set (match_dup 3) + (plus:SI (match_dup 4) (match_dup 5)))]) + (set (match_dup 6) + (plus:SI (plus:SI (match_dup 7) + (match_dup 8)) + (ltu:SI (reg:CC_NOOV 100) (const_int 0))))] +{ + operands[3] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_lowpart (SImode, operands[2]); + operands[6] = gen_highpart (SImode, operands[0]); + operands[7] = gen_highpart_mode (SImode, DImode, operands[1]); +#if HOST_BITS_PER_WIDE_INT == 32 + if (GET_CODE (operands[2]) == CONST_INT) + { + if (INTVAL (operands[2]) < 0) + operands[8] = constm1_rtx; + else + operands[8] = const0_rtx; + } + else +#endif + operands[8] = gen_highpart_mode (SImode, DImode, operands[2]); +} + [(set_attr "length" "2")]) + +;; LTU here means "carry set" +(define_insn "addx" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "arith_operand" "%r") + (match_operand:SI 2 "arith_operand" "rI")) + (ltu:SI (reg:CC_NOOV 100) (const_int 0))))] + "" + "addx\t%1, %2, %0" + [(set_attr "type" "ialuX")]) + +(define_insn_and_split "*addx_extend_sp32" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plus:SI (plus:SI + (match_operand:SI 1 "register_or_zero_operand" "%rJ") + (match_operand:SI 2 "arith_operand" "rI")) + (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))] + "! TARGET_ARCH64" + "#" + "&& reload_completed" + [(set (match_dup 3) (plus:SI (plus:SI (match_dup 1) (match_dup 2)) + (ltu:SI (reg:CC_NOOV 100) (const_int 0)))) + (set (match_dup 4) (const_int 0))] + "operands[3] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart_mode (SImode, DImode, operands[1]);" + [(set_attr "length" "2")]) + +(define_insn "*addx_extend_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plus:SI (plus:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ") + (match_operand:SI 2 "arith_operand" "rI")) + (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))] + "TARGET_ARCH64" + "addx\t%r1, %2, %0" + [(set_attr "type" "ialuX")]) + +(define_insn_and_split "*adddi3_extend_sp32" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "register_operand" "r"))) + (clobber (reg:CC 100))] + "! 
TARGET_ARCH64" + "#" + "&& reload_completed" + [(parallel [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (plus:SI (match_dup 3) (match_dup 1)) + (const_int 0))) + (set (match_dup 5) (plus:SI (match_dup 3) (match_dup 1)))]) + (set (match_dup 6) + (plus:SI (plus:SI (match_dup 4) (const_int 0)) + (ltu:SI (reg:CC_NOOV 100) (const_int 0))))] + "operands[3] = gen_lowpart (SImode, operands[2]); + operands[4] = gen_highpart (SImode, operands[2]); + operands[5] = gen_lowpart (SImode, operands[0]); + operands[6] = gen_highpart (SImode, operands[0]);" + [(set_attr "length" "2")]) + +(define_insn "*adddi3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r") + (match_operand:DI 2 "arith_add_operand" "rI,O")))] + "TARGET_ARCH64" + "@ + add\t%1, %2, %0 + sub\t%1, -%2, %0") + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,d") + (plus:SI (match_operand:SI 1 "register_operand" "%r,r,d") + (match_operand:SI 2 "arith_add_operand" "rI,O,d")))] + "" + "@ + add\t%1, %2, %0 + sub\t%1, -%2, %0 + fpadd32s\t%1, %2, %0" + [(set_attr "type" "*,*,fga") + (set_attr "fptype" "*,*,single")]) + +(define_insn "*cmp_cc_plus" + [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (plus:SI (match_operand:SI 0 "arith_operand" "%r") + (match_operand:SI 1 "arith_operand" "rI")) + (const_int 0)))] + "" + "addcc\t%0, %1, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_plus" + [(set (reg:CCX_NOOV 100) + (compare:CCX_NOOV (plus:DI (match_operand:DI 0 "arith_operand" "%r") + (match_operand:DI 1 "arith_operand" "rI")) + (const_int 0)))] + "TARGET_ARCH64" + "addcc\t%0, %1, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_cc_plus_set" + [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (plus:SI (match_operand:SI 1 "arith_operand" "%r") + (match_operand:SI 2 "arith_operand" "rI")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "addcc\t%1, %2, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_plus_set" + [(set (reg:CCX_NOOV 100) + (compare:CCX_NOOV (plus:DI (match_operand:DI 1 "arith_operand" "%r") + (match_operand:DI 2 "arith_operand" "rI")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_ARCH64" + "addcc\t%1, %2, %0" + [(set_attr "type" "compare")]) + +(define_expand "subdi3" + [(set (match_operand:DI 0 "register_operand" "") + (minus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "arith_double_add_operand" "")))] + "" +{ + if (! TARGET_ARCH64) + { + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, + gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_MINUS (DImode, operands[1], + operands[2])), + gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (CCmode, SPARC_ICC_REG))))); + DONE; + } +}) + +(define_insn_and_split "*subdi3_insn_sp32" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "arith_double_operand" "rHI"))) + (clobber (reg:CC 100))] + "! 
TARGET_ARCH64" + "#" + "&& reload_completed" + [(parallel [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (minus:SI (match_dup 4) + (match_dup 5)) + (const_int 0))) + (set (match_dup 3) + (minus:SI (match_dup 4) (match_dup 5)))]) + (set (match_dup 6) + (minus:SI (minus:SI (match_dup 7) + (match_dup 8)) + (ltu:SI (reg:CC_NOOV 100) (const_int 0))))] +{ + operands[3] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_lowpart (SImode, operands[2]); + operands[6] = gen_highpart (SImode, operands[0]); + operands[7] = gen_highpart (SImode, operands[1]); +#if HOST_BITS_PER_WIDE_INT == 32 + if (GET_CODE (operands[2]) == CONST_INT) + { + if (INTVAL (operands[2]) < 0) + operands[8] = constm1_rtx; + else + operands[8] = const0_rtx; + } + else +#endif + operands[8] = gen_highpart_mode (SImode, DImode, operands[2]); +} + [(set_attr "length" "2")]) + +;; LTU here means "carry set" +(define_insn "subx" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ") + (match_operand:SI 2 "arith_operand" "rI")) + (ltu:SI (reg:CC_NOOV 100) (const_int 0))))] + "" + "subx\t%r1, %2, %0" + [(set_attr "type" "ialuX")]) + +(define_insn "*subx_extend_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (minus:SI (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ") + (match_operand:SI 2 "arith_operand" "rI")) + (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))] + "TARGET_ARCH64" + "subx\t%r1, %2, %0" + [(set_attr "type" "ialuX")]) + +(define_insn_and_split "*subx_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (minus:SI (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ") + (match_operand:SI 2 "arith_operand" "rI")) + (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))] + "! TARGET_ARCH64" + "#" + "&& reload_completed" + [(set (match_dup 3) (minus:SI (minus:SI (match_dup 1) (match_dup 2)) + (ltu:SI (reg:CC_NOOV 100) (const_int 0)))) + (set (match_dup 4) (const_int 0))] + "operands[3] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[0]);" + [(set_attr "length" "2")]) + +(define_insn_and_split "*subdi3_extend_sp32" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "register_operand" "r") + (zero_extend:DI (match_operand:SI 2 "register_operand" "r")))) + (clobber (reg:CC 100))] + "! 
TARGET_ARCH64" + "#" + "&& reload_completed" + [(parallel [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (minus:SI (match_dup 3) (match_dup 2)) + (const_int 0))) + (set (match_dup 5) (minus:SI (match_dup 3) (match_dup 2)))]) + (set (match_dup 6) + (minus:SI (minus:SI (match_dup 4) (const_int 0)) + (ltu:SI (reg:CC_NOOV 100) (const_int 0))))] + "operands[3] = gen_lowpart (SImode, operands[1]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[5] = gen_lowpart (SImode, operands[0]); + operands[6] = gen_highpart (SImode, operands[0]);" + [(set_attr "length" "2")]) + +(define_insn "*subdi3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (minus:DI (match_operand:DI 1 "register_operand" "r,r") + (match_operand:DI 2 "arith_add_operand" "rI,O")))] + "TARGET_ARCH64" + "@ + sub\t%1, %2, %0 + add\t%1, -%2, %0") + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,d") + (minus:SI (match_operand:SI 1 "register_operand" "r,r,d") + (match_operand:SI 2 "arith_add_operand" "rI,O,d")))] + "" + "@ + sub\t%1, %2, %0 + add\t%1, -%2, %0 + fpsub32s\t%1, %2, %0" + [(set_attr "type" "*,*,fga") + (set_attr "fptype" "*,*,single")]) + +(define_insn "*cmp_minus_cc" + [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (minus:SI (match_operand:SI 0 "register_or_zero_operand" "rJ") + (match_operand:SI 1 "arith_operand" "rI")) + (const_int 0)))] + "" + "subcc\t%r0, %1, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_minus_ccx" + [(set (reg:CCX_NOOV 100) + (compare:CCX_NOOV (minus:DI (match_operand:DI 0 "register_operand" "r") + (match_operand:DI 1 "arith_operand" "rI")) + (const_int 0)))] + "TARGET_ARCH64" + "subcc\t%0, %1, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "cmp_minus_cc_set" + [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ") + (match_operand:SI 2 "arith_operand" "rI")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_dup 1) (match_dup 2)))] + "" + "subcc\t%r1, %2, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_minus_ccx_set" + [(set (reg:CCX_NOOV 100) + (compare:CCX_NOOV (minus:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "arith_operand" "rI")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_dup 1) (match_dup 2)))] + "TARGET_ARCH64" + "subcc\t%1, %2, %0" + [(set_attr "type" "compare")]) + + +;; Integer multiply/divide instructions. + +;; The 32-bit multiply/divide instructions are deprecated on v9, but at +;; least in UltraSPARC I, II and IIi it is a win tick-wise. + +(define_insn "mulsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI (match_operand:SI 1 "arith_operand" "%r") + (match_operand:SI 2 "arith_operand" "rI")))] + "TARGET_HARD_MUL" + "smul\t%1, %2, %0" + [(set_attr "type" "imul")]) + +(define_expand "muldi3" + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "arith_operand" "") + (match_operand:DI 2 "arith_operand" "")))] + "TARGET_ARCH64 || TARGET_V8PLUS" +{ + if (TARGET_V8PLUS) + { + emit_insn (gen_muldi3_v8plus (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_insn "*muldi3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (match_operand:DI 1 "arith_operand" "%r") + (match_operand:DI 2 "arith_operand" "rI")))] + "TARGET_ARCH64" + "mulx\t%1, %2, %0" + [(set_attr "type" "imul")]) + +;; V8plus wide multiply. 
+;; XXX +(define_insn "muldi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=r,h") + (mult:DI (match_operand:DI 1 "arith_operand" "%r,0") + (match_operand:DI 2 "arith_operand" "rI,rI"))) + (clobber (match_scratch:SI 3 "=&h,X")) + (clobber (match_scratch:SI 4 "=&h,X"))] + "TARGET_V8PLUS" +{ + if (sparc_check_64 (operands[1], insn) <= 0) + output_asm_insn ("srl\t%L1, 0, %L1", operands); + if (which_alternative == 1) + output_asm_insn ("sllx\t%H1, 32, %H1", operands); + if (GET_CODE (operands[2]) == CONST_INT) + { + if (which_alternative == 1) + return "or\t%L1, %H1, %H1\n\tmulx\t%H1, %2, %L0\;srlx\t%L0, 32, %H0"; + else + return "sllx\t%H1, 32, %3\n\tor\t%L1, %3, %3\n\tmulx\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"; + } + else if (rtx_equal_p (operands[1], operands[2])) + { + if (which_alternative == 1) + return "or\t%L1, %H1, %H1\n\tmulx\t%H1, %H1, %L0\;srlx\t%L0, 32, %H0"; + else + return "sllx\t%H1, 32, %3\n\tor\t%L1, %3, %3\n\tmulx\t%3, %3, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"; + } + if (sparc_check_64 (operands[2], insn) <= 0) + output_asm_insn ("srl\t%L2, 0, %L2", operands); + if (which_alternative == 1) + return "or\t%L1, %H1, %H1\n\tsllx\t%H2, 32, %L1\n\tor\t%L2, %L1, %L1\n\tmulx\t%H1, %L1, %L0\;srlx\t%L0, 32, %H0"; + else + return "sllx\t%H1, 32, %3\n\tsllx\t%H2, 32, %4\n\tor\t%L1, %3, %3\n\tor\t%L2, %4, %4\n\tmulx\t%3, %4, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"; +} + [(set_attr "type" "multi") + (set_attr "length" "9,8")]) + +(define_insn "*cmp_mul_set" + [(set (reg:CC 100) + (compare:CC (mult:SI (match_operand:SI 1 "arith_operand" "%r") + (match_operand:SI 2 "arith_operand" "rI")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (mult:SI (match_dup 1) (match_dup 2)))] + "TARGET_V8 || TARGET_SPARCLITE || TARGET_DEPRECATED_V8_INSNS" + "smulcc\t%1, %2, %0" + [(set_attr "type" "imul")]) + +(define_expand "mulsidi3" + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "arith_operand" ""))))] + "TARGET_HARD_MUL" +{ + if (CONSTANT_P (operands[2])) + { + if (TARGET_V8PLUS) + emit_insn (gen_const_mulsidi3_v8plus (operands[0], operands[1], + operands[2])); + else if (TARGET_ARCH32) + emit_insn (gen_const_mulsidi3_sp32 (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_const_mulsidi3_sp64 (operands[0], operands[1], + operands[2])); + DONE; + } + if (TARGET_V8PLUS) + { + emit_insn (gen_mulsidi3_v8plus (operands[0], operands[1], operands[2])); + DONE; + } +}) + +;; V9 puts the 64-bit product in a 64-bit register. Only out or global +;; registers can hold 64-bit values in the V8plus environment. 
+;; XXX +(define_insn "mulsidi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=h,r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))) + (clobber (match_scratch:SI 3 "=X,&h"))] + "TARGET_V8PLUS" + "@ + smul\t%1, %2, %L0\n\tsrlx\t%L0, 32, %H0 + smul\t%1, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0" + [(set_attr "type" "multi") + (set_attr "length" "2,3")]) + +;; XXX +(define_insn "const_mulsidi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=h,r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (match_operand:DI 2 "small_int_operand" "I,I"))) + (clobber (match_scratch:SI 3 "=X,&h"))] + "TARGET_V8PLUS" + "@ + smul\t%1, %2, %L0\n\tsrlx\t%L0, 32, %H0 + smul\t%1, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0" + [(set_attr "type" "multi") + (set_attr "length" "2,3")]) + +;; XXX +(define_insn "*mulsidi3_sp32" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))] + "TARGET_HARD_MUL32" +{ + return TARGET_SPARCLET + ? "smuld\t%1, %2, %L0" + : "smul\t%1, %2, %L0\n\trd\t%%y, %H0"; +} + [(set (attr "type") + (if_then_else (eq_attr "isa" "sparclet") + (const_string "imul") (const_string "multi"))) + (set (attr "length") + (if_then_else (eq_attr "isa" "sparclet") + (const_int 1) (const_int 2)))]) + +(define_insn "*mulsidi3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))] + "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64" + "smul\t%1, %2, %0" + [(set_attr "type" "imul")]) + +;; Extra pattern, because sign_extend of a constant isn't valid. + +;; XXX +(define_insn "const_mulsidi3_sp32" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "small_int_operand" "I")))] + "TARGET_HARD_MUL32" +{ + return TARGET_SPARCLET + ? 
"smuld\t%1, %2, %L0" + : "smul\t%1, %2, %L0\n\trd\t%%y, %H0"; +} + [(set (attr "type") + (if_then_else (eq_attr "isa" "sparclet") + (const_string "imul") (const_string "multi"))) + (set (attr "length") + (if_then_else (eq_attr "isa" "sparclet") + (const_int 1) (const_int 2)))]) + +(define_insn "const_mulsidi3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "small_int_operand" "I")))] + "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64" + "smul\t%1, %2, %0" + [(set_attr "type" "imul")]) + +(define_expand "smulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "arith_operand" ""))) + (const_int 32))))] + "TARGET_HARD_MUL && TARGET_ARCH32" +{ + if (CONSTANT_P (operands[2])) + { + if (TARGET_V8PLUS) + { + emit_insn (gen_const_smulsi3_highpart_v8plus (operands[0], + operands[1], + operands[2], + GEN_INT (32))); + DONE; + } + emit_insn (gen_const_smulsi3_highpart (operands[0], operands[1], operands[2])); + DONE; + } + if (TARGET_V8PLUS) + { + emit_insn (gen_smulsi3_highpart_v8plus (operands[0], operands[1], + operands[2], GEN_INT (32))); + DONE; + } +}) + +;; XXX +(define_insn "smulsi3_highpart_v8plus" + [(set (match_operand:SI 0 "register_operand" "=h,r") + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r"))) + (match_operand:SI 3 "small_int_operand" "I,I")))) + (clobber (match_scratch:SI 4 "=X,&h"))] + "TARGET_V8PLUS" + "@ + smul\t%1, %2, %0\;srlx\t%0, %3, %0 + smul\t%1, %2, %4\;srlx\t%4, %3, %0" + [(set_attr "type" "multi") + (set_attr "length" "2")]) + +;; The combiner changes TRUNCATE in the previous pattern to SUBREG. 
+;; XXX +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=h,r") + (subreg:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r"))) + (match_operand:SI 3 "small_int_operand" "I,I")) + 4)) + (clobber (match_scratch:SI 4 "=X,&h"))] + "TARGET_V8PLUS" + "@ + smul\t%1, %2, %0\n\tsrlx\t%0, %3, %0 + smul\t%1, %2, %4\n\tsrlx\t%4, %3, %0" + [(set_attr "type" "multi") + (set_attr "length" "2")]) + +;; XXX +(define_insn "const_smulsi3_highpart_v8plus" + [(set (match_operand:SI 0 "register_operand" "=h,r") + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (match_operand:DI 2 "small_int_operand" "I,I")) + (match_operand:SI 3 "small_int_operand" "I,I")))) + (clobber (match_scratch:SI 4 "=X,&h"))] + "TARGET_V8PLUS" + "@ + smul\t%1, %2, %0\n\tsrlx\t%0, %3, %0 + smul\t%1, %2, %4\n\tsrlx\t%4, %3, %0" + [(set_attr "type" "multi") + (set_attr "length" "2")]) + +;; XXX +(define_insn "*smulsi3_highpart_sp32" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))) + (const_int 32))))] + "TARGET_HARD_MUL32" + "smul\t%1, %2, %%g0\n\trd\t%%y, %0" + [(set_attr "type" "multi") + (set_attr "length" "2")]) + +;; XXX +(define_insn "const_smulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "small_int_operand" "i")) + (const_int 32))))] + "TARGET_HARD_MUL32" + "smul\t%1, %2, %%g0\n\trd\t%%y, %0" + [(set_attr "type" "multi") + (set_attr "length" "2")]) + +(define_expand "umulsidi3" + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "uns_arith_operand" ""))))] + "TARGET_HARD_MUL" +{ + if (CONSTANT_P (operands[2])) + { + if (TARGET_V8PLUS) + emit_insn (gen_const_umulsidi3_v8plus (operands[0], operands[1], + operands[2])); + else if (TARGET_ARCH32) + emit_insn (gen_const_umulsidi3_sp32 (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_const_umulsidi3_sp64 (operands[0], operands[1], + operands[2])); + DONE; + } + if (TARGET_V8PLUS) + { + emit_insn (gen_umulsidi3_v8plus (operands[0], operands[1], operands[2])); + DONE; + } +}) + +;; XXX +(define_insn "umulsidi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=h,r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r")))) + (clobber (match_scratch:SI 3 "=X,&h"))] + "TARGET_V8PLUS" + "@ + umul\t%1, %2, %L0\n\tsrlx\t%L0, 32, %H0 + umul\t%1, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0" + [(set_attr "type" "multi") + (set_attr "length" "2,3")]) + +;; XXX +(define_insn "*umulsidi3_sp32" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))] + "TARGET_HARD_MUL32" +{ + return TARGET_SPARCLET + ? 
"umuld\t%1, %2, %L0" + : "umul\t%1, %2, %L0\n\trd\t%%y, %H0"; +} + [(set (attr "type") + (if_then_else (eq_attr "isa" "sparclet") + (const_string "imul") (const_string "multi"))) + (set (attr "length") + (if_then_else (eq_attr "isa" "sparclet") + (const_int 1) (const_int 2)))]) + +(define_insn "*umulsidi3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))] + "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64" + "umul\t%1, %2, %0" + [(set_attr "type" "imul")]) + +;; Extra pattern, because sign_extend of a constant isn't valid. + +;; XXX +(define_insn "const_umulsidi3_sp32" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "uns_small_int_operand" "")))] + "TARGET_HARD_MUL32" +{ + return TARGET_SPARCLET + ? "umuld\t%1, %s2, %L0" + : "umul\t%1, %s2, %L0\n\trd\t%%y, %H0"; +} + [(set (attr "type") + (if_then_else (eq_attr "isa" "sparclet") + (const_string "imul") (const_string "multi"))) + (set (attr "length") + (if_then_else (eq_attr "isa" "sparclet") + (const_int 1) (const_int 2)))]) + +(define_insn "const_umulsidi3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "uns_small_int_operand" "")))] + "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64" + "umul\t%1, %s2, %0" + [(set_attr "type" "imul")]) + +;; XXX +(define_insn "const_umulsidi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=h,r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (match_operand:DI 2 "uns_small_int_operand" ""))) + (clobber (match_scratch:SI 3 "=X,h"))] + "TARGET_V8PLUS" + "@ + umul\t%1, %s2, %L0\n\tsrlx\t%L0, 32, %H0 + umul\t%1, %s2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0" + [(set_attr "type" "multi") + (set_attr "length" "2,3")]) + +(define_expand "umulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "uns_arith_operand" ""))) + (const_int 32))))] + "TARGET_HARD_MUL && TARGET_ARCH32" +{ + if (CONSTANT_P (operands[2])) + { + if (TARGET_V8PLUS) + { + emit_insn (gen_const_umulsi3_highpart_v8plus (operands[0], + operands[1], + operands[2], + GEN_INT (32))); + DONE; + } + emit_insn (gen_const_umulsi3_highpart (operands[0], operands[1], operands[2])); + DONE; + } + if (TARGET_V8PLUS) + { + emit_insn (gen_umulsi3_highpart_v8plus (operands[0], operands[1], + operands[2], GEN_INT (32))); + DONE; + } +}) + +;; XXX +(define_insn "umulsi3_highpart_v8plus" + [(set (match_operand:SI 0 "register_operand" "=h,r") + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r"))) + (match_operand:SI 3 "small_int_operand" "I,I")))) + (clobber (match_scratch:SI 4 "=X,h"))] + "TARGET_V8PLUS" + "@ + umul\t%1, %2, %0\n\tsrlx\t%0, %3, %0 + umul\t%1, %2, %4\n\tsrlx\t%4, %3, %0" + [(set_attr "type" "multi") + (set_attr "length" "2")]) + +;; XXX +(define_insn "const_umulsi3_highpart_v8plus" + [(set (match_operand:SI 0 "register_operand" "=h,r") + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r")) + (match_operand:DI 2 "uns_small_int_operand" "")) + (match_operand:SI 3 
"small_int_operand" "I,I")))) + (clobber (match_scratch:SI 4 "=X,h"))] + "TARGET_V8PLUS" + "@ + umul\t%1, %s2, %0\n\tsrlx\t%0, %3, %0 + umul\t%1, %s2, %4\n\tsrlx\t%4, %3, %0" + [(set_attr "type" "multi") + (set_attr "length" "2")]) + +;; XXX +(define_insn "*umulsi3_highpart_sp32" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))) + (const_int 32))))] + "TARGET_HARD_MUL32" + "umul\t%1, %2, %%g0\n\trd\t%%y, %0" + [(set_attr "type" "multi") + (set_attr "length" "2")]) + +;; XXX +(define_insn "const_umulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "uns_small_int_operand" "")) + (const_int 32))))] + "TARGET_HARD_MUL32" + "umul\t%1, %s2, %%g0\n\trd\t%%y, %0" + [(set_attr "type" "multi") + (set_attr "length" "2")]) + +(define_expand "divsi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (div:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "input_operand" ""))) + (clobber (match_scratch:SI 3 ""))])] + "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS" +{ + if (TARGET_ARCH64) + { + operands[3] = gen_reg_rtx(SImode); + emit_insn (gen_ashrsi3 (operands[3], operands[1], GEN_INT (31))); + emit_insn (gen_divsi3_sp64 (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } +}) + +;; The V8 architecture specifies that there must be at least 3 instructions +;; between a write to the Y register and a use of it for correct results. +;; We try to fill one of them with a simple constant or a memory load. + +(define_insn "divsi3_sp32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (div:SI (match_operand:SI 1 "register_operand" "r,r,r") + (match_operand:SI 2 "input_operand" "rI,K,m"))) + (clobber (match_scratch:SI 3 "=&r,&r,&r"))] + "(TARGET_V8 || TARGET_DEPRECATED_V8_INSNS) && TARGET_ARCH32" +{ + output_asm_insn ("sra\t%1, 31, %3", operands); + output_asm_insn ("wr\t%3, 0, %%y", operands); + + switch (which_alternative) + { + case 0: + if (TARGET_V9) + return "sdiv\t%1, %2, %0"; + else + return "nop\n\tnop\n\tnop\n\tsdiv\t%1, %2, %0"; + case 1: + if (TARGET_V9) + return "sethi\t%%hi(%a2), %3\n\tsdiv\t%1, %3, %0"; + else + return "sethi\t%%hi(%a2), %3\n\tnop\n\tnop\n\tsdiv\t%1, %3, %0"; + case 2: + if (TARGET_V9) + return "ld\t%2, %3\n\tsdiv\t%1, %3, %0"; + else + return "ld\t%2, %3\n\tnop\n\tnop\n\tsdiv\t%1, %3, %0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "multi") + (set (attr "length") + (if_then_else (eq_attr "isa" "v9") + (const_int 4) (const_int 6)))]) + +(define_insn "divsi3_sp64" + [(set (match_operand:SI 0 "register_operand" "=r") + (div:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "input_operand" "rI"))) + (use (match_operand:SI 3 "register_operand" "r"))] + "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64" + "wr\t%%g0, %3, %%y\n\tsdiv\t%1, %2, %0" + [(set_attr "type" "multi") + (set_attr "length" "2")]) + +(define_insn "divdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (div:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "arith_operand" "rI")))] + "TARGET_ARCH64" + "sdivx\t%1, %2, %0" + [(set_attr "type" "idiv")]) + +(define_insn "*cmp_sdiv_cc_set" + [(set (reg:CC 100) + (compare:CC (div:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" 
"rI")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (div:SI (match_dup 1) (match_dup 2))) + (clobber (match_scratch:SI 3 "=&r"))] + "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS" +{ + output_asm_insn ("sra\t%1, 31, %3", operands); + output_asm_insn ("wr\t%3, 0, %%y", operands); + + if (TARGET_V9) + return "sdivcc\t%1, %2, %0"; + else + return "nop\n\tnop\n\tnop\n\tsdivcc\t%1, %2, %0"; +} + [(set_attr "type" "multi") + (set (attr "length") + (if_then_else (eq_attr "isa" "v9") + (const_int 3) (const_int 6)))]) + +;; XXX +(define_expand "udivsi3" + [(set (match_operand:SI 0 "register_operand" "") + (udiv:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "input_operand" "")))] + "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS" + "") + +;; The V8 architecture specifies that there must be at least 3 instructions +;; between a write to the Y register and a use of it for correct results. +;; We try to fill one of them with a simple constant or a memory load. + +(define_insn "udivsi3_sp32" + [(set (match_operand:SI 0 "register_operand" "=r,&r,&r,&r") + (udiv:SI (match_operand:SI 1 "nonimmediate_operand" "r,r,r,m") + (match_operand:SI 2 "input_operand" "rI,K,m,r")))] + "(TARGET_V8 || TARGET_DEPRECATED_V8_INSNS) && TARGET_ARCH32" +{ + output_asm_insn ("wr\t%%g0, 0, %%y", operands); + + switch (which_alternative) + { + case 0: + if (TARGET_V9) + return "udiv\t%1, %2, %0"; + else + return "nop\n\tnop\n\tnop\n\tudiv\t%1, %2, %0"; + case 1: + if (TARGET_V9) + return "sethi\t%%hi(%a2), %0\n\tudiv\t%1, %0, %0"; + else + return "sethi\t%%hi(%a2), %0\n\tnop\n\tnop\n\tudiv\t%1, %0, %0"; + case 2: + if (TARGET_V9) + return "ld\t%2, %0\n\tudiv\t%1, %0, %0"; + else + return "ld\t%2, %0\n\tnop\n\tnop\n\tudiv\t%1, %0, %0"; + case 3: + if (TARGET_V9) + return "ld\t%1, %0\n\tudiv\t%0, %2, %0"; + else + return "ld\t%1, %0\n\tnop\n\tnop\n\tudiv\t%0, %2, %0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "multi") + (set (attr "length") + (if_then_else (eq_attr "isa" "v9") + (const_int 3) (const_int 5)))]) + +(define_insn "udivsi3_sp64" + [(set (match_operand:SI 0 "register_operand" "=r") + (udiv:SI (match_operand:SI 1 "nonimmediate_operand" "r") + (match_operand:SI 2 "input_operand" "rI")))] + "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64" + "wr\t%%g0, 0, %%y\n\tudiv\t%1, %2, %0" + [(set_attr "type" "multi") + (set_attr "length" "2")]) + +(define_insn "udivdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (udiv:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "arith_operand" "rI")))] + "TARGET_ARCH64" + "udivx\t%1, %2, %0" + [(set_attr "type" "idiv")]) + +(define_insn "*cmp_udiv_cc_set" + [(set (reg:CC 100) + (compare:CC (udiv:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (udiv:SI (match_dup 1) (match_dup 2)))] + "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS" +{ + output_asm_insn ("wr\t%%g0, 0, %%y", operands); + + if (TARGET_V9) + return "udivcc\t%1, %2, %0"; + else + return "nop\n\tnop\n\tnop\n\tudivcc\t%1, %2, %0"; +} + [(set_attr "type" "multi") + (set (attr "length") + (if_then_else (eq_attr "isa" "v9") + (const_int 2) (const_int 5)))]) + +; sparclet multiply/accumulate insns + +(define_insn "*smacsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "arith_operand" "rI")) + (match_operand:SI 3 "register_operand" "0")))] + 
"TARGET_SPARCLET" + "smac\t%1, %2, %0" + [(set_attr "type" "imul")]) + +(define_insn "*smacdi" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (mult:DI (sign_extend:DI + (match_operand:SI 1 "register_operand" "%r")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" "r"))) + (match_operand:DI 3 "register_operand" "0")))] + "TARGET_SPARCLET" + "smacd\t%1, %2, %L0" + [(set_attr "type" "imul")]) + +(define_insn "*umacdi" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (mult:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "%r")) + (zero_extend:DI + (match_operand:SI 2 "register_operand" "r"))) + (match_operand:DI 3 "register_operand" "0")))] + "TARGET_SPARCLET" + "umacd\t%1, %2, %L0" + [(set_attr "type" "imul")]) + + +;; Boolean instructions. + +;; We define DImode `and' so with DImode `not' we can get +;; DImode `andn'. Other combinations are possible. + +(define_expand "and3" + [(set (match_operand:V64I 0 "register_operand" "") + (and:V64I (match_operand:V64I 1 "arith_double_operand" "") + (match_operand:V64I 2 "arith_double_operand" "")))] + "" + "") + +(define_insn "*and3_sp32" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (and:V64I (match_operand:V64I 1 "arith_double_operand" "%r,b") + (match_operand:V64I 2 "arith_double_operand" "rHI,b")))] + "! TARGET_ARCH64" + "@ + # + fand\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "length" "2,*") + (set_attr "fptype" "*,double")]) + +(define_insn "*and3_sp64" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (and:V64I (match_operand:V64I 1 "arith_operand" "%r,b") + (match_operand:V64I 2 "arith_operand" "rI,b")))] + "TARGET_ARCH64" + "@ + and\t%1, %2, %0 + fand\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,double")]) + +(define_insn "and3" + [(set (match_operand:V32I 0 "register_operand" "=r,d") + (and:V32I (match_operand:V32I 1 "arith_operand" "%r,d") + (match_operand:V32I 2 "arith_operand" "rI,d")))] + "" + "@ + and\t%1, %2, %0 + fands\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,single")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_compl_high_operand" ""))) + (clobber (match_operand:SI 3 "register_operand" ""))] + "" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 1)))] +{ + operands[4] = GEN_INT (~INTVAL (operands[2])); +}) + +(define_insn_and_split "*and_not__sp32" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (and:V64I (not:V64I (match_operand:V64I 1 "register_operand" "%r,b")) + (match_operand:V64I 2 "register_operand" "r,b")))] + "! 
TARGET_ARCH64" + "@ + # + fandnot1\t%1, %2, %0" + "&& reload_completed + && ((GET_CODE (operands[0]) == REG + && REGNO (operands[0]) < 32) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) < 32))" + [(set (match_dup 3) (and:SI (not:SI (match_dup 4)) (match_dup 5))) + (set (match_dup 6) (and:SI (not:SI (match_dup 7)) (match_dup 8)))] + "operands[3] = gen_highpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[6] = gen_lowpart (SImode, operands[0]); + operands[7] = gen_lowpart (SImode, operands[1]); + operands[8] = gen_lowpart (SImode, operands[2]);" + [(set_attr "type" "*,fga") + (set_attr "length" "2,*") + (set_attr "fptype" "*,double")]) + +(define_insn "*and_not__sp64" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (and:V64I (not:V64I (match_operand:V64I 1 "register_operand" "%r,b")) + (match_operand:V64I 2 "register_operand" "r,b")))] + "TARGET_ARCH64" + "@ + andn\t%2, %1, %0 + fandnot1\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,double")]) + +(define_insn "*and_not_" + [(set (match_operand:V32I 0 "register_operand" "=r,d") + (and:V32I (not:V32I (match_operand:V32I 1 "register_operand" "%r,d")) + (match_operand:V32I 2 "register_operand" "r,d")))] + "" + "@ + andn\t%2, %1, %0 + fandnot1s\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,single")]) + +(define_expand "ior3" + [(set (match_operand:V64I 0 "register_operand" "") + (ior:V64I (match_operand:V64I 1 "arith_double_operand" "") + (match_operand:V64I 2 "arith_double_operand" "")))] + "" + "") + +(define_insn "*ior3_sp32" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (ior:V64I (match_operand:V64I 1 "arith_double_operand" "%r,b") + (match_operand:V64I 2 "arith_double_operand" "rHI,b")))] + "! TARGET_ARCH64" + "@ + # + for\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "length" "2,*") + (set_attr "fptype" "*,double")]) + +(define_insn "*ior3_sp64" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (ior:V64I (match_operand:V64I 1 "arith_operand" "%r,b") + (match_operand:V64I 2 "arith_operand" "rI,b")))] + "TARGET_ARCH64" + "@ + or\t%1, %2, %0 + for\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,double")]) + +(define_insn "ior3" + [(set (match_operand:V32I 0 "register_operand" "=r,d") + (ior:V32I (match_operand:V32I 1 "arith_operand" "%r,d") + (match_operand:V32I 2 "arith_operand" "rI,d")))] + "" + "@ + or\t%1, %2, %0 + fors\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,single")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (ior:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_compl_high_operand" ""))) + (clobber (match_operand:SI 3 "register_operand" ""))] + "" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (ior:SI (not:SI (match_dup 3)) (match_dup 1)))] +{ + operands[4] = GEN_INT (~INTVAL (operands[2])); +}) + +(define_insn_and_split "*or_not__sp32" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (ior:V64I (not:V64I (match_operand:V64I 1 "register_operand" "r,b")) + (match_operand:V64I 2 "register_operand" "r,b")))] + "! 
TARGET_ARCH64" + "@ + # + fornot1\t%1, %2, %0" + "&& reload_completed + && ((GET_CODE (operands[0]) == REG + && REGNO (operands[0]) < 32) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) < 32))" + [(set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5))) + (set (match_dup 6) (ior:SI (not:SI (match_dup 7)) (match_dup 8)))] + "operands[3] = gen_highpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[6] = gen_lowpart (SImode, operands[0]); + operands[7] = gen_lowpart (SImode, operands[1]); + operands[8] = gen_lowpart (SImode, operands[2]);" + [(set_attr "type" "*,fga") + (set_attr "length" "2,*") + (set_attr "fptype" "*,double")]) + +(define_insn "*or_not__sp64" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (ior:V64I (not:V64I (match_operand:V64I 1 "register_operand" "r,b")) + (match_operand:V64I 2 "register_operand" "r,b")))] + "TARGET_ARCH64" + "@ + orn\t%2, %1, %0 + fornot1\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,double")]) + +(define_insn "*or_not_" + [(set (match_operand:V32I 0 "register_operand" "=r,d") + (ior:V32I (not:V32I (match_operand:V32I 1 "register_operand" "r,d")) + (match_operand:V32I 2 "register_operand" "r,d")))] + "" + "@ + orn\t%2, %1, %0 + fornot1s\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,single")]) + +(define_expand "xor3" + [(set (match_operand:V64I 0 "register_operand" "") + (xor:V64I (match_operand:V64I 1 "arith_double_operand" "") + (match_operand:V64I 2 "arith_double_operand" "")))] + "" + "") + +(define_insn "*xor3_sp32" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (xor:V64I (match_operand:V64I 1 "arith_double_operand" "%r,b") + (match_operand:V64I 2 "arith_double_operand" "rHI,b")))] + "! TARGET_ARCH64" + "@ + # + fxor\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "length" "2,*") + (set_attr "fptype" "*,double")]) + +(define_insn "*xor3_sp64" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (xor:V64I (match_operand:V64I 1 "arith_operand" "%rJ,b") + (match_operand:V64I 2 "arith_operand" "rI,b")))] + "TARGET_ARCH64" + "@ + xor\t%r1, %2, %0 + fxor\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,double")]) + +(define_insn "xor3" + [(set (match_operand:V32I 0 "register_operand" "=r,d") + (xor:V32I (match_operand:V32I 1 "arith_operand" "%rJ,d") + (match_operand:V32I 2 "arith_operand" "rI,d")))] + "" + "@ + xor\t%r1, %2, %0 + fxors\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,single")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (xor:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_compl_high_operand" ""))) + (clobber (match_operand:SI 3 "register_operand" ""))] + "" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (not:SI (xor:SI (match_dup 3) (match_dup 1))))] +{ + operands[4] = GEN_INT (~INTVAL (operands[2])); +}) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (not:SI (xor:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_compl_high_operand" "")))) + (clobber (match_operand:SI 3 "register_operand" ""))] + "" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (xor:SI (match_dup 3) (match_dup 1)))] +{ + operands[4] = GEN_INT (~INTVAL (operands[2])); +}) + +;; Split DImode logical operations requiring two instructions. 
+(define_split + [(set (match_operand:V64I 0 "register_operand" "") + (match_operator:V64I 1 "cc_arith_operator" ; AND, IOR, XOR + [(match_operand:V64I 2 "register_operand" "") + (match_operand:V64I 3 "arith_double_operand" "")]))] + "! TARGET_ARCH64 + && reload_completed + && ((GET_CODE (operands[0]) == REG + && REGNO (operands[0]) < 32) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) < 32))" + [(set (match_dup 4) (match_op_dup:SI 1 [(match_dup 6) (match_dup 8)])) + (set (match_dup 5) (match_op_dup:SI 1 [(match_dup 7) (match_dup 9)]))] +{ + operands[4] = gen_highpart (SImode, operands[0]); + operands[5] = gen_lowpart (SImode, operands[0]); + operands[6] = gen_highpart (SImode, operands[2]); + operands[7] = gen_lowpart (SImode, operands[2]); +#if HOST_BITS_PER_WIDE_INT == 32 + if (GET_CODE (operands[3]) == CONST_INT && mode == DImode) + { + if (INTVAL (operands[3]) < 0) + operands[8] = constm1_rtx; + else + operands[8] = const0_rtx; + } + else +#endif + operands[8] = gen_highpart_mode (SImode, mode, operands[3]); + operands[9] = gen_lowpart (SImode, operands[3]); +}) + +;; xnor patterns. Note that (a ^ ~b) == (~a ^ b) == ~(a ^ b). +;; Combine now canonicalizes to the rightmost expression. +(define_insn_and_split "*xor_not__sp32" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (not:V64I (xor:V64I (match_operand:V64I 1 "register_operand" "r,b") + (match_operand:V64I 2 "register_operand" "r,b"))))] + "! TARGET_ARCH64" + "@ + # + fxnor\t%1, %2, %0" + "&& reload_completed + && ((GET_CODE (operands[0]) == REG + && REGNO (operands[0]) < 32) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) < 32))" + [(set (match_dup 3) (not:SI (xor:SI (match_dup 4) (match_dup 5)))) + (set (match_dup 6) (not:SI (xor:SI (match_dup 7) (match_dup 8))))] + "operands[3] = gen_highpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[6] = gen_lowpart (SImode, operands[0]); + operands[7] = gen_lowpart (SImode, operands[1]); + operands[8] = gen_lowpart (SImode, operands[2]);" + [(set_attr "type" "*,fga") + (set_attr "length" "2,*") + (set_attr "fptype" "*,double")]) + +(define_insn "*xor_not__sp64" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (not:V64I (xor:V64I (match_operand:V64I 1 "register_or_zero_operand" "rJ,b") + (match_operand:V64I 2 "arith_operand" "rI,b"))))] + "TARGET_ARCH64" + "@ + xnor\t%r1, %2, %0 + fxnor\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,double")]) + +(define_insn "*xor_not_" + [(set (match_operand:V32I 0 "register_operand" "=r,d") + (not:V32I (xor:V32I (match_operand:V32I 1 "register_or_zero_operand" "rJ,d") + (match_operand:V32I 2 "arith_operand" "rI,d"))))] + "" + "@ + xnor\t%r1, %2, %0 + fxnors\t%1, %2, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,single")]) + +;; These correspond to the above in the case where we also (or only) +;; want to set the condition code. 
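A hedged C sketch of the situation those condition-code patterns target (illustrative only; the function name is invented, and whether combine actually forms the cc-only pattern depends on the surrounding code):

/* Illustrative sketch only: the AND result is needed only for a test
   against zero, so the *cmp_cc_arith_op pattern below can represent
   the whole thing as a single andcc with %g0 as the destination.  */
int
any_common_bits (unsigned int a, unsigned int b)
{
  return (a & b) != 0;
}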
+ +(define_insn "*cmp_cc_arith_op" + [(set (reg:CC 100) + (compare:CC + (match_operator:SI 2 "cc_arith_operator" + [(match_operand:SI 0 "arith_operand" "%r") + (match_operand:SI 1 "arith_operand" "rI")]) + (const_int 0)))] + "" + "%A2cc\t%0, %1, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_arith_op" + [(set (reg:CCX 100) + (compare:CCX + (match_operator:DI 2 "cc_arith_operator" + [(match_operand:DI 0 "arith_operand" "%r") + (match_operand:DI 1 "arith_operand" "rI")]) + (const_int 0)))] + "TARGET_ARCH64" + "%A2cc\t%0, %1, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_cc_arith_op_set" + [(set (reg:CC 100) + (compare:CC + (match_operator:SI 3 "cc_arith_operator" + [(match_operand:SI 1 "arith_operand" "%r") + (match_operand:SI 2 "arith_operand" "rI")]) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 4 "cc_arith_operator" [(match_dup 1) (match_dup 2)]))] + "GET_CODE (operands[3]) == GET_CODE (operands[4])" + "%A3cc\t%1, %2, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_arith_op_set" + [(set (reg:CCX 100) + (compare:CCX + (match_operator:DI 3 "cc_arith_operator" + [(match_operand:DI 1 "arith_operand" "%r") + (match_operand:DI 2 "arith_operand" "rI")]) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (match_operator:DI 4 "cc_arith_operator" [(match_dup 1) (match_dup 2)]))] + "TARGET_ARCH64 && GET_CODE (operands[3]) == GET_CODE (operands[4])" + "%A3cc\t%1, %2, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_cc_xor_not" + [(set (reg:CC 100) + (compare:CC + (not:SI (xor:SI (match_operand:SI 0 "register_or_zero_operand" "%rJ") + (match_operand:SI 1 "arith_operand" "rI"))) + (const_int 0)))] + "" + "xnorcc\t%r0, %1, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_xor_not" + [(set (reg:CCX 100) + (compare:CCX + (not:DI (xor:DI (match_operand:DI 0 "register_or_zero_operand" "%rJ") + (match_operand:DI 1 "arith_operand" "rI"))) + (const_int 0)))] + "TARGET_ARCH64" + "xnorcc\t%r0, %1, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_cc_xor_not_set" + [(set (reg:CC 100) + (compare:CC + (not:SI (xor:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ") + (match_operand:SI 2 "arith_operand" "rI"))) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (not:SI (xor:SI (match_dup 1) (match_dup 2))))] + "" + "xnorcc\t%r1, %2, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_xor_not_set" + [(set (reg:CCX 100) + (compare:CCX + (not:DI (xor:DI (match_operand:DI 1 "register_or_zero_operand" "%rJ") + (match_operand:DI 2 "arith_operand" "rI"))) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (not:DI (xor:DI (match_dup 1) (match_dup 2))))] + "TARGET_ARCH64" + "xnorcc\t%r1, %2, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_cc_arith_op_not" + [(set (reg:CC 100) + (compare:CC + (match_operator:SI 2 "cc_arith_not_operator" + [(not:SI (match_operand:SI 0 "arith_operand" "rI")) + (match_operand:SI 1 "register_or_zero_operand" "rJ")]) + (const_int 0)))] + "" + "%B2cc\t%r1, %0, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_arith_op_not" + [(set (reg:CCX 100) + (compare:CCX + (match_operator:DI 2 "cc_arith_not_operator" + [(not:DI (match_operand:DI 0 "arith_operand" "rI")) + (match_operand:DI 1 "register_or_zero_operand" "rJ")]) + (const_int 0)))] + "TARGET_ARCH64" + "%B2cc\t%r1, %0, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_cc_arith_op_not_set" + [(set (reg:CC 
100) + (compare:CC + (match_operator:SI 3 "cc_arith_not_operator" + [(not:SI (match_operand:SI 1 "arith_operand" "rI")) + (match_operand:SI 2 "register_or_zero_operand" "rJ")]) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 4 "cc_arith_not_operator" + [(not:SI (match_dup 1)) (match_dup 2)]))] + "GET_CODE (operands[3]) == GET_CODE (operands[4])" + "%B3cc\t%r2, %1, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_arith_op_not_set" + [(set (reg:CCX 100) + (compare:CCX + (match_operator:DI 3 "cc_arith_not_operator" + [(not:DI (match_operand:DI 1 "arith_operand" "rI")) + (match_operand:DI 2 "register_or_zero_operand" "rJ")]) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (match_operator:DI 4 "cc_arith_not_operator" + [(not:DI (match_dup 1)) (match_dup 2)]))] + "TARGET_ARCH64 && GET_CODE (operands[3]) == GET_CODE (operands[4])" + "%B3cc\t%r2, %1, %0" + [(set_attr "type" "compare")]) + +;; We cannot use the "neg" pseudo insn because the Sun assembler +;; does not know how to make it work for constants. + +(define_expand "negdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "register_operand" "r")))] + "" +{ + if (! TARGET_ARCH64) + { + emit_insn (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, + gen_rtx_SET (VOIDmode, operand0, + gen_rtx_NEG (DImode, operand1)), + gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (CCmode, + SPARC_ICC_REG))))); + DONE; + } +}) + +(define_insn_and_split "*negdi2_sp32" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "register_operand" "r"))) + (clobber (reg:CC 100))] + "! TARGET_ARCH64" + "#" + "&& reload_completed" + [(parallel [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (minus:SI (const_int 0) (match_dup 5)) + (const_int 0))) + (set (match_dup 4) (minus:SI (const_int 0) (match_dup 5)))]) + (set (match_dup 2) (minus:SI (minus:SI (const_int 0) (match_dup 3)) + (ltu:SI (reg:CC 100) (const_int 0))))] + "operands[2] = gen_highpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[4] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_lowpart (SImode, operands[1]);" + [(set_attr "length" "2")]) + +(define_insn "*negdi2_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_ARCH64" + "sub\t%%g0, %1, %0") + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_operand:SI 1 "arith_operand" "rI")))] + "" + "sub\t%%g0, %1, %0") + +(define_insn "*cmp_cc_neg" + [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (neg:SI (match_operand:SI 0 "arith_operand" "rI")) + (const_int 0)))] + "" + "subcc\t%%g0, %0, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_neg" + [(set (reg:CCX_NOOV 100) + (compare:CCX_NOOV (neg:DI (match_operand:DI 0 "arith_operand" "rI")) + (const_int 0)))] + "TARGET_ARCH64" + "subcc\t%%g0, %0, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_cc_set_neg" + [(set (reg:CC_NOOV 100) + (compare:CC_NOOV (neg:SI (match_operand:SI 1 "arith_operand" "rI")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_dup 1)))] + "" + "subcc\t%%g0, %1, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_set_neg" + [(set (reg:CCX_NOOV 100) + (compare:CCX_NOOV (neg:DI (match_operand:DI 1 "arith_operand" "rI")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_dup 1)))] + "TARGET_ARCH64" + 
"subcc\t%%g0, %1, %0" + [(set_attr "type" "compare")]) + +;; We cannot use the "not" pseudo insn because the Sun assembler +;; does not know how to make it work for constants. +(define_expand "one_cmpl2" + [(set (match_operand:V64I 0 "register_operand" "") + (not:V64I (match_operand:V64I 1 "register_operand" "")))] + "" + "") + +(define_insn_and_split "*one_cmpl2_sp32" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (not:V64I (match_operand:V64I 1 "register_operand" "r,b")))] + "! TARGET_ARCH64" + "@ + # + fnot1\t%1, %0" + "&& reload_completed + && ((GET_CODE (operands[0]) == REG + && REGNO (operands[0]) < 32) + || (GET_CODE (operands[0]) == SUBREG + && GET_CODE (SUBREG_REG (operands[0])) == REG + && REGNO (SUBREG_REG (operands[0])) < 32))" + [(set (match_dup 2) (not:SI (xor:SI (match_dup 3) (const_int 0)))) + (set (match_dup 4) (not:SI (xor:SI (match_dup 5) (const_int 0))))] + "operands[2] = gen_highpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[4] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_lowpart (SImode, operands[1]);" + [(set_attr "type" "*,fga") + (set_attr "length" "2,*") + (set_attr "fptype" "*,double")]) + +(define_insn "*one_cmpl2_sp64" + [(set (match_operand:V64I 0 "register_operand" "=r,b") + (not:V64I (match_operand:V64I 1 "arith_operand" "rI,b")))] + "TARGET_ARCH64" + "@ + xnor\t%%g0, %1, %0 + fnot1\t%1, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,double")]) + +(define_insn "one_cmpl2" + [(set (match_operand:V32I 0 "register_operand" "=r,d") + (not:V32I (match_operand:V32I 1 "arith_operand" "rI,d")))] + "" + "@ + xnor\t%%g0, %1, %0 + fnot1s\t%1, %0" + [(set_attr "type" "*,fga") + (set_attr "fptype" "*,single")]) + +(define_insn "*cmp_cc_not" + [(set (reg:CC 100) + (compare:CC (not:SI (match_operand:SI 0 "arith_operand" "rI")) + (const_int 0)))] + "" + "xnorcc\t%%g0, %0, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_not" + [(set (reg:CCX 100) + (compare:CCX (not:DI (match_operand:DI 0 "arith_operand" "rI")) + (const_int 0)))] + "TARGET_ARCH64" + "xnorcc\t%%g0, %0, %%g0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_cc_set_not" + [(set (reg:CC 100) + (compare:CC (not:SI (match_operand:SI 1 "arith_operand" "rI")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_dup 1)))] + "" + "xnorcc\t%%g0, %1, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_set_not" + [(set (reg:CCX 100) + (compare:CCX (not:DI (match_operand:DI 1 "arith_operand" "rI")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (not:DI (match_dup 1)))] + "TARGET_ARCH64" + "xnorcc\t%%g0, %1, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_cc_set" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "register_operand" "r")) + (set (reg:CC 100) + (compare:CC (match_dup 1) + (const_int 0)))] + "" + "orcc\t%1, 0, %0" + [(set_attr "type" "compare")]) + +(define_insn "*cmp_ccx_set64" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "register_operand" "r")) + (set (reg:CCX 100) + (compare:CCX (match_dup 1) + (const_int 0)))] + "TARGET_ARCH64" + "orcc\t%1, 0, %0" + [(set_attr "type" "compare")]) + + +;; Floating point arithmetic instructions. 
+ +(define_expand "addtf3" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (plus:TF (match_operand:TF 1 "general_operand" "") + (match_operand:TF 2 "general_operand" "")))] + "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_binop (PLUS, operands); DONE;") + +(define_insn "*addtf3_hq" + [(set (match_operand:TF 0 "register_operand" "=e") + (plus:TF (match_operand:TF 1 "register_operand" "e") + (match_operand:TF 2 "register_operand" "e")))] + "TARGET_FPU && TARGET_HARD_QUAD" + "faddq\t%1, %2, %0" + [(set_attr "type" "fp")]) + +(define_insn "adddf3" + [(set (match_operand:DF 0 "register_operand" "=e") + (plus:DF (match_operand:DF 1 "register_operand" "e") + (match_operand:DF 2 "register_operand" "e")))] + "TARGET_FPU" + "faddd\t%1, %2, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (plus:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_FPU" + "fadds\t%1, %2, %0" + [(set_attr "type" "fp")]) + +(define_expand "subtf3" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (minus:TF (match_operand:TF 1 "general_operand" "") + (match_operand:TF 2 "general_operand" "")))] + "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_binop (MINUS, operands); DONE;") + +(define_insn "*subtf3_hq" + [(set (match_operand:TF 0 "register_operand" "=e") + (minus:TF (match_operand:TF 1 "register_operand" "e") + (match_operand:TF 2 "register_operand" "e")))] + "TARGET_FPU && TARGET_HARD_QUAD" + "fsubq\t%1, %2, %0" + [(set_attr "type" "fp")]) + +(define_insn "subdf3" + [(set (match_operand:DF 0 "register_operand" "=e") + (minus:DF (match_operand:DF 1 "register_operand" "e") + (match_operand:DF 2 "register_operand" "e")))] + "TARGET_FPU" + "fsubd\t%1, %2, %0" + [(set_attr "type" "fp") + (set_attr "fptype" "double")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (minus:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_FPU" + "fsubs\t%1, %2, %0" + [(set_attr "type" "fp")]) + +(define_expand "multf3" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (mult:TF (match_operand:TF 1 "general_operand" "") + (match_operand:TF 2 "general_operand" "")))] + "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_binop (MULT, operands); DONE;") + +(define_insn "*multf3_hq" + [(set (match_operand:TF 0 "register_operand" "=e") + (mult:TF (match_operand:TF 1 "register_operand" "e") + (match_operand:TF 2 "register_operand" "e")))] + "TARGET_FPU && TARGET_HARD_QUAD" + "fmulq\t%1, %2, %0" + [(set_attr "type" "fpmul")]) + +(define_insn "muldf3" + [(set (match_operand:DF 0 "register_operand" "=e") + (mult:DF (match_operand:DF 1 "register_operand" "e") + (match_operand:DF 2 "register_operand" "e")))] + "TARGET_FPU" + "fmuld\t%1, %2, %0" + [(set_attr "type" "fpmul") + (set_attr "fptype" "double")]) + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (mult:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_FPU" + "fmuls\t%1, %2, %0" + [(set_attr "type" "fpmul")]) + +(define_insn "*muldf3_extend" + [(set (match_operand:DF 0 "register_operand" "=e") + (mult:DF (float_extend:DF (match_operand:SF 1 "register_operand" "f")) + (float_extend:DF (match_operand:SF 2 "register_operand" "f"))))] + "(TARGET_V8 || TARGET_V9) && TARGET_FPU" + "fsmuld\t%1, %2, %0" + 
[(set_attr "type" "fpmul") + (set_attr "fptype" "double")]) + +(define_insn "*multf3_extend" + [(set (match_operand:TF 0 "register_operand" "=e") + (mult:TF (float_extend:TF (match_operand:DF 1 "register_operand" "e")) + (float_extend:TF (match_operand:DF 2 "register_operand" "e"))))] + "(TARGET_V8 || TARGET_V9) && TARGET_FPU && TARGET_HARD_QUAD" + "fdmulq\t%1, %2, %0" + [(set_attr "type" "fpmul")]) + +(define_expand "divtf3" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (div:TF (match_operand:TF 1 "general_operand" "") + (match_operand:TF 2 "general_operand" "")))] + "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_binop (DIV, operands); DONE;") + +;; don't have timing for quad-prec. divide. +(define_insn "*divtf3_hq" + [(set (match_operand:TF 0 "register_operand" "=e") + (div:TF (match_operand:TF 1 "register_operand" "e") + (match_operand:TF 2 "register_operand" "e")))] + "TARGET_FPU && TARGET_HARD_QUAD" + "fdivq\t%1, %2, %0" + [(set_attr "type" "fpdivd")]) + +(define_insn "divdf3" + [(set (match_operand:DF 0 "register_operand" "=e") + (div:DF (match_operand:DF 1 "register_operand" "e") + (match_operand:DF 2 "register_operand" "e")))] + "TARGET_FPU" + "fdivd\t%1, %2, %0" + [(set_attr "type" "fpdivd") + (set_attr "fptype" "double")]) + +(define_insn "divsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (div:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_FPU" + "fdivs\t%1, %2, %0" + [(set_attr "type" "fpdivs")]) + +(define_expand "negtf2" + [(set (match_operand:TF 0 "register_operand" "=e,e") + (neg:TF (match_operand:TF 1 "register_operand" "0,e")))] + "TARGET_FPU" + "") + +(define_insn_and_split "*negtf2_notv9" + [(set (match_operand:TF 0 "register_operand" "=e,e") + (neg:TF (match_operand:TF 1 "register_operand" "0,e")))] + ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD. + "TARGET_FPU + && ! TARGET_V9" + "@ + fnegs\t%0, %0 + #" + "&& reload_completed + && sparc_absnegfloat_split_legitimate (operands[0], operands[1])" + [(set (match_dup 2) (neg:SF (match_dup 3))) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 6) (match_dup 7))] + "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0])); + operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1])); + operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1); + operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1); + operands[6] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2); + operands[7] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);" + [(set_attr "type" "fpmove,*") + (set_attr "length" "*,2")]) + +(define_insn_and_split "*negtf2_v9" + [(set (match_operand:TF 0 "register_operand" "=e,e") + (neg:TF (match_operand:TF 1 "register_operand" "0,e")))] + ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD. 
+ "TARGET_FPU && TARGET_V9" + "@ + fnegd\t%0, %0 + #" + "&& reload_completed + && sparc_absnegfloat_split_legitimate (operands[0], operands[1])" + [(set (match_dup 2) (neg:DF (match_dup 3))) + (set (match_dup 4) (match_dup 5))] + "operands[2] = gen_rtx_raw_REG (DFmode, REGNO (operands[0])); + operands[3] = gen_rtx_raw_REG (DFmode, REGNO (operands[1])); + operands[4] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2); + operands[5] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);" + [(set_attr "type" "fpmove,*") + (set_attr "length" "*,2") + (set_attr "fptype" "double")]) + +(define_expand "negdf2" + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" "")))] + "TARGET_FPU" + "") + +(define_insn_and_split "*negdf2_notv9" + [(set (match_operand:DF 0 "register_operand" "=e,e") + (neg:DF (match_operand:DF 1 "register_operand" "0,e")))] + "TARGET_FPU && ! TARGET_V9" + "@ + fnegs\t%0, %0 + #" + "&& reload_completed + && sparc_absnegfloat_split_legitimate (operands[0], operands[1])" + [(set (match_dup 2) (neg:SF (match_dup 3))) + (set (match_dup 4) (match_dup 5))] + "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0])); + operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1])); + operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1); + operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);" + [(set_attr "type" "fpmove,*") + (set_attr "length" "*,2")]) + +(define_insn "*negdf2_v9" + [(set (match_operand:DF 0 "register_operand" "=e") + (neg:DF (match_operand:DF 1 "register_operand" "e")))] + "TARGET_FPU && TARGET_V9" + "fnegd\t%1, %0" + [(set_attr "type" "fpmove") + (set_attr "fptype" "double")]) + +(define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FPU" + "fnegs\t%1, %0" + [(set_attr "type" "fpmove")]) + +(define_expand "abstf2" + [(set (match_operand:TF 0 "register_operand" "") + (abs:TF (match_operand:TF 1 "register_operand" "")))] + "TARGET_FPU" + "") + +(define_insn_and_split "*abstf2_notv9" + [(set (match_operand:TF 0 "register_operand" "=e,e") + (abs:TF (match_operand:TF 1 "register_operand" "0,e")))] + ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD. + "TARGET_FPU && ! 
TARGET_V9" + "@ + fabss\t%0, %0 + #" + "&& reload_completed + && sparc_absnegfloat_split_legitimate (operands[0], operands[1])" + [(set (match_dup 2) (abs:SF (match_dup 3))) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 6) (match_dup 7))] + "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0])); + operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1])); + operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1); + operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1); + operands[6] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2); + operands[7] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);" + [(set_attr "type" "fpmove,*") + (set_attr "length" "*,2")]) + +(define_insn "*abstf2_hq_v9" + [(set (match_operand:TF 0 "register_operand" "=e,e") + (abs:TF (match_operand:TF 1 "register_operand" "0,e")))] + "TARGET_FPU && TARGET_V9 && TARGET_HARD_QUAD" + "@ + fabsd\t%0, %0 + fabsq\t%1, %0" + [(set_attr "type" "fpmove") + (set_attr "fptype" "double,*")]) + +(define_insn_and_split "*abstf2_v9" + [(set (match_operand:TF 0 "register_operand" "=e,e") + (abs:TF (match_operand:TF 1 "register_operand" "0,e")))] + "TARGET_FPU && TARGET_V9 && !TARGET_HARD_QUAD" + "@ + fabsd\t%0, %0 + #" + "&& reload_completed + && sparc_absnegfloat_split_legitimate (operands[0], operands[1])" + [(set (match_dup 2) (abs:DF (match_dup 3))) + (set (match_dup 4) (match_dup 5))] + "operands[2] = gen_rtx_raw_REG (DFmode, REGNO (operands[0])); + operands[3] = gen_rtx_raw_REG (DFmode, REGNO (operands[1])); + operands[4] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2); + operands[5] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);" + [(set_attr "type" "fpmove,*") + (set_attr "length" "*,2") + (set_attr "fptype" "double,*")]) + +(define_expand "absdf2" + [(set (match_operand:DF 0 "register_operand" "") + (abs:DF (match_operand:DF 1 "register_operand" "")))] + "TARGET_FPU" + "") + +(define_insn_and_split "*absdf2_notv9" + [(set (match_operand:DF 0 "register_operand" "=e,e") + (abs:DF (match_operand:DF 1 "register_operand" "0,e")))] + "TARGET_FPU && ! 
TARGET_V9" + "@ + fabss\t%0, %0 + #" + "&& reload_completed + && sparc_absnegfloat_split_legitimate (operands[0], operands[1])" + [(set (match_dup 2) (abs:SF (match_dup 3))) + (set (match_dup 4) (match_dup 5))] + "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0])); + operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1])); + operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1); + operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);" + [(set_attr "type" "fpmove,*") + (set_attr "length" "*,2")]) + +(define_insn "*absdf2_v9" + [(set (match_operand:DF 0 "register_operand" "=e") + (abs:DF (match_operand:DF 1 "register_operand" "e")))] + "TARGET_FPU && TARGET_V9" + "fabsd\t%1, %0" + [(set_attr "type" "fpmove") + (set_attr "fptype" "double")]) + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (abs:SF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FPU" + "fabss\t%1, %0" + [(set_attr "type" "fpmove")]) + +(define_expand "sqrttf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (sqrt:TF (match_operand:TF 1 "general_operand" "")))] + "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)" + "emit_tfmode_unop (SQRT, operands); DONE;") + +(define_insn "*sqrttf2_hq" + [(set (match_operand:TF 0 "register_operand" "=e") + (sqrt:TF (match_operand:TF 1 "register_operand" "e")))] + "TARGET_FPU && TARGET_HARD_QUAD" + "fsqrtq\t%1, %0" + [(set_attr "type" "fpsqrtd")]) + +(define_insn "sqrtdf2" + [(set (match_operand:DF 0 "register_operand" "=e") + (sqrt:DF (match_operand:DF 1 "register_operand" "e")))] + "TARGET_FPU" + "fsqrtd\t%1, %0" + [(set_attr "type" "fpsqrtd") + (set_attr "fptype" "double")]) + +(define_insn "sqrtsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (sqrt:SF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FPU" + "fsqrts\t%1, %0" + [(set_attr "type" "fpsqrts")]) + + +;; Arithmetic shift instructions. + +(define_insn "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "" +{ + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f); + return "sll\t%1, %2, %0"; +} + [(set (attr "type") + (if_then_else (match_operand 2 "const_one_operand" "") + (const_string "ialu") (const_string "shift")))]) + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "TARGET_ARCH64 || TARGET_V8PLUS" +{ + if (! TARGET_ARCH64) + { + if (GET_CODE (operands[2]) == CONST_INT) + FAIL; + emit_insn (gen_ashldi3_v8plus (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_insn "*ashldi3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "TARGET_ARCH64" +{ + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); + return "sllx\t%1, %2, %0"; +} + [(set (attr "type") + (if_then_else (match_operand 2 "const_one_operand" "") + (const_string "ialu") (const_string "shift")))]) + +;; XXX UGH! 
+(define_insn "ashldi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=&h,&h,r") + (ashift:DI (match_operand:DI 1 "arith_operand" "rI,0,rI") + (match_operand:SI 2 "arith_operand" "rI,rI,rI"))) + (clobber (match_scratch:SI 3 "=X,X,&h"))] + "TARGET_V8PLUS" + "* return output_v8plus_shift (operands, insn, \"sllx\");" + [(set_attr "type" "multi") + (set_attr "length" "5,5,6")]) + +;; Optimize (1LL<= 32 && INTVAL (operands[2]) < 64" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) - 32); + return "sra\t%1, %2, %0"; +} + [(set_attr "type" "shift")]) + +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "TARGET_ARCH64 || TARGET_V8PLUS" +{ + if (! TARGET_ARCH64) + { + if (GET_CODE (operands[2]) == CONST_INT) + FAIL; /* prefer generic code in this case */ + emit_insn (gen_ashrdi3_v8plus (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_insn "*ashrdi3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "TARGET_ARCH64" + + { + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); + return "srax\t%1, %2, %0"; + } + [(set_attr "type" "shift")]) + +;; XXX +(define_insn "ashrdi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=&h,&h,r") + (ashiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI") + (match_operand:SI 2 "arith_operand" "rI,rI,rI"))) + (clobber (match_scratch:SI 3 "=X,X,&h"))] + "TARGET_V8PLUS" + "* return output_v8plus_shift (operands, insn, \"srax\");" + [(set_attr "type" "multi") + (set_attr "length" "5,5,6")]) + +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "" + { + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f); + return "srl\t%1, %2, %0"; + } + [(set_attr "type" "shift")]) + +;; This handles the case where +;; (zero_extend:DI (lshiftrt:SI (match_operand:SI) (match_operand:SI))), +;; but combiner "simplifies" it for us. +(define_insn "*lshrsi3_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (subreg:DI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "r")) 0) + (match_operand 3 "const_int_operand" "")))] + "TARGET_ARCH64 && (unsigned HOST_WIDE_INT) INTVAL (operands[3]) == 0xffffffff" + "srl\t%1, %2, %0" + [(set_attr "type" "shift")]) + +;; This handles the case where +;; (lshiftrt:DI (zero_extend:DI (match_operand:SI)) (const_int >=0 < 32)) +;; but combiner "simplifies" it for us. +(define_insn "*lshrsi3_extend2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (subreg:DI (match_operand:SI 1 "register_operand" "r") 0) + (match_operand 2 "small_int_operand" "I") + (const_int 32)))] + "TARGET_ARCH64 && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 32" +{ + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + return "srl\t%1, %2, %0"; +} + [(set_attr "type" "shift")]) + +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "TARGET_ARCH64 || TARGET_V8PLUS" +{ + if (! 
TARGET_ARCH64) + { + if (GET_CODE (operands[2]) == CONST_INT) + FAIL; + emit_insn (gen_lshrdi3_v8plus (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_insn "*lshrdi3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] + "TARGET_ARCH64" + { + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); + return "srlx\t%1, %2, %0"; + } + [(set_attr "type" "shift")]) + +;; XXX +(define_insn "lshrdi3_v8plus" + [(set (match_operand:DI 0 "register_operand" "=&h,&h,r") + (lshiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI") + (match_operand:SI 2 "arith_operand" "rI,rI,rI"))) + (clobber (match_scratch:SI 3 "=X,X,&h"))] + "TARGET_V8PLUS" + "* return output_v8plus_shift (operands, insn, \"srlx\");" + [(set_attr "type" "multi") + (set_attr "length" "5,5,6")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashiftrt:SI (subreg:SI (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (const_int 32)) 4) + (match_operand:SI 2 "small_int_operand" "I")))] + "TARGET_ARCH64 && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 32" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) + 32); + return "srax\t%1, %2, %0"; +} + [(set_attr "type" "shift")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (lshiftrt:SI (subreg:SI (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") + (const_int 32)) 4) + (match_operand:SI 2 "small_int_operand" "I")))] + "TARGET_ARCH64 && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 32" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) + 32); + return "srlx\t%1, %2, %0"; +} + [(set_attr "type" "shift")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashiftrt:SI (subreg:SI (ashiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "small_int_operand" "I")) 4) + (match_operand:SI 3 "small_int_operand" "I")))] + "TARGET_ARCH64 + && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 32 + && (unsigned HOST_WIDE_INT) INTVAL (operands[3]) < 32 + && (unsigned HOST_WIDE_INT) (INTVAL (operands[2]) + INTVAL (operands[3])) < 64" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3])); + + return "srax\t%1, %2, %0"; +} + [(set_attr "type" "shift")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (lshiftrt:SI (subreg:SI (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "small_int_operand" "I")) 4) + (match_operand:SI 3 "small_int_operand" "I")))] + "TARGET_ARCH64 + && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 32 + && (unsigned HOST_WIDE_INT) INTVAL (operands[3]) < 32 + && (unsigned HOST_WIDE_INT) (INTVAL (operands[2]) + INTVAL (operands[3])) < 64" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3])); + + return "srlx\t%1, %2, %0"; +} + [(set_attr "type" "shift")]) + + +;; Unconditional and other jump instructions. + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "* return output_ubranch (operands[0], 0, insn);" + [(set_attr "type" "uncond_branch")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand 0 "register_operand" "r")) + (use (label_ref (match_operand 1 "" "")))])] + "" +{ + gcc_assert (GET_MODE (operands[0]) == CASE_VECTOR_MODE); + + /* In pic mode, our address differences are against the base of the + table. 
Add that base value back in; CSE ought to be able to combine + the two address loads. */ + if (flag_pic) + { + rtx tmp, tmp2; + tmp = gen_rtx_LABEL_REF (Pmode, operands[1]); + tmp2 = operands[0]; + if (CASE_VECTOR_MODE != Pmode) + tmp2 = gen_rtx_SIGN_EXTEND (Pmode, tmp2); + tmp = gen_rtx_PLUS (Pmode, tmp2, tmp); + operands[0] = memory_address (Pmode, tmp); + } +}) + +(define_insn "*tablejump_sp32" + [(set (pc) (match_operand:SI 0 "address_operand" "p")) + (use (label_ref (match_operand 1 "" "")))] + "! TARGET_ARCH64" + "jmp\t%a0%#" + [(set_attr "type" "uncond_branch")]) + +(define_insn "*tablejump_sp64" + [(set (pc) (match_operand:DI 0 "address_operand" "p")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_ARCH64" + "jmp\t%a0%#" + [(set_attr "type" "uncond_branch")]) + + +;; Jump to subroutine instructions. + +(define_expand "call" + ;; Note that this expression is not used for generating RTL. + ;; All the RTL is generated explicitly below. + [(call (match_operand 0 "call_operand" "") + (match_operand 3 "" "i"))] + ;; operands[2] is next_arg_register + ;; operands[3] is struct_value_size_rtx. + "" +{ + rtx fn_rtx; + + gcc_assert (MEM_P (operands[0]) && GET_MODE (operands[0]) == FUNCTION_MODE); + + gcc_assert (GET_CODE (operands[3]) == CONST_INT); + + if (GET_CODE (XEXP (operands[0], 0)) == LABEL_REF) + { + /* This is really a PIC sequence. We want to represent + it as a funny jump so its delay slots can be filled. + + ??? But if this really *is* a CALL, will not it clobber the + call-clobbered registers? We lose this if it is a JUMP_INSN. + Why cannot we have delay slots filled if it were a CALL? */ + + /* We accept negative sizes for untyped calls. */ + if (! TARGET_ARCH64 && INTVAL (operands[3]) != 0) + emit_jump_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (3, + gen_rtx_SET (VOIDmode, pc_rtx, XEXP (operands[0], 0)), + operands[3], + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 15))))); + else + emit_jump_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, + gen_rtx_SET (VOIDmode, pc_rtx, XEXP (operands[0], 0)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 15))))); + goto finish_call; + } + + fn_rtx = operands[0]; + + /* We accept negative sizes for untyped calls. */ + if (! TARGET_ARCH64 && INTVAL (operands[3]) != 0) + sparc_emit_call_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (3, gen_rtx_CALL (VOIDmode, fn_rtx, const0_rtx), + operands[3], + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 15)))), + XEXP (fn_rtx, 0)); + else + sparc_emit_call_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, gen_rtx_CALL (VOIDmode, fn_rtx, const0_rtx), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 15)))), + XEXP (fn_rtx, 0)); + + finish_call: + + DONE; +}) + +;; We can't use the same pattern for these two insns, because then registers +;; in the address may not be properly reloaded. + +(define_insn "*call_address_sp32" + [(call (mem:SI (match_operand:SI 0 "address_operand" "p")) + (match_operand 1 "" "")) + (clobber (reg:SI 15))] + ;;- Do not use operand 1 for most machines. + "! TARGET_ARCH64" + "call\t%a0, %1%#" + [(set_attr "type" "call")]) + +(define_insn "*call_symbolic_sp32" + [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "s")) + (match_operand 1 "" "")) + (clobber (reg:SI 15))] + ;;- Do not use operand 1 for most machines. + "! 
TARGET_ARCH64" + "call\t%a0, %1%#" + [(set_attr "type" "call")]) + +(define_insn "*call_address_sp64" + [(call (mem:DI (match_operand:DI 0 "address_operand" "p")) + (match_operand 1 "" "")) + (clobber (reg:DI 15))] + ;;- Do not use operand 1 for most machines. + "TARGET_ARCH64" + "call\t%a0, %1%#" + [(set_attr "type" "call")]) + +(define_insn "*call_symbolic_sp64" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "s")) + (match_operand 1 "" "")) + (clobber (reg:DI 15))] + ;;- Do not use operand 1 for most machines. + "TARGET_ARCH64" + "call\t%a0, %1%#" + [(set_attr "type" "call")]) + +;; This is a call that wants a structure value. +;; There is no such critter for v9 (??? we may need one anyway). +(define_insn "*call_address_struct_value_sp32" + [(call (mem:SI (match_operand:SI 0 "address_operand" "p")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "") + (clobber (reg:SI 15))] + ;;- Do not use operand 1 for most machines. + "! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) > 0" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) & 0xfff); + return "call\t%a0, %1\n\t nop\n\tunimp\t%2"; +} + [(set_attr "type" "call_no_delay_slot") + (set_attr "length" "3")]) + +;; This is a call that wants a structure value. +;; There is no such critter for v9 (??? we may need one anyway). +(define_insn "*call_symbolic_struct_value_sp32" + [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "s")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "") + (clobber (reg:SI 15))] + ;;- Do not use operand 1 for most machines. + "! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) > 0" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) & 0xfff); + return "call\t%a0, %1\n\t nop\n\tunimp\t%2"; +} + [(set_attr "type" "call_no_delay_slot") + (set_attr "length" "3")]) + +;; This is a call that may want a structure value. This is used for +;; untyped_calls. +(define_insn "*call_address_untyped_struct_value_sp32" + [(call (mem:SI (match_operand:SI 0 "address_operand" "p")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "") + (clobber (reg:SI 15))] + ;;- Do not use operand 1 for most machines. + "! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0" + "call\t%a0, %1\n\t nop\n\tnop" + [(set_attr "type" "call_no_delay_slot") + (set_attr "length" "3")]) + +;; This is a call that may want a structure value. This is used for +;; untyped_calls. +(define_insn "*call_symbolic_untyped_struct_value_sp32" + [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "s")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "") + (clobber (reg:SI 15))] + ;;- Do not use operand 1 for most machines. + "! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0" + "call\t%a0, %1\n\t nop\n\tnop" + [(set_attr "type" "call_no_delay_slot") + (set_attr "length" "3")]) + +(define_expand "call_value" + ;; Note that this expression is not used for generating RTL. + ;; All the RTL is generated explicitly below. 
+ [(set (match_operand 0 "register_operand" "=rf") + (call (match_operand 1 "" "") + (match_operand 4 "" "")))] + ;; operand 2 is stack_size_rtx + ;; operand 3 is next_arg_register + "" +{ + rtx fn_rtx; + rtvec vec; + + gcc_assert (MEM_P (operands[1]) && GET_MODE (operands[1]) == FUNCTION_MODE); + + fn_rtx = operands[1]; + + vec = gen_rtvec (2, + gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_CALL (VOIDmode, fn_rtx, const0_rtx)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 15))); + + sparc_emit_call_insn (gen_rtx_PARALLEL (VOIDmode, vec), XEXP (fn_rtx, 0)); + + DONE; +}) + +(define_insn "*call_value_address_sp32" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "address_operand" "p")) + (match_operand 2 "" ""))) + (clobber (reg:SI 15))] + ;;- Do not use operand 2 for most machines. + "! TARGET_ARCH64" + "call\t%a1, %2%#" + [(set_attr "type" "call")]) + +(define_insn "*call_value_symbolic_sp32" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "symbolic_operand" "s")) + (match_operand 2 "" ""))) + (clobber (reg:SI 15))] + ;;- Do not use operand 2 for most machines. + "! TARGET_ARCH64" + "call\t%a1, %2%#" + [(set_attr "type" "call")]) + +(define_insn "*call_value_address_sp64" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "address_operand" "p")) + (match_operand 2 "" ""))) + (clobber (reg:DI 15))] + ;;- Do not use operand 2 for most machines. + "TARGET_ARCH64" + "call\t%a1, %2%#" + [(set_attr "type" "call")]) + +(define_insn "*call_value_symbolic_sp64" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "s")) + (match_operand 2 "" ""))) + (clobber (reg:DI 15))] + ;;- Do not use operand 2 for most machines. + "TARGET_ARCH64" + "call\t%a1, %2%#" + [(set_attr "type" "call")]) + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand:BLK 1 "memory_operand" "") + (match_operand 2 "" "")])] + "" +{ + rtx valreg1 = gen_rtx_REG (DImode, 8); + rtx valreg2 = gen_rtx_REG (TARGET_ARCH64 ? TFmode : DFmode, 32); + rtx result = operands[1]; + + /* Pass constm1 to indicate that it may expect a structure value, but + we don't know what size it is. */ + emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, constm1_rtx)); + + /* Save the function value registers. */ + emit_move_insn (adjust_address (result, DImode, 0), valreg1); + emit_move_insn (adjust_address (result, TARGET_ARCH64 ? TFmode : DFmode, 8), + valreg2); + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}) + +;; Tail call instructions. + +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "call_operand" "") (const_int 0)) + (return)])] + "" + "") + +(define_insn "*sibcall_symbolic_sp32" + [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "s")) + (match_operand 1 "" "")) + (return)] + "! 
TARGET_ARCH64" + "* return output_sibcall(insn, operands[0]);" + [(set_attr "type" "sibcall")]) + +(define_insn "*sibcall_symbolic_sp64" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "s")) + (match_operand 1 "" "")) + (return)] + "TARGET_ARCH64" + "* return output_sibcall(insn, operands[0]);" + [(set_attr "type" "sibcall")]) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "register_operand" "=rf") + (call (match_operand 1 "" "") (const_int 0))) + (return)])] + "" + "") + +(define_insn "*sibcall_value_symbolic_sp32" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "symbolic_operand" "s")) + (match_operand 2 "" ""))) + (return)] + "! TARGET_ARCH64" + "* return output_sibcall(insn, operands[1]);" + [(set_attr "type" "sibcall")]) + +(define_insn "*sibcall_value_symbolic_sp64" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "s")) + (match_operand 2 "" ""))) + (return)] + "TARGET_ARCH64" + "* return output_sibcall(insn, operands[1]);" + [(set_attr "type" "sibcall")]) + + +;; Special instructions. + +(define_expand "prologue" + [(const_int 0)] + "" +{ + sparc_expand_prologue (); + DONE; +}) + +;; The "save register window" insn is modelled as follows so that the DWARF-2 +;; backend automatically emits the required call frame debugging information +;; while it is parsing it. Therefore, the pattern should not be modified +;; without first studying the impact of the changes on the debug info. +;; [(set (%fp) (%sp)) +;; (set (%sp) (unspec_volatile [(%sp) (-frame_size)] UNSPECV_SAVEW)) +;; (set (%i7) (%o7))] + +(define_insn "save_register_window" + [(set (reg:P 30) (reg:P 14)) + (set (reg:P 14) (unspec_volatile:P [(reg:P 14) + (match_operand:P 0 "arith_operand" "rI")] UNSPECV_SAVEW)) + (set (reg:P 31) (reg:P 15))] + "" + "save\t%%sp, %0, %%sp" + [(set_attr "type" "savew")]) + +(define_expand "epilogue" + [(return)] + "" +{ + sparc_expand_epilogue (); +}) + +(define_expand "sibcall_epilogue" + [(return)] + "" +{ + sparc_expand_epilogue (); + DONE; +}) + +(define_expand "return" + [(return)] + "sparc_can_use_return_insn_p ()" + "") + +(define_insn "*return_internal" + [(return)] + "" + "* return output_return (insn);" + [(set_attr "type" "return") + (set (attr "length") + (cond [(eq_attr "leaf_function" "true") + (if_then_else (eq_attr "empty_delay_slot" "true") + (const_int 2) + (const_int 1)) + (eq_attr "calls_eh_return" "true") + (if_then_else (eq_attr "delayed_branch" "true") + (if_then_else (eq_attr "isa" "v9") + (const_int 2) + (const_int 3)) + (const_int 4)) + (eq_attr "empty_delay_slot" "true") + (if_then_else (eq_attr "delayed_branch" "true") + (const_int 2) + (const_int 3)) + ] (const_int 1)))]) + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0")]) + +;; Do not schedule instructions accessing memory before this point. 
+ +(define_expand "frame_blockage" + [(set (match_dup 0) + (unspec:BLK [(match_dup 1)] UNSPEC_FRAME_BLOCKAGE))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; + operands[1] = stack_pointer_rtx; +}) + +(define_insn "*frame_blockage" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_operand:P 1 "" "")] UNSPEC_FRAME_BLOCKAGE))] + "" + "" + [(set_attr "length" "0")]) + +(define_expand "probe_stack" + [(set (match_operand 0 "memory_operand" "") (const_int 0))] + "" +{ + operands[0] + = adjust_address (operands[0], GET_MODE (operands[0]), SPARC_STACK_BIAS); +}) + +(define_insn "probe_stack_range" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "register_operand" "r")] + UNSPECV_PROBE_STACK_RANGE))] + "" + "* return output_probe_stack_range (operands[0], operands[2]);" + [(set_attr "type" "multi")]) + +;; Prepare to return any type including a structure value. + +(define_expand "untyped_return" + [(match_operand:BLK 0 "memory_operand" "") + (match_operand 1 "" "")] + "" +{ + rtx valreg1 = gen_rtx_REG (DImode, 24); + rtx valreg2 = gen_rtx_REG (TARGET_ARCH64 ? TFmode : DFmode, 32); + rtx result = operands[0]; + + if (! TARGET_ARCH64) + { + rtx rtnreg = gen_rtx_REG (SImode, (current_function_uses_only_leaf_regs + ? 15 : 31)); + rtx value = gen_reg_rtx (SImode); + + /* Fetch the instruction where we will return to and see if it's an unimp + instruction (the most significant 10 bits will be zero). If so, + update the return address to skip the unimp instruction. */ + emit_move_insn (value, + gen_rtx_MEM (SImode, plus_constant (rtnreg, 8))); + emit_insn (gen_lshrsi3 (value, value, GEN_INT (22))); + emit_insn (gen_update_return (rtnreg, value)); + } + + /* Reload the function value registers. */ + emit_move_insn (valreg1, adjust_address (result, DImode, 0)); + emit_move_insn (valreg2, + adjust_address (result, TARGET_ARCH64 ? TFmode : DFmode, 8)); + + /* Put USE insns before the return. */ + emit_use (valreg1); + emit_use (valreg2); + + /* Construct the return. */ + expand_naked_return (); + + DONE; +}) + +;; Adjust the return address conditionally. If the value of op1 is equal +;; to all zero then adjust the return address i.e. op0 = op0 + 4. +;; This is technically *half* the check required by the 32-bit SPARC +;; psABI. This check only ensures that an "unimp" insn was written by +;; the caller, but doesn't check to see if the expected size matches +;; (this is encoded in the 12 lower bits). This check is obsolete and +;; only used by the above code "untyped_return". + +(define_insn "update_return" + [(unspec:SI [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r")] UNSPEC_UPDATE_RETURN)] + "! TARGET_ARCH64" +{ + if (flag_delayed_branch) + return "cmp\t%1, 0\n\tbe,a\t.+8\n\t add\t%0, 4, %0"; + else + return "cmp\t%1, 0\n\tbne\t.+12\n\t nop\n\tadd\t%0, 4, %0"; +} + [(set (attr "type") (const_string "multi")) + (set (attr "length") + (if_then_else (eq_attr "delayed_branch" "true") + (const_int 3) + (const_int 4)))]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop") + +(define_expand "indirect_jump" + [(set (pc) (match_operand 0 "address_operand" "p"))] + "" + "") + +(define_insn "*branch_sp32" + [(set (pc) (match_operand:SI 0 "address_operand" "p"))] + "! 
TARGET_ARCH64" + "jmp\t%a0%#" + [(set_attr "type" "uncond_branch")]) + +(define_insn "*branch_sp64" + [(set (pc) (match_operand:DI 0 "address_operand" "p"))] + "TARGET_ARCH64" + "jmp\t%a0%#" + [(set_attr "type" "uncond_branch")]) + +(define_expand "nonlocal_goto" + [(match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "") + (match_operand:SI 3 "" "")] + "" +{ + rtx lab = operands[1]; + rtx stack = operands[2]; + rtx fp = operands[3]; + rtx labreg; + + /* Trap instruction to flush all the register windows. */ + emit_insn (gen_flush_register_windows ()); + + /* Load the fp value for the containing fn into %fp. This is needed + because STACK refers to %fp. Note that virtual register instantiation + fails if the virtual %fp isn't set from a register. */ + if (GET_CODE (fp) != REG) + fp = force_reg (Pmode, fp); + emit_move_insn (virtual_stack_vars_rtx, fp); + + /* Find the containing function's current nonlocal goto handler, + which will do any cleanups and then jump to the label. */ + labreg = gen_rtx_REG (Pmode, 8); + emit_move_insn (labreg, lab); + + /* Restore %fp from stack pointer value for containing function. + The restore insn that follows will move this to %sp, + and reload the appropriate value into %fp. */ + emit_move_insn (hard_frame_pointer_rtx, stack); + + emit_use (stack_pointer_rtx); + + /* ??? The V9-specific version was disabled in rev 1.65. */ + emit_jump_insn (gen_goto_handler_and_restore (labreg)); + emit_barrier (); + DONE; +}) + +;; Special trap insn to flush register windows. +(define_insn "flush_register_windows" + [(unspec_volatile [(const_int 0)] UNSPECV_FLUSHW)] + "" + { return TARGET_V9 ? "flushw" : "ta\t3"; } + [(set_attr "type" "flushw")]) + +(define_insn "goto_handler_and_restore" + [(unspec_volatile [(match_operand 0 "register_operand" "=r")] UNSPECV_GOTO)] + "GET_MODE (operands[0]) == Pmode" +{ + if (flag_delayed_branch) + return "jmp\t%0\n\t restore"; + else + return "mov\t%0,%%g1\n\trestore\n\tjmp\t%%g1\n\t nop"; +} + [(set (attr "type") (const_string "multi")) + (set (attr "length") + (if_then_else (eq_attr "delayed_branch" "true") + (const_int 2) + (const_int 4)))]) + +;; For __builtin_setjmp we need to flush register windows iff the function +;; calls alloca as well, because otherwise the current register window might +;; be saved after the %sp adjustment and thus setjmp would crash. +(define_expand "builtin_setjmp_setup" + [(match_operand 0 "register_operand" "r")] + "" +{ + emit_insn (gen_do_builtin_setjmp_setup ()); + DONE; +}) + +(define_insn "do_builtin_setjmp_setup" + [(unspec_volatile [(const_int 0)] UNSPECV_SETJMP)] + "" +{ + if (!cfun->calls_alloca) + return ""; + if (!TARGET_V9) + return "ta\t3"; + fputs ("\tflushw\n", asm_out_file); + if (flag_pic) + fprintf (asm_out_file, "\tst%c\t%%l7, [%%sp+%d]\n", + TARGET_ARCH64 ? 'x' : 'w', + SPARC_STACK_BIAS + 7 * UNITS_PER_WORD); + fprintf (asm_out_file, "\tst%c\t%%fp, [%%sp+%d]\n", + TARGET_ARCH64 ? 'x' : 'w', + SPARC_STACK_BIAS + 14 * UNITS_PER_WORD); + fprintf (asm_out_file, "\tst%c\t%%i7, [%%sp+%d]\n", + TARGET_ARCH64 ? 'x' : 'w', + SPARC_STACK_BIAS + 15 * UNITS_PER_WORD); + return ""; +} + [(set_attr "type" "multi") + (set (attr "length") + (cond [(eq_attr "calls_alloca" "false") + (const_int 0) + (eq_attr "isa" "!v9") + (const_int 1) + (eq_attr "pic" "true") + (const_int 4)] (const_int 3)))]) + +;; Pattern for use after a setjmp to store registers into the save area. 
+ +(define_expand "setjmp" + [(const_int 0)] + "" +{ + rtx mem; + + if (flag_pic) + { + mem = gen_rtx_MEM (Pmode, + plus_constant (stack_pointer_rtx, + SPARC_STACK_BIAS + 7 * UNITS_PER_WORD)); + emit_insn (gen_rtx_SET (VOIDmode, mem, pic_offset_table_rtx)); + } + + mem = gen_rtx_MEM (Pmode, + plus_constant (stack_pointer_rtx, + SPARC_STACK_BIAS + 14 * UNITS_PER_WORD)); + emit_insn (gen_rtx_SET (VOIDmode, mem, hard_frame_pointer_rtx)); + + mem = gen_rtx_MEM (Pmode, + plus_constant (stack_pointer_rtx, + SPARC_STACK_BIAS + 15 * UNITS_PER_WORD)); + emit_insn (gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (Pmode, 31))); + DONE; +}) + +;; Special pattern for the FLUSH instruction. + +; We do SImode and DImode versions of this to quiet down genrecog's complaints +; of the define_insn otherwise missing a mode. We make "flush", aka +; gen_flush, the default one since sparc_initialize_trampoline uses +; it on SImode mem values. + +(define_insn "flush" + [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] UNSPECV_FLUSH)] + "" + { return TARGET_V9 ? "flush\t%f0" : "iflush\t%f0"; } + [(set_attr "type" "iflush")]) + +(define_insn "flushdi" + [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")] UNSPECV_FLUSH)] + "" + { return TARGET_V9 ? "flush\t%f0" : "iflush\t%f0"; } + [(set_attr "type" "iflush")]) + + +;; Find first set instructions. + +;; The scan instruction searches from the most significant bit while ffs +;; searches from the least significant bit. The bit index and treatment of +;; zero also differ. It takes at least 7 instructions to get the proper +;; result. Here is an obvious 8 instruction sequence. + +;; XXX +(define_insn "ffssi2" + [(set (match_operand:SI 0 "register_operand" "=&r") + (ffs:SI (match_operand:SI 1 "register_operand" "r"))) + (clobber (match_scratch:SI 2 "=&r"))] + "TARGET_SPARCLITE || TARGET_SPARCLET" +{ + return "sub\t%%g0, %1, %0\;and\t%0, %1, %0\;scan\t%0, 0, %0\;mov\t32, %2\;sub\t%2, %0, %0\;sra\t%0, 31, %2\;and\t%2, 31, %2\;add\t%2, %0, %0"; +} + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +;; ??? This should be a define expand, so that the extra instruction have +;; a chance of being optimized away. + +;; Disabled because none of the UltraSPARCs implement popc. The HAL R1 +;; does, but no one uses that and we don't have a switch for it. +; +;(define_insn "ffsdi2" +; [(set (match_operand:DI 0 "register_operand" "=&r") +; (ffs:DI (match_operand:DI 1 "register_operand" "r"))) +; (clobber (match_scratch:DI 2 "=&r"))] +; "TARGET_ARCH64" +; "neg\t%1, %2\;xnor\t%1, %2, %2\;popc\t%2, %0\;movzr\t%1, 0, %0" +; [(set_attr "type" "multi") +; (set_attr "length" "4")]) + + + +;; Peepholes go at the end. + +;; Optimize consecutive loads or stores into ldd and std when possible. +;; The conditions in which we do this are very restricted and are +;; explained in the code for {registers,memory}_ok_for_ldd functions. 
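As a rough illustration of the kind of source these peepholes target, here is a minimal sketch (struct and function names are purely illustrative, 32-bit compile assumed); whether the two accesses are actually fused depends on the register-pair and address checks named above.

/* Sketch: two adjacent, naturally aligned SImode accesses are candidates
   for the ldd/std peepholes, subject to registers_ok_for_ldd_peep and
   mems_ok_for_ldd_peep.  */
struct pair { int lo; int hi; };          /* hypothetical 8-byte struct */

void
copy_pair (struct pair *dst, const struct pair *src)
{
  dst->lo = src->lo;                      /* candidate for a single ldd ... */
  dst->hi = src->hi;                      /* ... and a single std           */
}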
+ +(define_peephole2 + [(set (match_operand:SI 0 "memory_operand" "") + (const_int 0)) + (set (match_operand:SI 1 "memory_operand" "") + (const_int 0))] + "TARGET_V9 + && mems_ok_for_ldd_peep (operands[0], operands[1], NULL_RTX)" + [(set (match_dup 0) + (const_int 0))] + "operands[0] = widen_memory_access (operands[0], DImode, 0);") + +(define_peephole2 + [(set (match_operand:SI 0 "memory_operand" "") + (const_int 0)) + (set (match_operand:SI 1 "memory_operand" "") + (const_int 0))] + "TARGET_V9 + && mems_ok_for_ldd_peep (operands[1], operands[0], NULL_RTX)" + [(set (match_dup 1) + (const_int 0))] + "operands[1] = widen_memory_access (operands[1], DImode, 0);") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "registers_ok_for_ldd_peep (operands[0], operands[2]) + && mems_ok_for_ldd_peep (operands[1], operands[3], operands[0])" + [(set (match_dup 0) + (match_dup 1))] + "operands[1] = widen_memory_access (operands[1], DImode, 0); + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));") + +(define_peephole2 + [(set (match_operand:SI 0 "memory_operand" "") + (match_operand:SI 1 "register_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 3 "register_operand" ""))] + "registers_ok_for_ldd_peep (operands[1], operands[3]) + && mems_ok_for_ldd_peep (operands[0], operands[2], NULL_RTX)" + [(set (match_dup 0) + (match_dup 1))] + "operands[0] = widen_memory_access (operands[0], DImode, 0); + operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));") + +(define_peephole2 + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "memory_operand" "")) + (set (match_operand:SF 2 "register_operand" "") + (match_operand:SF 3 "memory_operand" ""))] + "registers_ok_for_ldd_peep (operands[0], operands[2]) + && mems_ok_for_ldd_peep (operands[1], operands[3], operands[0])" + [(set (match_dup 0) + (match_dup 1))] + "operands[1] = widen_memory_access (operands[1], DFmode, 0); + operands[0] = gen_rtx_REG (DFmode, REGNO (operands[0]));") + +(define_peephole2 + [(set (match_operand:SF 0 "memory_operand" "") + (match_operand:SF 1 "register_operand" "")) + (set (match_operand:SF 2 "memory_operand" "") + (match_operand:SF 3 "register_operand" ""))] + "registers_ok_for_ldd_peep (operands[1], operands[3]) + && mems_ok_for_ldd_peep (operands[0], operands[2], NULL_RTX)" + [(set (match_dup 0) + (match_dup 1))] + "operands[0] = widen_memory_access (operands[0], DFmode, 0); + operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "registers_ok_for_ldd_peep (operands[2], operands[0]) + && mems_ok_for_ldd_peep (operands[3], operands[1], operands[0])" + [(set (match_dup 2) + (match_dup 3))] + "operands[3] = widen_memory_access (operands[3], DImode, 0); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[2]));") + +(define_peephole2 + [(set (match_operand:SI 0 "memory_operand" "") + (match_operand:SI 1 "register_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 3 "register_operand" ""))] + "registers_ok_for_ldd_peep (operands[3], operands[1]) + && mems_ok_for_ldd_peep (operands[2], operands[0], NULL_RTX)" + [(set (match_dup 2) + (match_dup 3))] + "operands[2] = 
widen_memory_access (operands[2], DImode, 0); + operands[3] = gen_rtx_REG (DImode, REGNO (operands[3])); + ") + +(define_peephole2 + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "memory_operand" "")) + (set (match_operand:SF 2 "register_operand" "") + (match_operand:SF 3 "memory_operand" ""))] + "registers_ok_for_ldd_peep (operands[2], operands[0]) + && mems_ok_for_ldd_peep (operands[3], operands[1], operands[0])" + [(set (match_dup 2) + (match_dup 3))] + "operands[3] = widen_memory_access (operands[3], DFmode, 0); + operands[2] = gen_rtx_REG (DFmode, REGNO (operands[2]));") + +(define_peephole2 + [(set (match_operand:SF 0 "memory_operand" "") + (match_operand:SF 1 "register_operand" "")) + (set (match_operand:SF 2 "memory_operand" "") + (match_operand:SF 3 "register_operand" ""))] + "registers_ok_for_ldd_peep (operands[3], operands[1]) + && mems_ok_for_ldd_peep (operands[2], operands[0], NULL_RTX)" + [(set (match_dup 2) + (match_dup 3))] + "operands[2] = widen_memory_access (operands[2], DFmode, 0); + operands[3] = gen_rtx_REG (DFmode, REGNO (operands[3]));") + +;; Optimize the case of following a reg-reg move with a test +;; of reg just moved. Don't allow floating point regs for operand 0 or 1. +;; This can result from a float to fix conversion. + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")) + (set (reg:CC 100) + (compare:CC (match_operand:SI 2 "register_operand" "") + (const_int 0)))] + "(rtx_equal_p (operands[2], operands[0]) + || rtx_equal_p (operands[2], operands[1])) + && ! SPARC_FP_REG_P (REGNO (operands[0])) + && ! SPARC_FP_REG_P (REGNO (operands[1]))" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (reg:CC 100) + (compare:CC (match_dup 1) (const_int 0)))])] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "")) + (set (reg:CCX 100) + (compare:CCX (match_operand:DI 2 "register_operand" "") + (const_int 0)))] + "TARGET_ARCH64 + && (rtx_equal_p (operands[2], operands[0]) + || rtx_equal_p (operands[2], operands[1])) + && ! SPARC_FP_REG_P (REGNO (operands[0])) + && ! SPARC_FP_REG_P (REGNO (operands[1]))" + [(parallel [(set (match_dup 0) (match_dup 1)) + (set (reg:CCX 100) + (compare:CCX (match_dup 1) (const_int 0)))])] + "") + + +;; Prefetch instructions. + +;; ??? UltraSPARC-III note: A memory operation loading into the floating point register +;; ??? file, if it hits the prefetch cache, has a chance to dual-issue with other memory +;; ??? operations. With DFA we might be able to model this, but it requires a lot of +;; ??? state. 
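As a sketch of how the expanders below are reached (a v9 target is assumed, since the expander requires TARGET_V9), the second and third arguments of __builtin_prefetch select the row and column of the prefetch_instr table:

/* Sketch: expected mapping from __builtin_prefetch (addr, rw, locality)
   to the SPARC prefetch function codes used by prefetch_32/prefetch_64.  */
void
prefetch_examples (const char *p, char *q)
{
  __builtin_prefetch (p, 0, 0);   /* one read        -> prefetch [addr], 1 */
  __builtin_prefetch (p, 0, 3);   /* several reads   -> prefetch [addr], 0 */
  __builtin_prefetch (q, 1, 0);   /* one write       -> prefetch [addr], 3 */
  __builtin_prefetch (q, 1, 3);   /* several writes  -> prefetch [addr], 2 */
}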
+(define_expand "prefetch" + [(match_operand 0 "address_operand" "") + (match_operand 1 "const_int_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_V9" +{ + if (TARGET_ARCH64) + emit_insn (gen_prefetch_64 (operands[0], operands[1], operands[2])); + else + emit_insn (gen_prefetch_32 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "prefetch_64" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (match_operand:DI 1 "const_int_operand" "n") + (match_operand:DI 2 "const_int_operand" "n"))] + "" +{ + static const char * const prefetch_instr[2][2] = { + { + "prefetch\t[%a0], 1", /* no locality: prefetch for one read */ + "prefetch\t[%a0], 0", /* medium to high locality: prefetch for several reads */ + }, + { + "prefetch\t[%a0], 3", /* no locality: prefetch for one write */ + "prefetch\t[%a0], 2", /* medium to high locality: prefetch for several writes */ + } + }; + int read_or_write = INTVAL (operands[1]); + int locality = INTVAL (operands[2]); + + gcc_assert (read_or_write == 0 || read_or_write == 1); + gcc_assert (locality >= 0 && locality < 4); + return prefetch_instr [read_or_write][locality == 0 ? 0 : 1]; +} + [(set_attr "type" "load")]) + +(define_insn "prefetch_32" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (match_operand:SI 2 "const_int_operand" "n"))] + "" +{ + static const char * const prefetch_instr[2][2] = { + { + "prefetch\t[%a0], 1", /* no locality: prefetch for one read */ + "prefetch\t[%a0], 0", /* medium to high locality: prefetch for several reads */ + }, + { + "prefetch\t[%a0], 3", /* no locality: prefetch for one write */ + "prefetch\t[%a0], 2", /* medium to high locality: prefetch for several writes */ + } + }; + int read_or_write = INTVAL (operands[1]); + int locality = INTVAL (operands[2]); + + gcc_assert (read_or_write == 0 || read_or_write == 1); + gcc_assert (locality >= 0 && locality < 4); + return prefetch_instr [read_or_write][locality == 0 ? 0 : 1]; +} + [(set_attr "type" "load")]) + + +;; Trap instructions. + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 5))] + "" + "ta\t5" + [(set_attr "type" "trap")]) + +(define_expand "ctrapsi4" + [(trap_if (match_operator 0 "noov_compare_operator" + [(match_operand:SI 1 "compare_operand" "") + (match_operand:SI 2 "arith_operand" "")]) + (match_operand 3 ""))] + "" + "operands[1] = gen_compare_reg (operands[0]); + if (GET_MODE (operands[1]) != CCmode && GET_MODE (operands[1]) != CCXmode) + FAIL; + operands[2] = const0_rtx;") + +(define_expand "ctrapdi4" + [(trap_if (match_operator 0 "noov_compare_operator" + [(match_operand:DI 1 "compare_operand" "") + (match_operand:DI 2 "arith_operand" "")]) + (match_operand 3 ""))] + "TARGET_ARCH64" + "operands[1] = gen_compare_reg (operands[0]); + if (GET_MODE (operands[1]) != CCmode && GET_MODE (operands[1]) != CCXmode) + FAIL; + operands[2] = const0_rtx;") + + +(define_insn "" + [(trap_if (match_operator 0 "noov_compare_operator" [(reg:CC 100) (const_int 0)]) + (match_operand:SI 1 "arith_operand" "rM"))] + "" +{ + if (TARGET_V9) + return "t%C0\t%%icc, %1"; + else + return "t%C0\t%1"; +} + [(set_attr "type" "trap")]) + +(define_insn "" + [(trap_if (match_operator 0 "noov_compare_operator" [(reg:CCX 100) (const_int 0)]) + (match_operand:SI 1 "arith_operand" "rM"))] + "TARGET_V9" + "t%C0\t%%xcc, %1" + [(set_attr "type" "trap")]) + + +;; TLS support instructions. 
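As a rough sketch of how the general-dynamic patterns below fit together (32-bit, -fpic assumed; register choices are illustrative), a source-level access like the following is expected to expand into the sethi/add/add/call sequence carrying the %tgd_* relocations:

/* Sketch: general-dynamic TLS access, expected to become roughly
     sethi  %tgd_hi22(x), %reg
     add    %reg, %tgd_lo10(x), %reg
     add    %l7, %reg, %o0, %tgd_add(x)
     call   __tls_get_addr, %tgd_call(x)  */
extern __thread int x;

int
read_x (void)
{
  return x;
}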
+ +(define_insn "tgd_hi22" + [(set (match_operand:SI 0 "register_operand" "=r") + (high:SI (unspec:SI [(match_operand 1 "tgd_symbolic_operand" "")] + UNSPEC_TLSGD)))] + "TARGET_TLS" + "sethi\\t%%tgd_hi22(%a1), %0") + +(define_insn "tgd_lo10" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand 2 "tgd_symbolic_operand" "")] + UNSPEC_TLSGD)))] + "TARGET_TLS" + "add\\t%1, %%tgd_lo10(%a2), %0") + +(define_insn "tgd_add32" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tgd_symbolic_operand" "")] + UNSPEC_TLSGD)))] + "TARGET_TLS && TARGET_ARCH32" + "add\\t%1, %2, %0, %%tgd_add(%a3)") + +(define_insn "tgd_add64" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tgd_symbolic_operand" "")] + UNSPEC_TLSGD)))] + "TARGET_TLS && TARGET_ARCH64" + "add\\t%1, %2, %0, %%tgd_add(%a3)") + +(define_insn "tgd_call32" + [(set (match_operand 0 "register_operand" "=r") + (call (mem:SI (unspec:SI [(match_operand:SI 1 "symbolic_operand" "s") + (match_operand 2 "tgd_symbolic_operand" "")] + UNSPEC_TLSGD)) + (match_operand 3 "" ""))) + (clobber (reg:SI 15))] + "TARGET_TLS && TARGET_ARCH32" + "call\t%a1, %%tgd_call(%a2)%#" + [(set_attr "type" "call")]) + +(define_insn "tgd_call64" + [(set (match_operand 0 "register_operand" "=r") + (call (mem:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "s") + (match_operand 2 "tgd_symbolic_operand" "")] + UNSPEC_TLSGD)) + (match_operand 3 "" ""))) + (clobber (reg:DI 15))] + "TARGET_TLS && TARGET_ARCH64" + "call\t%a1, %%tgd_call(%a2)%#" + [(set_attr "type" "call")]) + +(define_insn "tldm_hi22" + [(set (match_operand:SI 0 "register_operand" "=r") + (high:SI (unspec:SI [(const_int 0)] UNSPEC_TLSLDM)))] + "TARGET_TLS" + "sethi\\t%%tldm_hi22(%&), %0") + +(define_insn "tldm_lo10" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(const_int 0)] UNSPEC_TLSLDM)))] + "TARGET_TLS" + "add\\t%1, %%tldm_lo10(%&), %0") + +(define_insn "tldm_add32" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand:SI 2 "register_operand" "r")] + UNSPEC_TLSLDM)))] + "TARGET_TLS && TARGET_ARCH32" + "add\\t%1, %2, %0, %%tldm_add(%&)") + +(define_insn "tldm_add64" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand:SI 2 "register_operand" "r")] + UNSPEC_TLSLDM)))] + "TARGET_TLS && TARGET_ARCH64" + "add\\t%1, %2, %0, %%tldm_add(%&)") + +(define_insn "tldm_call32" + [(set (match_operand 0 "register_operand" "=r") + (call (mem:SI (unspec:SI [(match_operand:SI 1 "symbolic_operand" "s")] + UNSPEC_TLSLDM)) + (match_operand 2 "" ""))) + (clobber (reg:SI 15))] + "TARGET_TLS && TARGET_ARCH32" + "call\t%a1, %%tldm_call(%&)%#" + [(set_attr "type" "call")]) + +(define_insn "tldm_call64" + [(set (match_operand 0 "register_operand" "=r") + (call (mem:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "s")] + UNSPEC_TLSLDM)) + (match_operand 2 "" ""))) + (clobber (reg:DI 15))] + "TARGET_TLS && TARGET_ARCH64" + "call\t%a1, %%tldm_call(%&)%#" + [(set_attr "type" "call")]) + +(define_insn "tldo_hix22" + [(set (match_operand:SI 0 
"register_operand" "=r") + (high:SI (unspec:SI [(match_operand 1 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO)))] + "TARGET_TLS" + "sethi\\t%%tldo_hix22(%a1), %0") + +(define_insn "tldo_lox10" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand 2 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO)))] + "TARGET_TLS" + "xor\\t%1, %%tldo_lox10(%a2), %0") + +(define_insn "tldo_add32" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO)))] + "TARGET_TLS && TARGET_ARCH32" + "add\\t%1, %2, %0, %%tldo_add(%a3)") + +(define_insn "tldo_add64" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO)))] + "TARGET_TLS && TARGET_ARCH64" + "add\\t%1, %2, %0, %%tldo_add(%a3)") + +(define_insn "tie_hi22" + [(set (match_operand:SI 0 "register_operand" "=r") + (high:SI (unspec:SI [(match_operand 1 "tie_symbolic_operand" "")] + UNSPEC_TLSIE)))] + "TARGET_TLS" + "sethi\\t%%tie_hi22(%a1), %0") + +(define_insn "tie_lo10" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand 2 "tie_symbolic_operand" "")] + UNSPEC_TLSIE)))] + "TARGET_TLS" + "add\\t%1, %%tie_lo10(%a2), %0") + +(define_insn "tie_ld32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tie_symbolic_operand" "")] + UNSPEC_TLSIE))] + "TARGET_TLS && TARGET_ARCH32" + "ld\\t[%1 + %2], %0, %%tie_ld(%a3)" + [(set_attr "type" "load")]) + +(define_insn "tie_ld64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tie_symbolic_operand" "")] + UNSPEC_TLSIE))] + "TARGET_TLS && TARGET_ARCH64" + "ldx\\t[%1 + %2], %0, %%tie_ldx(%a3)" + [(set_attr "type" "load")]) + +(define_insn "tie_add32" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tie_symbolic_operand" "")] + UNSPEC_TLSIE)))] + "TARGET_SUN_TLS && TARGET_ARCH32" + "add\\t%1, %2, %0, %%tie_add(%a3)") + +(define_insn "tie_add64" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand:DI 2 "register_operand" "r") + (match_operand 3 "tie_symbolic_operand" "")] + UNSPEC_TLSIE)))] + "TARGET_SUN_TLS && TARGET_ARCH64" + "add\\t%1, %2, %0, %%tie_add(%a3)") + +(define_insn "tle_hix22_sp32" + [(set (match_operand:SI 0 "register_operand" "=r") + (high:SI (unspec:SI [(match_operand 1 "tle_symbolic_operand" "")] + UNSPEC_TLSLE)))] + "TARGET_TLS && TARGET_ARCH32" + "sethi\\t%%tle_hix22(%a1), %0") + +(define_insn "tle_lox10_sp32" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand 2 "tle_symbolic_operand" "")] + UNSPEC_TLSLE)))] + "TARGET_TLS && TARGET_ARCH32" + "xor\\t%1, %%tle_lox10(%a2), %0") + +(define_insn "tle_hix22_sp64" + [(set (match_operand:DI 0 "register_operand" 
"=r") + (high:DI (unspec:DI [(match_operand 1 "tle_symbolic_operand" "")] + UNSPEC_TLSLE)))] + "TARGET_TLS && TARGET_ARCH64" + "sethi\\t%%tle_hix22(%a1), %0") + +(define_insn "tle_lox10_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand 2 "tle_symbolic_operand" "")] + UNSPEC_TLSLE)))] + "TARGET_TLS && TARGET_ARCH64" + "xor\\t%1, %%tle_lox10(%a2), %0") + +;; Now patterns combining tldo_add{32,64} with some integer loads or stores +(define_insn "*tldo_ldub_sp32" + [(set (match_operand:QI 0 "register_operand" "=r") + (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r"))))] + "TARGET_TLS && TARGET_ARCH32" + "ldub\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldub1_sp32" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH32" + "ldub\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldub2_sp32" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH32" + "ldub\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldsb1_sp32" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH32" + "ldsb\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldsb2_sp32" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH32" + "ldsb\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldub_sp64" + [(set (match_operand:QI 0 "register_operand" "=r") + (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r"))))] + "TARGET_TLS && TARGET_ARCH64" + "ldub\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldub1_sp64" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "ldub\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn 
"*tldo_ldub2_sp64" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "ldub\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldub3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "ldub\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldsb1_sp64" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "ldsb\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldsb2_sp64" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "ldsb\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldsb3_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "ldsb\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_lduh_sp32" + [(set (match_operand:HI 0 "register_operand" "=r") + (mem:HI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r"))))] + "TARGET_TLS && TARGET_ARCH32" + "lduh\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_lduh1_sp32" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (mem:HI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH32" + "lduh\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldsh1_sp32" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (mem:HI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH32" + "ldsh\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_lduh_sp64" + [(set (match_operand:HI 0 "register_operand" "=r") + (mem:HI (plus:DI (unspec:DI 
[(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r"))))] + "TARGET_TLS && TARGET_ARCH64" + "lduh\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_lduh1_sp64" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (mem:HI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "lduh\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_lduh2_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (mem:HI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "lduh\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldsh1_sp64" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (mem:HI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "ldsh\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldsh2_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (mem:HI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "ldsh\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_lduw_sp32" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r"))))] + "TARGET_TLS && TARGET_ARCH32" + "ld\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load")]) + +(define_insn "*tldo_lduw_sp64" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r"))))] + "TARGET_TLS && TARGET_ARCH64" + "lduw\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load")]) + +(define_insn "*tldo_lduw1_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (mem:SI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "lduw\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load")]) + +(define_insn "*tldo_ldsw1_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (mem:SI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r")))))] + "TARGET_TLS && TARGET_ARCH64" + "ldsw\t[%1 + %2], %0, %%tldo_add(%3)" + 
[(set_attr "type" "sload") + (set_attr "us3load_type" "3cycle")]) + +(define_insn "*tldo_ldx_sp64" + [(set (match_operand:DI 0 "register_operand" "=r") + (mem:DI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r"))))] + "TARGET_TLS && TARGET_ARCH64" + "ldx\t[%1 + %2], %0, %%tldo_add(%3)" + [(set_attr "type" "load")]) + +(define_insn "*tldo_stb_sp32" + [(set (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r"))) + (match_operand:QI 0 "register_operand" "r"))] + "TARGET_TLS && TARGET_ARCH32" + "stb\t%0, [%1 + %2], %%tldo_add(%3)" + [(set_attr "type" "store")]) + +(define_insn "*tldo_stb_sp64" + [(set (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r"))) + (match_operand:QI 0 "register_operand" "r"))] + "TARGET_TLS && TARGET_ARCH64" + "stb\t%0, [%1 + %2], %%tldo_add(%3)" + [(set_attr "type" "store")]) + +(define_insn "*tldo_sth_sp32" + [(set (mem:HI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r"))) + (match_operand:HI 0 "register_operand" "r"))] + "TARGET_TLS && TARGET_ARCH32" + "sth\t%0, [%1 + %2], %%tldo_add(%3)" + [(set_attr "type" "store")]) + +(define_insn "*tldo_sth_sp64" + [(set (mem:HI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r"))) + (match_operand:HI 0 "register_operand" "r"))] + "TARGET_TLS && TARGET_ARCH64" + "sth\t%0, [%1 + %2], %%tldo_add(%3)" + [(set_attr "type" "store")]) + +(define_insn "*tldo_stw_sp32" + [(set (mem:SI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:SI 1 "register_operand" "r"))) + (match_operand:SI 0 "register_operand" "r"))] + "TARGET_TLS && TARGET_ARCH32" + "st\t%0, [%1 + %2], %%tldo_add(%3)" + [(set_attr "type" "store")]) + +(define_insn "*tldo_stw_sp64" + [(set (mem:SI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r"))) + (match_operand:SI 0 "register_operand" "r"))] + "TARGET_TLS && TARGET_ARCH64" + "stw\t%0, [%1 + %2], %%tldo_add(%3)" + [(set_attr "type" "store")]) + +(define_insn "*tldo_stx_sp64" + [(set (mem:DI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r") + (match_operand 3 "tld_symbolic_operand" "")] + UNSPEC_TLSLDO) + (match_operand:DI 1 "register_operand" "r"))) + (match_operand:DI 0 "register_operand" "r"))] + "TARGET_TLS && TARGET_ARCH64" + "stx\t%0, [%1 + %2], %%tldo_add(%3)" + [(set_attr "type" "store")]) + + +;; Stack protector instructions. 
+ +(define_expand "stack_protect_set" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")] + "" +{ +#ifdef TARGET_THREAD_SSP_OFFSET + rtx tlsreg = gen_rtx_REG (Pmode, 7); + rtx addr = gen_rtx_PLUS (Pmode, tlsreg, GEN_INT (TARGET_THREAD_SSP_OFFSET)); + operands[1] = gen_rtx_MEM (Pmode, addr); +#endif + if (TARGET_ARCH64) + emit_insn (gen_stack_protect_setdi (operands[0], operands[1])); + else + emit_insn (gen_stack_protect_setsi (operands[0], operands[1])); + DONE; +}) + +(define_insn "stack_protect_setsi" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0))] + "TARGET_ARCH32" + "ld\t%1, %2\;st\t%2, %0\;mov\t0, %2" + [(set_attr "type" "multi") + (set_attr "length" "3")]) + +(define_insn "stack_protect_setdi" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0))] + "TARGET_ARCH64" + "ldx\t%1, %2\;stx\t%2, %0\;mov\t0, %2" + [(set_attr "type" "multi") + (set_attr "length" "3")]) + +(define_expand "stack_protect_test" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "") + (match_operand 2 "" "")] + "" +{ + rtx result, test; +#ifdef TARGET_THREAD_SSP_OFFSET + rtx tlsreg = gen_rtx_REG (Pmode, 7); + rtx addr = gen_rtx_PLUS (Pmode, tlsreg, GEN_INT (TARGET_THREAD_SSP_OFFSET)); + operands[1] = gen_rtx_MEM (Pmode, addr); +#endif + if (TARGET_ARCH64) + { + result = gen_reg_rtx (Pmode); + emit_insn (gen_stack_protect_testdi (result, operands[0], operands[1])); + test = gen_rtx_EQ (VOIDmode, result, const0_rtx); + emit_jump_insn (gen_cbranchdi4 (test, result, const0_rtx, operands[2])); + } + else + { + emit_insn (gen_stack_protect_testsi (operands[0], operands[1])); + result = gen_rtx_REG (CCmode, SPARC_ICC_REG); + test = gen_rtx_EQ (VOIDmode, result, const0_rtx); + emit_jump_insn (gen_cbranchcc4 (test, result, const0_rtx, operands[2])); + } + DONE; +}) + +(define_insn "stack_protect_testsi" + [(set (reg:CC 100) + (unspec:CC [(match_operand:SI 0 "memory_operand" "m") + (match_operand:SI 1 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (set (match_scratch:SI 3 "=r") (const_int 0)) + (clobber (match_scratch:SI 2 "=&r"))] + "TARGET_ARCH32" + "ld\t%0, %2\;ld\t%1, %3\;xorcc\t%2, %3, %2\;mov\t0, %3" + [(set_attr "type" "multi") + (set_attr "length" "4")]) + +(define_insn "stack_protect_testdi" + [(set (match_operand:DI 0 "register_operand" "=&r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (set (match_scratch:DI 3 "=r") (const_int 0))] + "TARGET_ARCH64" + "ldx\t%1, %0\;ldx\t%2, %3\;xor\t%0, %3, %0\;mov\t0, %3" + [(set_attr "type" "multi") + (set_attr "length" "4")]) + + +;; Vector instructions. + +(define_insn "addv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=e") + (plus:V2SI (match_operand:V2SI 1 "register_operand" "e") + (match_operand:V2SI 2 "register_operand" "e")))] + "TARGET_VIS" + "fpadd32\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +(define_insn "addv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=e") + (plus:V4HI (match_operand:V4HI 1 "register_operand" "e") + (match_operand:V4HI 2 "register_operand" "e")))] + "TARGET_VIS" + "fpadd16\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +;; fpadd32s is emitted by the addsi3 pattern. 
+ +(define_insn "addv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=f") + (plus:V2HI (match_operand:V2HI 1 "register_operand" "f") + (match_operand:V2HI 2 "register_operand" "f")))] + "TARGET_VIS" + "fpadd16s\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "single")]) + +(define_insn "subv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=e") + (minus:V2SI (match_operand:V2SI 1 "register_operand" "e") + (match_operand:V2SI 2 "register_operand" "e")))] + "TARGET_VIS" + "fpsub32\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +(define_insn "subv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=e") + (minus:V4HI (match_operand:V4HI 1 "register_operand" "e") + (match_operand:V4HI 2 "register_operand" "e")))] + "TARGET_VIS" + "fpsub16\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +;; fpsub32s is emitted by the subsi3 pattern. + +(define_insn "subv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=f") + (minus:V2HI (match_operand:V2HI 1 "register_operand" "f") + (match_operand:V2HI 2 "register_operand" "f")))] + "TARGET_VIS" + "fpsub16s\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "single")]) + +;; All other logical instructions have integer equivalents so they +;; are defined together. + +;; (ior (not (op1)) (not (op2))) is the canonical form of NAND. + +(define_insn "*nand_vis" + [(set (match_operand:V64 0 "register_operand" "=e") + (ior:V64 (not:V64 (match_operand:V64 1 "register_operand" "e")) + (not:V64 (match_operand:V64 2 "register_operand" "e"))))] + "TARGET_VIS" + "fnand\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +(define_insn "*nand_vis" + [(set (match_operand:V32 0 "register_operand" "=f") + (ior:V32 (not:V32 (match_operand:V32 1 "register_operand" "f")) + (not:V32 (match_operand:V32 2 "register_operand" "f"))))] + "TARGET_VIS" + "fnands\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "single")]) + +;; Hard to generate VIS instructions. We have builtins for these. + +(define_insn "fpack16_vis" + [(set (match_operand:V4QI 0 "register_operand" "=f") + (unspec:V4QI [(match_operand:V4HI 1 "register_operand" "e")] + UNSPEC_FPACK16))] + "TARGET_VIS" + "fpack16\t%1, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +(define_insn "fpackfix_vis" + [(set (match_operand:V2HI 0 "register_operand" "=f") + (unspec:V2HI [(match_operand:V2SI 1 "register_operand" "e")] + UNSPEC_FPACKFIX))] + "TARGET_VIS" + "fpackfix\t%1, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +(define_insn "fpack32_vis" + [(set (match_operand:V8QI 0 "register_operand" "=e") + (unspec:V8QI [(match_operand:V2SI 1 "register_operand" "e") + (match_operand:V8QI 2 "register_operand" "e")] + UNSPEC_FPACK32))] + "TARGET_VIS" + "fpack32\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +(define_insn "fexpand_vis" + [(set (match_operand:V4HI 0 "register_operand" "=e") + (unspec:V4HI [(match_operand:V4QI 1 "register_operand" "f")] + UNSPEC_FEXPAND))] + "TARGET_VIS" + "fexpand\t%1, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +;; It may be possible to describe this operation as (1 indexed): +;; (vec_select (vec_duplicate (vec_duplicate (vec_concat 1 2))) +;; 1,5,10,14,19,23,28,32) +;; Note that (vec_merge:V8QI [(V4QI) (V4QI)] (10101010 = 170) doesn't work +;; because vec_merge expects all the operands to be of the same type. 
+(define_insn "fpmerge_vis" + [(set (match_operand:V8QI 0 "register_operand" "=e") + (unspec:V8QI [(match_operand:V4QI 1 "register_operand" "f") + (match_operand:V4QI 2 "register_operand" "f")] + UNSPEC_FPMERGE))] + "TARGET_VIS" + "fpmerge\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +;; Partitioned multiply instructions +(define_insn "fmul8x16_vis" + [(set (match_operand:V4HI 0 "register_operand" "=e") + (mult:V4HI (match_operand:V4QI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "e")))] + "TARGET_VIS" + "fmul8x16\t%1, %2, %0" + [(set_attr "type" "fpmul") + (set_attr "fptype" "double")]) + +;; Only one of the following two insns can be a multiply. +(define_insn "fmul8x16au_vis" + [(set (match_operand:V4HI 0 "register_operand" "=e") + (mult:V4HI (match_operand:V4QI 1 "register_operand" "f") + (match_operand:V2HI 2 "register_operand" "f")))] + "TARGET_VIS" + "fmul8x16au\t%1, %2, %0" + [(set_attr "type" "fpmul") + (set_attr "fptype" "double")]) + +(define_insn "fmul8x16al_vis" + [(set (match_operand:V4HI 0 "register_operand" "=e") + (unspec:V4HI [(match_operand:V4QI 1 "register_operand" "f") + (match_operand:V2HI 2 "register_operand" "f")] + UNSPEC_MUL16AL))] + "TARGET_VIS" + "fmul8x16al\t%1, %2, %0" + [(set_attr "type" "fpmul") + (set_attr "fptype" "double")]) + +;; Only one of the following two insns can be a multiply. +(define_insn "fmul8sux16_vis" + [(set (match_operand:V4HI 0 "register_operand" "=e") + (mult:V4HI (match_operand:V8QI 1 "register_operand" "e") + (match_operand:V4HI 2 "register_operand" "e")))] + "TARGET_VIS" + "fmul8sux16\t%1, %2, %0" + [(set_attr "type" "fpmul") + (set_attr "fptype" "double")]) + +(define_insn "fmul8ulx16_vis" + [(set (match_operand:V4HI 0 "register_operand" "=e") + (unspec:V4HI [(match_operand:V8QI 1 "register_operand" "e") + (match_operand:V4HI 2 "register_operand" "e")] + UNSPEC_MUL8UL))] + "TARGET_VIS" + "fmul8ulx16\t%1, %2, %0" + [(set_attr "type" "fpmul") + (set_attr "fptype" "double")]) + +;; Only one of the following two insns can be a multiply. +(define_insn "fmuld8sux16_vis" + [(set (match_operand:V2SI 0 "register_operand" "=e") + (mult:V2SI (match_operand:V4QI 1 "register_operand" "f") + (match_operand:V2HI 2 "register_operand" "f")))] + "TARGET_VIS" + "fmuld8sux16\t%1, %2, %0" + [(set_attr "type" "fpmul") + (set_attr "fptype" "double")]) + +(define_insn "fmuld8ulx16_vis" + [(set (match_operand:V2SI 0 "register_operand" "=e") + (unspec:V2SI [(match_operand:V4QI 1 "register_operand" "f") + (match_operand:V2HI 2 "register_operand" "f")] + UNSPEC_MULDUL))] + "TARGET_VIS" + "fmuld8ulx16\t%1, %2, %0" + [(set_attr "type" "fpmul") + (set_attr "fptype" "double")]) + +;; Using faligndata only makes sense after an alignaddr since the choice of +;; bytes to take out of each operand is dependent on the results of the last +;; alignaddr. 
+(define_insn "faligndata_vis" + [(set (match_operand:V64I 0 "register_operand" "=e") + (unspec:V64I [(match_operand:V64I 1 "register_operand" "e") + (match_operand:V64I 2 "register_operand" "e")] + UNSPEC_ALIGNDATA))] + "TARGET_VIS" + "faligndata\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +(define_insn "alignaddr_vis" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(match_operand:P 1 "register_or_zero_operand" "rJ") + (match_operand:P 2 "register_or_zero_operand" "rJ")] + UNSPEC_ALIGNADDR))] + "TARGET_VIS" + "alignaddr\t%r1, %r2, %0") + +(define_insn "pdist_vis" + [(set (match_operand:DI 0 "register_operand" "=e") + (unspec:DI [(match_operand:V8QI 1 "register_operand" "e") + (match_operand:V8QI 2 "register_operand" "e") + (match_operand:DI 3 "register_operand" "0")] + UNSPEC_PDIST))] + "TARGET_VIS" + "pdist\t%1, %2, %0" + [(set_attr "type" "fga") + (set_attr "fptype" "double")]) + +(include "sync.md") diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt new file mode 100644 index 000000000..a97cad176 --- /dev/null +++ b/gcc/config/sparc/sparc.opt @@ -0,0 +1,126 @@ +; Options for the SPARC port of the compiler +; +; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +mfpu +Target Report Mask(FPU) +Use hardware FP + +mhard-float +Target RejectNegative Mask(FPU) MaskExists +Use hardware FP + +msoft-float +Target RejectNegative InverseMask(FPU) +Do not use hardware FP + +munaligned-doubles +Target Report Mask(UNALIGNED_DOUBLES) +Assume possible double misalignment + +mapp-regs +Target Report Mask(APP_REGS) +Use ABI reserved registers + +mhard-quad-float +Target Report RejectNegative Mask(HARD_QUAD) +Use hardware quad FP instructions + +msoft-quad-float +Target Report RejectNegative InverseMask(HARD_QUAD) +Do not use hardware quad fp instructions + +mv8plus +Target Report Mask(V8PLUS) +Compile for V8+ ABI + +mvis +Target Report Mask(VIS) +Use UltraSPARC Visual Instruction Set extensions + +mptr64 +Target Report RejectNegative Mask(PTR64) +Pointers are 64-bit + +mptr32 +Target Report RejectNegative InverseMask(PTR64) +Pointers are 32-bit + +m64 +Target Report RejectNegative Mask(64BIT) +Use 64-bit ABI + +m32 +Target Report RejectNegative InverseMask(64BIT) +Use 32-bit ABI + +mstack-bias +Target Report Mask(STACK_BIAS) +Use stack bias + +mfaster-structs +Target Report Mask(FASTER_STRUCTS) +Use structs on stronger alignment for double-word copies + +mrelax +Target +Optimize tail call instructions in assembler and linker + +mcpu= +Target RejectNegative Joined +Use features of and schedule code for given CPU + +mtune= +Target RejectNegative Joined +Schedule code for given CPU + +mcmodel= +Target RejectNegative Joined Var(sparc_cmodel_string) +Use given SPARC-V9 code model + +mstd-struct-return +Target Report RejectNegative Var(sparc_std_struct_return) +Enable strict 32-bit psABI struct return checking. 
+ +mfix-at697f +Target Report RejectNegative Var(sparc_fix_at697f) +Enable workaround for single erratum of AT697F processor +(corresponding to erratum #13 of AT697E processor) + +Mask(LITTLE_ENDIAN) +;; Generate code for little-endian + +Mask(LONG_DOUBLE_128) +;; Use 128-bit long double + +Mask(SPARCLITE) +;; Generate code for SPARClite + +Mask(SPARCLET) +;; Generate code for SPARClet + +Mask(V8) +;; Generate code for SPARC-V8 + +Mask(V9) +;; Generate code for SPARC-V9 + +Mask(DEPRECATED_V8_INSNS) +;; Generate code that uses the V8 instructions deprecated +;; in the V9 architecture. diff --git a/gcc/config/sparc/sparclet.md b/gcc/config/sparc/sparclet.md new file mode 100644 index 000000000..3e99d56ad --- /dev/null +++ b/gcc/config/sparc/sparclet.md @@ -0,0 +1,43 @@ +;; Scheduling description for SPARClet. +;; Copyright (C) 2002, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The SPARClet is a single-issue processor. + +(define_automaton "sparclet") + +(define_cpu_unit "sl_load0,sl_load1,sl_load2,sl_load3" "sparclet") +(define_cpu_unit "sl_store,sl_imul" "sparclet") + +(define_reservation "sl_load_any" "(sl_load0 | sl_load1 | sl_load2 | sl_load3)") +(define_reservation "sl_load_all" "(sl_load0 + sl_load1 + sl_load2 + sl_load3)") + +(define_insn_reservation "sl_ld" 3 + (and (eq_attr "cpu" "tsc701") + (eq_attr "type" "load,sload")) + "sl_load_any, sl_load_any, sl_load_any") + +(define_insn_reservation "sl_st" 3 + (and (eq_attr "cpu" "tsc701") + (eq_attr "type" "store")) + "(sl_store+sl_load_all)*3") + +(define_insn_reservation "sl_imul" 5 + (and (eq_attr "cpu" "tsc701") + (eq_attr "type" "imul")) + "sl_imul*5") diff --git a/gcc/config/sparc/supersparc.md b/gcc/config/sparc/supersparc.md new file mode 100644 index 000000000..c5617c155 --- /dev/null +++ b/gcc/config/sparc/supersparc.md @@ -0,0 +1,92 @@ +;; Scheduling description for SuperSPARC. +;; Copyright (C) 2002, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The SuperSPARC is a tri-issue, which was considered quite parallel +;; at the time it was released. Much like UltraSPARC-I and UltraSPARC-II +;; there are two integer units but only one of them may take shifts. +;; +;; ??? If SuperSPARC has the same slotting rules as ultrasparc for these +;; ??? 
shifts, we should model that. + +(define_automaton "supersparc_0,supersparc_1") + +(define_cpu_unit "ss_memory, ss_shift, ss_iwport0, ss_iwport1" "supersparc_0") +(define_cpu_unit "ss_fpalu" "supersparc_0") +(define_cpu_unit "ss_fpmds" "supersparc_1") + +(define_reservation "ss_iwport" "(ss_iwport0 | ss_iwport1)") + +(define_insn_reservation "ss_iuload" 1 + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "load,sload")) + "ss_memory") + +;; Ok, fpu loads deliver the result in zero cycles. But we +;; have to show the ss_memory reservation somehow, thus... +(define_insn_reservation "ss_fpload" 0 + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fpload")) + "ss_memory") + +(define_bypass 0 "ss_fpload" "ss_fp_alu,ss_fp_mult,ss_fp_divs,ss_fp_divd,ss_fp_sqrt") + +(define_insn_reservation "ss_store" 1 + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "store,fpstore")) + "ss_memory") + +(define_insn_reservation "ss_ialu_shift" 1 + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "shift")) + "ss_shift + ss_iwport") + +(define_insn_reservation "ss_ialu_any" 1 + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "load,sload,store,shift,ialu")) + "ss_iwport") + +(define_insn_reservation "ss_fp_alu" 3 + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fp,fpmove,fpcmp")) + "ss_fpalu, nothing*2") + +(define_insn_reservation "ss_fp_mult" 3 + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fpmul")) + "ss_fpmds, nothing*2") + +(define_insn_reservation "ss_fp_divs" 6 + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fpdivs")) + "ss_fpmds*4, nothing*2") + +(define_insn_reservation "ss_fp_divd" 9 + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fpdivd")) + "ss_fpmds*7, nothing*2") + +(define_insn_reservation "ss_fp_sqrt" 12 + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fpsqrts,fpsqrtd")) + "ss_fpmds*10, nothing*2") + +(define_insn_reservation "ss_imul" 4 + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "imul")) + "ss_fpmds*4") diff --git a/gcc/config/sparc/sync.md b/gcc/config/sparc/sync.md new file mode 100644 index 000000000..5dd37d094 --- /dev/null +++ b/gcc/config/sparc/sync.md @@ -0,0 +1,199 @@ +;; GCC machine description for SPARC synchronization instructions. +;; Copyright (C) 2005, 2007, 2009, 2010 +;; Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
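The synchronization patterns defined below in sync.md are normally reached through GCC's __sync builtins. A hedged C sketch (illustrative only): on a V9 target __sync_val_compare_and_swap is expected to expand through the compare-and-swap patterns (cas/casx), __sync_lock_test_and_set through swap (or ldstub on pre-V8 parts, where only the value 1 can be stored), and __sync_synchronize through the memory_barrier expander (stbar on V8, membar on V9).

int cas_int (int *p, int expected, int desired)
{
  return __sync_val_compare_and_swap (p, expected, desired);
}

int test_and_set (int *p)
{
  return __sync_lock_test_and_set (p, 1);
}

void full_barrier (void)
{
  __sync_synchronize ();
}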
+ +(define_mode_iterator I12MODE [QI HI]) +(define_mode_iterator I24MODE [HI SI]) +(define_mode_iterator I48MODE [SI (DI "TARGET_ARCH64 || TARGET_V8PLUS")]) +(define_mode_attr modesuffix [(SI "") (DI "x")]) + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMBAR))] + "TARGET_V8 || TARGET_V9" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; + +}) + +(define_insn "*stbar" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMBAR))] + "TARGET_V8" + "stbar" + [(set_attr "type" "multi")]) + +;; membar #StoreStore | #LoadStore | #StoreLoad | #LoadLoad +(define_insn "*membar" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMBAR))] + "TARGET_V9" + "membar\t15" + [(set_attr "type" "multi")]) + +(define_expand "sync_compare_and_swap" + [(match_operand:I12MODE 0 "register_operand" "") + (match_operand:I12MODE 1 "memory_operand" "") + (match_operand:I12MODE 2 "register_operand" "") + (match_operand:I12MODE 3 "register_operand" "")] + "TARGET_V9" +{ + sparc_expand_compare_and_swap_12 (operands[0], operands[1], + operands[2], operands[3]); + DONE; +}) + +(define_expand "sync_compare_and_swap" + [(parallel + [(set (match_operand:I48MODE 0 "register_operand" "") + (match_operand:I48MODE 1 "memory_operand" "")) + (set (match_dup 1) + (unspec_volatile:I48MODE + [(match_operand:I48MODE 2 "register_operand" "") + (match_operand:I48MODE 3 "register_operand" "")] + UNSPECV_CAS))])] + "TARGET_V9" +{ + if (!REG_P (XEXP (operands[1], 0))) + { + rtx addr = force_reg (Pmode, XEXP (operands[1], 0)); + operands[1] = replace_equiv_address (operands[1], addr); + } + emit_insn (gen_memory_barrier ()); +}) + +(define_insn "*sync_compare_and_swap" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (mem:I48MODE (match_operand 1 "register_operand" "r"))) + (set (mem:I48MODE (match_dup 1)) + (unspec_volatile:I48MODE + [(match_operand:I48MODE 2 "register_operand" "r") + (match_operand:I48MODE 3 "register_operand" "0")] + UNSPECV_CAS))] + "TARGET_V9 && (mode == SImode || TARGET_ARCH64)" + "cas\t[%1], %2, %0" + [(set_attr "type" "multi")]) + +(define_insn "*sync_compare_and_swapdi_v8plus" + [(set (match_operand:DI 0 "register_operand" "=h") + (mem:DI (match_operand 1 "register_operand" "r"))) + (set (mem:DI (match_dup 1)) + (unspec_volatile:DI + [(match_operand:DI 2 "register_operand" "h") + (match_operand:DI 3 "register_operand" "0")] + UNSPECV_CAS))] + "TARGET_V8PLUS" +{ + if (sparc_check_64 (operands[3], insn) <= 0) + output_asm_insn ("srl\t%L3, 0, %L3", operands); + output_asm_insn ("sllx\t%H3, 32, %H3", operands); + output_asm_insn ("or\t%L3, %H3, %L3", operands); + if (sparc_check_64 (operands[2], insn) <= 0) + output_asm_insn ("srl\t%L2, 0, %L2", operands); + output_asm_insn ("sllx\t%H2, 32, %H3", operands); + output_asm_insn ("or\t%L2, %H3, %H3", operands); + output_asm_insn ("casx\t[%1], %H3, %L3", operands); + return "srlx\t%L3, 32, %H3"; +} + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_expand "sync_lock_test_and_set" + [(match_operand:I12MODE 0 "register_operand" "") + (match_operand:I12MODE 1 "memory_operand" "") + (match_operand:I12MODE 2 "arith_operand" "")] + "!TARGET_V9" +{ + if (operands[2] != const1_rtx) + FAIL; + if (TARGET_V8) + emit_insn (gen_memory_barrier ()); + if (mode != QImode) + operands[1] = adjust_address (operands[1], QImode, 0); + emit_insn (gen_ldstub (operands[0], operands[1])); + DONE; +}) + +(define_expand 
"sync_lock_test_and_setsi" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (unspec_volatile:SI [(match_operand:SI 1 "memory_operand" "")] + UNSPECV_SWAP)) + (set (match_dup 1) + (match_operand:SI 2 "arith_operand" ""))])] + "" +{ + if (! TARGET_V8 && ! TARGET_V9) + { + if (operands[2] != const1_rtx) + FAIL; + operands[1] = adjust_address (operands[1], QImode, 0); + emit_insn (gen_ldstubsi (operands[0], operands[1])); + DONE; + } + emit_insn (gen_memory_barrier ()); + operands[2] = force_reg (SImode, operands[2]); +}) + +(define_insn "*swapsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "memory_operand" "+m")] + UNSPECV_SWAP)) + (set (match_dup 1) + (match_operand:SI 2 "register_operand" "0"))] + "TARGET_V8 || TARGET_V9" + "swap\t%1, %0" + [(set_attr "type" "multi")]) + +(define_expand "ldstubqi" + [(parallel [(set (match_operand:QI 0 "register_operand" "") + (unspec_volatile:QI [(match_operand:QI 1 "memory_operand" "")] + UNSPECV_LDSTUB)) + (set (match_dup 1) (const_int -1))])] + "" + "") + +(define_expand "ldstub" + [(parallel [(set (match_operand:I24MODE 0 "register_operand" "") + (zero_extend:I24MODE + (unspec_volatile:QI [(match_operand:QI 1 "memory_operand" "")] + UNSPECV_LDSTUB))) + (set (match_dup 1) (const_int -1))])] + "" + "") + +(define_insn "*ldstubqi" + [(set (match_operand:QI 0 "register_operand" "=r") + (unspec_volatile:QI [(match_operand:QI 1 "memory_operand" "+m")] + UNSPECV_LDSTUB)) + (set (match_dup 1) (const_int -1))] + "" + "ldstub\t%1, %0" + [(set_attr "type" "multi")]) + +(define_insn "*ldstub" + [(set (match_operand:I24MODE 0 "register_operand" "=r") + (zero_extend:I24MODE + (unspec_volatile:QI [(match_operand:QI 1 "memory_operand" "+m")] + UNSPECV_LDSTUB))) + (set (match_dup 1) (const_int -1))] + "" + "ldstub\t%1, %0" + [(set_attr "type" "multi")]) diff --git a/gcc/config/sparc/sysv4.h b/gcc/config/sparc/sysv4.h new file mode 100644 index 000000000..2ffa94527 --- /dev/null +++ b/gcc/config/sparc/sysv4.h @@ -0,0 +1,125 @@ +/* Target definitions for GNU compiler for SPARC running System V.4 + Copyright (C) 1991, 1992, 1995, 1996, 1997, 1998, 2000, 2002, 2007, 2009, + 2010, 2011 + Free Software Foundation, Inc. + Contributed by Ron Guilmette (rfg@monkeys.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (sparc ELF)"); +#endif + +#undef SIZE_TYPE +#define SIZE_TYPE (TARGET_ARCH64 ? "long unsigned int" : "unsigned int") + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_ARCH64 ? "long int" : "int") + +/* Undefined some symbols which are appropriate only for typical svr4 + systems, but not for the specific case of svr4 running on a + SPARC. */ + +#undef INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP +#undef READONLY_DATA_SECTION_ASM_OP +#undef TYPE_OPERAND_FMT +#undef STRING_ASM_OP +#undef COMMON_ASM_OP +#undef SKIP_ASM_OP +#undef SET_ASM_OP /* Has no equivalent. 
See ASM_OUTPUT_DEF below. */ + +/* Pass -K to the assembler when PIC. */ +#undef ASM_SPEC +#define ASM_SPEC \ + "%{v:-V} %{Qy:} %{!Qn:-Qy} %{Ym,*} \ + %{fpic|fPIC|fpie|fPIE:-K PIC} %(asm_cpu)" + +/* Define the names of various pseudo-op used by the SPARC/svr4 assembler. + Note that many of these are different from the typical pseudo-ops used + by most svr4 assemblers. That is probably due to a (misguided?) attempt + to keep the SPARC/svr4 assembler somewhat compatible with the SPARC/SunOS + assembler. */ + +#define STRING_ASM_OP "\t.asciz\t" +#define COMMON_ASM_OP "\t.common\t" +#define SKIP_ASM_OP "\t.skip\t" + +/* This is the format used to print the second operand of a .type pseudo-op + for the SPARC/svr4 assembler. */ + +#define TYPE_OPERAND_FMT "#%s" + +#undef ASM_OUTPUT_CASE_LABEL +#define ASM_OUTPUT_CASE_LABEL(FILE, PREFIX, NUM, JUMPTABLE) \ +do { ASM_OUTPUT_ALIGN ((FILE), Pmode == SImode ? 2 : 3); \ + (*targetm.asm_out.internal_label) ((FILE), PREFIX, NUM); \ + } while (0) + +/* This is how to equate one symbol to another symbol. The syntax used is + `SYM1=SYM2'. Note that this is different from the way equates are done + with most svr4 assemblers, where the syntax is `.set SYM1,SYM2'. */ + +#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \ + do { fprintf ((FILE), "\t"); \ + assemble_name (FILE, LABEL1); \ + fprintf (FILE, " = "); \ + assemble_name (FILE, LABEL2); \ + fprintf (FILE, "\n"); \ + } while (0) + +/* A set of symbol definitions for assembly pseudo-ops which will + get us switched to various sections of interest. These are used + in all places where we simply want to switch to a section, and + *not* to push the previous section name onto the assembler's + section names stack (as we do often in dwarfout.c). */ + +#define TEXT_SECTION_ASM_OP "\t.section\t\".text\"" +#define DATA_SECTION_ASM_OP "\t.section\t\".data\"" +#define BSS_SECTION_ASM_OP "\t.section\t\".bss\"" +#define READONLY_DATA_SECTION_ASM_OP "\t.section\t\".rodata\"" +#define INIT_SECTION_ASM_OP "\t.section\t\".init\"" +#define FINI_SECTION_ASM_OP "\t.section\t\".fini\"" + +/* Define the pseudo-ops used to switch to the .ctors and .dtors sections. + + Note that we want to give these sections the SHF_WRITE attribute + because these sections will actually contain data (i.e. tables of + addresses of functions in the current root executable or shared library + file) and, in the case of a shared library, the relocatable addresses + will have to be properly resolved/relocated (and then written into) by + the dynamic linker when it actually attaches the given shared library + to the executing process. (Note that on SVR4, you may wish to use the + `-z text' option to the ELF linker, when building a shared library, as + an additional check that you are doing everything right. But if you do + use the `-z text' option when building a shared library, you will get + errors unless the .ctors and .dtors sections are marked as writable + via the SHF_WRITE attribute.) */ + +#undef CTORS_SECTION_ASM_OP +#define CTORS_SECTION_ASM_OP "\t.section\t\".ctors\",#alloc,#write" +#undef DTORS_SECTION_ASM_OP +#define DTORS_SECTION_ASM_OP "\t.section\t\".dtors\",#alloc,#write" + +#undef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) + +/* Override the name of the mcount profiling function. 
*/ + +#undef MCOUNT_FUNCTION +#define MCOUNT_FUNCTION "*_mcount" diff --git a/gcc/config/sparc/t-crtfm b/gcc/config/sparc/t-crtfm new file mode 100644 index 000000000..e0adb97bd --- /dev/null +++ b/gcc/config/sparc/t-crtfm @@ -0,0 +1,4 @@ +EXTRA_PARTS += crtfastmath.o + +$(T)crtfastmath.o: $(srcdir)/config/sparc/crtfastmath.c $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) $(LIBGCC2_CFLAGS) -c -o $(T)crtfastmath.o $(srcdir)/config/sparc/crtfastmath.c diff --git a/gcc/config/sparc/t-crtin b/gcc/config/sparc/t-crtin new file mode 100644 index 000000000..2612bac89 --- /dev/null +++ b/gcc/config/sparc/t-crtin @@ -0,0 +1,6 @@ +EXTRA_PARTS += crti.o crtn.o + +$(T)crti.o: $(srcdir)/config/sparc/sol2-ci.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-ci.asm +$(T)crtn.o: $(srcdir)/config/sparc/sol2-cn.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-cn.asm diff --git a/gcc/config/sparc/t-elf b/gcc/config/sparc/t-elf new file mode 100644 index 000000000..b1d18fda6 --- /dev/null +++ b/gcc/config/sparc/t-elf @@ -0,0 +1,40 @@ +# Copyright (C) 1997, 1998, 1999, 2001, 2002, 2005, 2007, 2010, 2011 +# Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMSRC = sparc/lb1spc.asm +LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 + +# We want fine grained libraries, so use the new code to build the +# floating point emulation libraries. +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + cat $(srcdir)/config/fp-bit.c > dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +MULTILIB_OPTIONS = msoft-float mcpu=v8 +MULTILIB_DIRNAMES = soft v8 +MULTILIB_MATCHES = msoft-float=mno-fpu + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/sparc/t-leon b/gcc/config/sparc/t-leon new file mode 100644 index 000000000..6573f824e --- /dev/null +++ b/gcc/config/sparc/t-leon @@ -0,0 +1,42 @@ +# Copyright (C) 2010 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMSRC = sparc/lb1spc.asm +LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 + +# We want fine grained libraries, so use the new code to build the +# floating point emulation libraries. 
+FPBIT = fp-bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + cat $(srcdir)/config/fp-bit.c > dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +# Multilibs for LEON +# LEON is a SPARC-V8, but the AT697 implementation has a bug in the +# V8-specific instructions. +MULTILIB_OPTIONS = mcpu=v7 msoft-float +MULTILIB_DIRNAMES = v7 soft +MULTILIB_MATCHES = mcpu?v7=mv7 msoft-float=mno-fpu + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/sparc/t-leon3 b/gcc/config/sparc/t-leon3 new file mode 100644 index 000000000..ce57d1675 --- /dev/null +++ b/gcc/config/sparc/t-leon3 @@ -0,0 +1,37 @@ +# Copyright (C) 2010 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# We want fine grained libraries, so use the new code to build the +# floating point emulation libraries. +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + cat $(srcdir)/config/fp-bit.c > dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +# Multilibs for LEON3 +MULTILIB_OPTIONS = msoft-float +MULTILIB_DIRNAMES = soft +MULTILIB_MATCHES = msoft-float=mno-fpu + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/sparc/t-linux b/gcc/config/sparc/t-linux new file mode 100644 index 000000000..38741fd89 --- /dev/null +++ b/gcc/config/sparc/t-linux @@ -0,0 +1,7 @@ +# Override t-slibgcc-elf-ver to export some libgcc symbols with +# the symbol versions that glibc used. +# Avoid the t-linux version file. +SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver \ + $(srcdir)/config/sparc/libgcc-sparc-glibc.ver + +MULTIARCH_DIRNAME = $(call if_multiarch,sparc-linux-gnu) diff --git a/gcc/config/sparc/t-linux64 b/gcc/config/sparc/t-linux64 new file mode 100644 index 000000000..d904876d4 --- /dev/null +++ b/gcc/config/sparc/t-linux64 @@ -0,0 +1,37 @@ +# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, +# 2006, 2010, 2012 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
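The fp-bit/dp-bit rules in the makefile fragments above (t-elf, t-leon, t-leon3) build the software floating-point routines used by the -msoft-float multilibs. A hedged illustration of what that means at the C level; __addsf3 is the standard single-precision libgcc entry point provided by fp-bit.c (assumption stated here, not shown in this patch):

/* Illustrative only: under -msoft-float no FPU instructions are
   emitted, so a plain float addition becomes a call into the
   fp-bit routines built above (here, __addsf3).  */
float soft_add (float a, float b)
{
  return a + b;      /* becomes a libgcc call under -msoft-float */
}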
+ +# On Debian, Ubuntu and other derivative distributions, the 32bit libraries +# are found in /lib32 and /usr/lib32, /lib64 and /usr/lib64 are symlinks to +# /lib and /usr/lib, while other distributions install libraries into /lib64 +# and /usr/lib64. The LSB does not enforce the use of /lib64 and /usr/lib64, +# it doesn't tell anything about the 32bit libraries on those systems. Set +# MULTILIB_OSDIRNAMES according to what is found on the target. + +MULTILIB_OPTIONS = m64/m32 +MULTILIB_DIRNAMES = 64 32 +MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:sparc64-linux-gnu) +MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:sparc-linux-gnu) + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib + +CRTSTUFF_T_CFLAGS = `if test x$$($(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) \ + -print-multi-os-directory) \ + = x../lib64; then echo -mcmodel=medany; fi` diff --git a/gcc/config/sparc/t-netbsd64 b/gcc/config/sparc/t-netbsd64 new file mode 100644 index 000000000..0fddb0ffe --- /dev/null +++ b/gcc/config/sparc/t-netbsd64 @@ -0,0 +1,8 @@ +# Disable multilib fow now, as NetBSD/sparc64 does not ship with +# a 32-bit environment. +#MULTILIB_OPTIONS = m32/m64 +#MULTILIB_DIRNAMES = 32 64 +#MULTILIB_MATCHES = + +#LIBGCC = stmp-multilib +#INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/sparc/t-sol2 b/gcc/config/sparc/t-sol2 new file mode 100644 index 000000000..b7f665b1b --- /dev/null +++ b/gcc/config/sparc/t-sol2 @@ -0,0 +1,39 @@ +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, +# 2002 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# gmon build rule: +$(T)gmon.o: $(srcdir)/config/sparc/gmon-sol2.c $(GCC_PASSES) \ + $(TCONFIG_H) tsystem.h coretypes.h $(TM_H) stmp-int-hdrs + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) \ + -c $(srcdir)/config/sparc/gmon-sol2.c -o $(T)gmon.o + +# Assemble startup files. +$(T)crt1.o: $(srcdir)/config/sparc/sol2-c1.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crt1.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-c1.asm +$(T)gcrt1.o: $(srcdir)/config/sparc/sol2-c1.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -DGCRT1 -o $(T)gcrt1.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-c1.asm + +# We need to use -fPIC when we are using gcc to compile the routines in +# crtstuff.c. This is only really needed when we are going to use gcc/g++ +# to produce a shared library, but since we don't know ahead of time when +# we will be doing that, we just always use -fPIC when compiling the +# routines in crtstuff.c. 
+ +CRTSTUFF_T_CFLAGS = -fPIC +TARGET_LIBGCC2_CFLAGS = -fPIC diff --git a/gcc/config/sparc/t-sol2-64 b/gcc/config/sparc/t-sol2-64 new file mode 100644 index 000000000..1802239d3 --- /dev/null +++ b/gcc/config/sparc/t-sol2-64 @@ -0,0 +1,7 @@ +MULTILIB_OPTIONS = m32/m64 +MULTILIB_DIRNAMES = sparcv8plus sparcv9 +MULTILIB_MATCHES = +MULTILIB_OSDIRNAMES = . sparcv9 + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/sparc/t-vxworks b/gcc/config/sparc/t-vxworks new file mode 100644 index 000000000..2aabf1a43 --- /dev/null +++ b/gcc/config/sparc/t-vxworks @@ -0,0 +1,5 @@ +# Multilibs for VxWorks. + +MULTILIB_OPTIONS = mrtp fPIC +MULTILIB_MATCHES = fPIC=fpic +MULTILIB_EXCEPTIONS = fPIC diff --git a/gcc/config/sparc/ultra1_2.md b/gcc/config/sparc/ultra1_2.md new file mode 100644 index 000000000..f0bd7a1cb --- /dev/null +++ b/gcc/config/sparc/ultra1_2.md @@ -0,0 +1,301 @@ +;; Scheduling description for UltraSPARC-I/II. +;; Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; UltraSPARC-I and II are quad-issue processors. Interesting features +;; to note: +;; +;; - Buffered loads, they can queue waiting for the actual data until +;; an instruction actually tries to reference the destination register +;; as an input +;; - Two integer units. Only one of them can do shifts, and the other +;; is the only one which may do condition code setting instructions. +;; Complicating things further, a shift may go only into the first +;; slot in a dispatched group. And if you have a non-condition code +;; setting instruction and one that does set the condition codes. The +;; former must be issued first in order for both of them to issue. +;; - Stores can issue before the value being stored is available. As long +;; as the input data becomes ready before the store is to move out of the +;; store buffer, it will not cause a stall. +;; - Branches may issue in the same cycle as an instruction setting the +;; condition codes being tested by that branch. This does not apply +;; to floating point, only integer. + +(define_automaton "ultrasparc_0,ultrasparc_1") + +(define_cpu_unit "us1_fdivider,us1_fpm" "ultrasparc_0"); +(define_cpu_unit "us1_fpa,us1_load_writeback" "ultrasparc_1") +(define_cpu_unit "us1_fps_0,us1_fps_1,us1_fpd_0,us1_fpd_1" "ultrasparc_1") +(define_cpu_unit "us1_slot0,us1_slot1,us1_slot2,us1_slot3" "ultrasparc_1") +(define_cpu_unit "us1_ieu0,us1_ieu1,us1_cti,us1_lsu" "ultrasparc_1") + +(define_reservation "us1_slot012" "(us1_slot0 | us1_slot1 | us1_slot2)") +(define_reservation "us1_slotany" "(us1_slot0 | us1_slot1 | us1_slot2 | us1_slot3)") +(define_reservation "us1_single_issue" "us1_slot0 + us1_slot1 + us1_slot2 + us1_slot3") + +(define_reservation "us1_fp_single" "(us1_fps_0 | us1_fps_1)") +(define_reservation "us1_fp_double" "(us1_fpd_0 | us1_fpd_1)") + +;; This is a simplified representation of the issue at hand. 
+;; For most cases, going from one FP precision type insn to another +;; just breaks up the insn group. However for some cases, such +;; a situation causes the second insn to stall 2 more cycles. +(exclusion_set "us1_fps_0,us1_fps_1" "us1_fpd_0,us1_fpd_1") + +;; If we have to schedule an ieu1 specific instruction and we want +;; to reserve the ieu0 unit as well, we must reserve it first. So for +;; example we could not schedule this sequence: +;; COMPARE IEU1 +;; IALU IEU0 +;; but we could schedule them together like this: +;; IALU IEU0 +;; COMPARE IEU1 +;; This basically requires that ieu0 is reserved before ieu1 when +;; it is required that both be reserved. +(absence_set "us1_ieu0" "us1_ieu1") + +;; This defines the slotting order. Most IEU instructions can only +;; execute in the first three slots, FPU and branches can go into +;; any slot. We represent instructions which "break the group" +;; as requiring reservation of us1_slot0. +(absence_set "us1_slot0" "us1_slot1,us1_slot2,us1_slot3") +(absence_set "us1_slot1" "us1_slot2,us1_slot3") +(absence_set "us1_slot2" "us1_slot3") + +(define_insn_reservation "us1_single" 1 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "multi,savew,flushw,iflush,trap")) + "us1_single_issue") + +(define_insn_reservation "us1_simple_ieuN" 1 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "ialu")) + "(us1_ieu0 | us1_ieu1) + us1_slot012") + +(define_insn_reservation "us1_simple_ieu0" 1 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "shift")) + "us1_ieu0 + us1_slot012") + +(define_insn_reservation "us1_simple_ieu1" 1 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "compare")) + "us1_ieu1 + us1_slot012") + +(define_insn_reservation "us1_ialuX" 1 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "ialuX")) + "us1_single_issue") + +(define_insn_reservation "us1_cmove" 2 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "cmove")) + "us1_single_issue, nothing") + +(define_insn_reservation "us1_imul" 1 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "imul")) + "us1_single_issue") + +(define_insn_reservation "us1_idiv" 1 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "idiv")) + "us1_single_issue") + +;; For loads, the "delayed return mode" behavior of the chip +;; is represented using the us1_load_writeback resource. 
+(define_insn_reservation "us1_load" 2 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "load,fpload")) + "us1_lsu + us1_slot012, us1_load_writeback") + +(define_insn_reservation "us1_load_signed" 3 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "sload")) + "us1_lsu + us1_slot012, nothing, us1_load_writeback") + +(define_insn_reservation "us1_store" 1 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "store,fpstore")) + "us1_lsu + us1_slot012") + +(define_insn_reservation "us1_branch" 1 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "branch")) + "us1_cti + us1_slotany") + +(define_insn_reservation "us1_call_jmpl" 1 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch")) + "us1_cti + us1_ieu1 + us1_slot0") + +(define_insn_reservation "us1_fmov_single" 1 + (and (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpmove")) + (eq_attr "fptype" "single")) + "us1_fpa + us1_fp_single + us1_slotany") + +(define_insn_reservation "us1_fmov_double" 1 + (and (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpmove")) + (eq_attr "fptype" "double")) + "us1_fpa + us1_fp_double + us1_slotany") + +(define_insn_reservation "us1_fcmov_single" 2 + (and (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpcmove,fpcrmove")) + (eq_attr "fptype" "single")) + "us1_fpa + us1_fp_single + us1_slotany, nothing") + +(define_insn_reservation "us1_fcmov_double" 2 + (and (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpcmove,fpcrmove")) + (eq_attr "fptype" "double")) + "us1_fpa + us1_fp_double + us1_slotany, nothing") + +(define_insn_reservation "us1_faddsub_single" 4 + (and (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fp")) + (eq_attr "fptype" "single")) + "us1_fpa + us1_fp_single + us1_slotany, nothing*3") + +(define_insn_reservation "us1_faddsub_double" 4 + (and (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fp")) + (eq_attr "fptype" "double")) + "us1_fpa + us1_fp_double + us1_slotany, nothing*3") + +(define_insn_reservation "us1_fpcmp_single" 1 + (and (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpcmp")) + (eq_attr "fptype" "single")) + "us1_fpa + us1_fp_single + us1_slotany") + +(define_insn_reservation "us1_fpcmp_double" 1 + (and (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpcmp")) + (eq_attr "fptype" "double")) + "us1_fpa + us1_fp_double + us1_slotany") + +(define_insn_reservation "us1_fmult_single" 4 + (and (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpmul")) + (eq_attr "fptype" "single")) + "us1_fpm + us1_fp_single + us1_slotany, nothing*3") + +(define_insn_reservation "us1_fmult_double" 4 + (and (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpmul")) + (eq_attr "fptype" "double")) + "us1_fpm + us1_fp_double + us1_slotany, nothing*3") + +;; This is actually in theory dangerous, because it is possible +;; for the chip to prematurely dispatch the dependent instruction +;; in the G stage, resulting in a 9 cycle stall. However I have never +;; been able to trigger this case myself even with hand written code, +;; so it must require some rare complicated pipeline state. +(define_bypass 3 + "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double" + "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double") + +;; Floating point divide and square root use the multiplier unit +;; for final rounding 3 cycles before the divide/sqrt is complete. 
+ +(define_insn_reservation "us1_fdivs" + 13 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpdivs,fpsqrts")) + "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*8, (us1_fpm + us1_fdivider), us1_fdivider*2" + ) + +(define_bypass + 12 + "us1_fdivs" + "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double") + +(define_insn_reservation "us1_fdivd" + 23 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpdivd,fpsqrtd")) + "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*18, (us1_fpm + us1_fdivider), us1_fdivider*2" + ) +(define_bypass + 22 + "us1_fdivd" + "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double") + +;; Any store may multi issue with the insn creating the source +;; data as long as that creating insn is not an FPU div/sqrt. +;; We need a special guard function because this bypass does +;; not apply to the address inputs of the store. +(define_bypass 0 "us1_simple_ieuN,us1_simple_ieu1,us1_simple_ieu0,us1_faddsub_single,us1_faddsub_double,us1_fmov_single,us1_fmov_double,us1_fcmov_single,us1_fcmov_double,us1_fmult_single,us1_fmult_double" "us1_store" + "store_data_bypass_p") + +;; An integer branch may execute in the same cycle as the compare +;; creating the condition codes. +(define_bypass 0 "us1_simple_ieu1" "us1_branch") + +;; VIS scheduling +(define_insn_reservation "us1_fga_single" + 2 + (and (and + (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fga")) + (eq_attr "fptype" "single")) + "us1_fpa + us1_fp_single + us1_slotany, nothing") + +(define_bypass 1 "us1_fga_single" "us1_fga_single") + +(define_insn_reservation "us1_fga_double" + 2 + (and (and + (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fga")) + (eq_attr "fptype" "double")) + "us1_fpa + us1_fp_double + us1_slotany, nothing") + +(define_bypass 1 "us1_fga_double" "us1_fga_double") + +(define_insn_reservation "us1_fgm_single" + 4 + (and (and + (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fgm_pack,fgm_mul,fgm_cmp")) + (eq_attr "fptype" "single")) + "us1_fpm + us1_fp_single + us1_slotany, nothing*3") + +(define_bypass 3 "us1_fgm_single" "us1_fga_single") + +(define_insn_reservation "us1_fgm_double" + 4 + (and (and + (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fgm_pack,fgm_mul,fgm_cmp")) + (eq_attr "fptype" "double")) + "us1_fpm + us1_fp_double + us1_slotany, nothing*3") + +(define_bypass 3 "us1_fgm_double" "us1_fga_double") + +(define_insn_reservation "us1_pdist" + 4 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fgm_pdist")) + "us1_fpm + us1_fp_double + us1_slotany, nothing*3") + +(define_bypass 3 "us1_pdist" "us1_fga_double,us1_fga_single") +(define_bypass 1 "us1_pdist" "us1_pdist") diff --git a/gcc/config/sparc/ultra3.md b/gcc/config/sparc/ultra3.md new file mode 100644 index 000000000..8feaf794c --- /dev/null +++ b/gcc/config/sparc/ultra3.md @@ -0,0 +1,189 @@ +;; Scheduling description for UltraSPARC-III. +;; Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; UltraSPARC-III is a quad-issue processor. +;; +;; It is also a much simpler beast than Ultra-I/II, no silly +;; slotting rules and both integer units are fully symmetric. +;; It does still have single-issue instructions though. + +(define_automaton "ultrasparc3_0,ultrasparc3_1") + +(define_cpu_unit "us3_ms,us3_br,us3_fpm" "ultrasparc3_0") +(define_cpu_unit "us3_a0,us3_a1,us3_slot0,\ + us3_slot1,us3_slot2,us3_slot3,us3_fpa" "ultrasparc3_1") +(define_cpu_unit "us3_load_writeback" "ultrasparc3_1") + +(define_reservation "us3_slotany" "(us3_slot0 | us3_slot1 | us3_slot2 | us3_slot3)") +(define_reservation "us3_single_issue" "us3_slot0 + us3_slot1 + us3_slot2 + us3_slot3") +(define_reservation "us3_ax" "(us3_a0 | us3_a1)") + +(define_insn_reservation "us3_single" 1 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "multi,savew,flushw,iflush,trap")) + "us3_single_issue") + +(define_insn_reservation "us3_integer" 1 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "ialu,shift,compare")) + "us3_ax + us3_slotany") + +(define_insn_reservation "us3_ialuX" 5 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "ialu,shift,compare")) + "us3_single_issue*4, nothing") + +(define_insn_reservation "us3_cmove" 2 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "cmove")) + "us3_ms + us3_br + us3_slotany, nothing") + +;; ??? Not entirely accurate. +;; ??? It can run from 6 to 9 cycles. The first cycle the MS pipe +;; ??? is needed, and the instruction group is broken right after +;; ??? the imul. Then 'helper' instructions are generated to perform +;; ??? each further stage of the multiplication, each such 'helper' is +;; ??? single group. So, the reservation aspect is represented accurately +;; ??? here, but the variable cycles are not. +;; ??? Currently I have no idea how to determine the variability, but once +;; ??? known we can simply add a define_bypass or similar to model it. +(define_insn_reservation "us3_imul" 7 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "imul")) + "us3_ms + us3_slotany, us3_single_issue*4, nothing*2") + +(define_insn_reservation "us3_idiv" 72 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "idiv")) + "us3_ms + us3_slotany, us3_single_issue*69, nothing*2") + +;; UltraSPARC-III has a similar load delay as UltraSPARC-I/II except +;; that all loads except 32-bit/64-bit unsigned loads take the extra +;; delay for sign/zero extension. 
+(define_insn_reservation "us3_2cycle_load" 2 + (and (eq_attr "cpu" "ultrasparc3") + (and (eq_attr "type" "load,fpload") + (eq_attr "us3load_type" "2cycle"))) + "us3_ms + us3_slotany, us3_load_writeback") + +(define_insn_reservation "us3_load_delayed" 3 + (and (eq_attr "cpu" "ultrasparc3") + (and (eq_attr "type" "load,sload") + (eq_attr "us3load_type" "3cycle"))) + "us3_ms + us3_slotany, nothing, us3_load_writeback") + +(define_insn_reservation "us3_store" 1 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "store,fpstore")) + "us3_ms + us3_slotany") + +(define_insn_reservation "us3_branch" 1 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "branch")) + "us3_br + us3_slotany") + +(define_insn_reservation "us3_call_jmpl" 1 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch")) + "us3_br + us3_ms + us3_slotany") + +(define_insn_reservation "us3_fmov" 3 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fpmove")) + "us3_fpa + us3_slotany, nothing*2") + +(define_insn_reservation "us3_fcmov" 3 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fpcmove")) + "us3_fpa + us3_br + us3_slotany, nothing*2") + +(define_insn_reservation "us3_fcrmov" 3 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fpcrmove")) + "us3_fpa + us3_ms + us3_slotany, nothing*2") + +(define_insn_reservation "us3_faddsub" 4 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fp")) + "us3_fpa + us3_slotany, nothing*3") + +(define_insn_reservation "us3_fpcmp" 5 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fpcmp")) + "us3_fpa + us3_slotany, nothing*4") + +(define_insn_reservation "us3_fmult" 4 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fpmul")) + "us3_fpm + us3_slotany, nothing*3") + +(define_insn_reservation "us3_fdivs" 17 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fpdivs")) + "(us3_fpm + us3_slotany), us3_fpm*14, nothing*2") + +(define_insn_reservation "us3_fsqrts" 20 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fpsqrts")) + "(us3_fpm + us3_slotany), us3_fpm*17, nothing*2") + +(define_insn_reservation "us3_fdivd" 20 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fpdivd")) + "(us3_fpm + us3_slotany), us3_fpm*17, nothing*2") + +(define_insn_reservation "us3_fsqrtd" 29 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fpsqrtd")) + "(us3_fpm + us3_slotany), us3_fpm*26, nothing*2") + +;; Any store may multi issue with the insn creating the source +;; data as long as that creating insn is not an FPU div/sqrt. +;; We need a special guard function because this bypass does +;; not apply to the address inputs of the store. +(define_bypass 0 "us3_integer,us3_faddsub,us3_fmov,us3_fcmov,us3_fmult" "us3_store" + "store_data_bypass_p") + +;; An integer branch may execute in the same cycle as the compare +;; creating the condition codes. +(define_bypass 0 "us3_integer" "us3_branch") + +;; If FMOVfcc is user of FPCMP, latency is only 1 cycle. 
+(define_bypass 1 "us3_fpcmp" "us3_fcmov") + +;; VIS scheduling +(define_insn_reservation "us3_fga" + 3 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fga")) + "us3_fpa + us3_slotany, nothing*2") + +(define_insn_reservation "us3_fgm" + 4 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fgm_pack,fgm_mul,fgm_cmp")) + "us3_fpm + us3_slotany, nothing*3") + +(define_insn_reservation "us3_pdist" + 4 + (and (eq_attr "cpu" "ultrasparc3") + (eq_attr "type" "fgm_pdist")) + "us3_fpm + us3_slotany, nothing*3") + +(define_bypass 1 "us3_pdist" "us3_pdist") diff --git a/gcc/config/sparc/vxworks.h b/gcc/config/sparc/vxworks.h new file mode 100644 index 000000000..e1b596e7f --- /dev/null +++ b/gcc/config/sparc/vxworks.h @@ -0,0 +1,60 @@ +/* Definitions of target machine for GNU compiler, + for SPARC targeting the VxWorks run time environment. + Copyright (C) 2007, 2010, 2011 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__sparc"); \ + builtin_define ("CPU=SIMSPARCSOLARIS"); \ + VXWORKS_OS_CPP_BUILTINS (); \ + } \ + while (0) + +#undef SUBTARGET_OVERRIDE_OPTIONS +#define SUBTARGET_OVERRIDE_OPTIONS VXWORKS_OVERRIDE_OPTIONS + +#undef CPP_SUBTARGET_SPEC +#define CPP_SUBTARGET_SPEC VXWORKS_ADDITIONAL_CPP_SPEC + +#undef ASM_SPEC +#define ASM_SPEC "%{fpic|fPIC|fpie|fPIE:-K PIC} %(asm_cpu)" + +#undef LIB_SPEC +#define LIB_SPEC VXWORKS_LIB_SPEC +#undef LINK_SPEC +#define LINK_SPEC VXWORKS_LINK_SPEC +#undef STARTFILE_SPEC +#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC +#undef ENDFILE_SPEC +#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC + +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (SPARC/VxWorks)", stderr); + +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER + +/* Use standard numbered ctors/dtors sections. */ +#undef CTORS_SECTION_ASM_OP +#undef DTORS_SECTION_ASM_OP + +/* We cannot use PC-relative accesses for VxWorks PIC because there is no + fixed gap between segments. */ +#undef ASM_PREFERRED_EH_DATA_FORMAT -- cgit v1.2.3