author      upstream source tree <ports@midipix.org>    2015-03-15 20:14:05 -0400
committer   upstream source tree <ports@midipix.org>    2015-03-15 20:14:05 -0400
commit      554fd8c5195424bdbcabf5de30fdc183aba391bd (patch)
tree        976dc5ab7fddf506dadce60ae936f43f58787092 /gcc/config/xtensa
download    cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.bz2
            cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.xz
obtained gcc-4.6.4.tar.bz2 from upstream website;
verified gcc-4.6.4.tar.bz2.sig;
imported gcc-4.6.4 source tree from verified upstream tarball.
downloading a git-generated archive based on the 'upstream' tag
should provide you with a source tree that is binary identical
to the one extracted from the above tarball.
if you have obtained the source via the command 'git clone',
however, do note that line-endings of files in your working
directory might differ from line-endings of the respective
files in the upstream repository.
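
For reference, the check described above can be sketched with ordinary shell
commands. This is only an illustration, not part of the commit: it assumes
gpg, GNU tar, and git are installed, that the signer's public key is already
in your keyring, that the commands are run from inside a clone of this
repository with the tarball alongside, and that the directory name
upstream-export is just a placeholder.

    # check the detached signature against the tarball
    gpg --verify gcc-4.6.4.tar.bz2.sig gcc-4.6.4.tar.bz2

    # unpack the verified upstream tarball (unpacks into gcc-4.6.4/)
    tar -xjf gcc-4.6.4.tar.bz2

    # export the 'upstream' tag directly, avoiding the working-tree
    # line-ending conversion mentioned above
    mkdir upstream-export
    git archive upstream | tar -x -C upstream-export

    # the two trees should be binary identical
    diff -r gcc-4.6.4 upstream-export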
Diffstat (limited to 'gcc/config/xtensa'); all 23 files are new (mode -rw-r--r--):

 gcc/config/xtensa/constraints.md       |  139
 gcc/config/xtensa/crti.asm             |   51
 gcc/config/xtensa/crtn.asm             |   46
 gcc/config/xtensa/elf.h                |  104
 gcc/config/xtensa/elf.opt              |   30
 gcc/config/xtensa/ieee754-df.S         | 2388
 gcc/config/xtensa/ieee754-sf.S         | 1757
 gcc/config/xtensa/lib1funcs.asm        |  845
 gcc/config/xtensa/lib2funcs.S          |  186
 gcc/config/xtensa/libgcc-xtensa.ver    |    3
 gcc/config/xtensa/linux-unwind.h       |   97
 gcc/config/xtensa/linux.h              |   71
 gcc/config/xtensa/predicates.md        |  175
 gcc/config/xtensa/t-elf                |    6
 gcc/config/xtensa/t-linux              |    3
 gcc/config/xtensa/t-xtensa             |   42
 gcc/config/xtensa/unwind-dw2-xtensa.c  |  546
 gcc/config/xtensa/unwind-dw2-xtensa.h  |   50
 gcc/config/xtensa/xtensa-protos.h      |   74
 gcc/config/xtensa/xtensa.c             | 3715
 gcc/config/xtensa/xtensa.h             |  847
 gcc/config/xtensa/xtensa.md            | 1914
 gcc/config/xtensa/xtensa.opt           |   43
23 files changed, 13132 insertions, 0 deletions
diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md new file mode 100644 index 000000000..bde1ba31a --- /dev/null +++ b/gcc/config/xtensa/constraints.md @@ -0,0 +1,139 @@ +;; Constraint definitions for Xtensa. +;; Copyright (C) 2006, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Register constraints. + +(define_register_constraint "a" "GR_REGS" + "General-purpose AR registers @code{a0}-@code{a15}, + except @code{a1} (@code{sp}).") + +(define_register_constraint "b" "TARGET_BOOLEANS ? BR_REGS : NO_REGS" + "Boolean registers @code{b0}-@code{b15}; only available if the Xtensa + Boolean Option is configured.") + +(define_register_constraint "d" "TARGET_DENSITY ? AR_REGS: NO_REGS" + "@internal + All AR registers, including sp, but only if the Xtensa Code Density + Option is configured.") + +(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS" + "Floating-point registers @code{f0}-@code{f15}; only available if the + Xtensa Floating-Pointer Coprocessor is configured.") + +(define_register_constraint "q" "SP_REG" + "@internal + The stack pointer (register @code{a1}).") + +(define_register_constraint "A" "TARGET_MAC16 ? ACC_REG : NO_REGS" + "The low 32 bits of the accumulator from the Xtensa MAC16 Option.") + +(define_register_constraint "B" "TARGET_SEXT ? GR_REGS : NO_REGS" + "@internal + General-purpose AR registers, but only if the Xtensa Sign Extend + Option is configured.") + +(define_register_constraint "C" "TARGET_MUL16 ? GR_REGS: NO_REGS" + "@internal + General-purpose AR registers, but only if the Xtensa 16-Bit Integer + Multiply Option is configured.") + +(define_register_constraint "D" "TARGET_DENSITY ? GR_REGS: NO_REGS" + "@internal + General-purpose AR registers, but only if the Xtensa Code Density + Option is configured.") + +(define_register_constraint "W" "TARGET_CONST16 ? GR_REGS: NO_REGS" + "@internal + General-purpose AR registers, but only if the Xtensa Const16 + Option is configured.") + +;; Integer constant constraints. + +(define_constraint "I" + "A signed 12-bit integer constant for use with MOVI instructions." + (and (match_code "const_int") + (match_test "xtensa_simm12b (ival)"))) + +(define_constraint "J" + "A signed 8-bit integer constant for use with ADDI instructions." + (and (match_code "const_int") + (match_test "xtensa_simm8 (ival)"))) + +(define_constraint "K" + "A constant integer that can be an immediate operand of an Xtensa + conditional branch instruction that performs a signed comparison or + a comparison against zero." + (and (match_code "const_int") + (match_test "xtensa_b4const_or_zero (ival)"))) + +(define_constraint "L" + "A constant integer that can be an immediate operand of an Xtensa + conditional branch instruction that performs an unsigned comparison." 
+ (and (match_code "const_int") + (match_test "xtensa_b4constu (ival)"))) + +(define_constraint "M" + "An integer constant in the range @minus{}32-95 for use with MOVI.N + instructions." + (and (match_code "const_int") + (match_test "ival >= -32 && ival <= 95"))) + +(define_constraint "N" + "An unsigned 8-bit integer constant shifted left by 8 bits for use + with ADDMI instructions." + (and (match_code "const_int") + (match_test "xtensa_simm8x256 (ival)"))) + +(define_constraint "O" + "An integer constant that can be used in ADDI.N instructions." + (and (match_code "const_int") + (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) + +(define_constraint "P" + "An integer constant that can be used as a mask value in an EXTUI + instruction." + (and (match_code "const_int") + (match_test "xtensa_mask_immediate (ival)"))) + +;; Memory constraints. Do not use define_memory_constraint here. Doing so +;; causes reload to force some constants into the constant pool, but since +;; the Xtensa constant pool can only be accessed with L32R instructions, it +;; is always better to just copy a constant into a register. Instead, use +;; regular constraints but add a check to allow pseudos during reload. + +(define_constraint "R" + "Memory that can be accessed with a 4-bit unsigned offset from a register." + (ior (and (match_code "mem") + (match_test "smalloffset_mem_p (op)")) + (and (match_code "reg") + (match_test "reload_in_progress + && REGNO (op) >= FIRST_PSEUDO_REGISTER")))) + +(define_constraint "T" + "Memory in a literal pool (addressable with an L32R instruction)." + (and (match_code "mem") + (match_test "!TARGET_CONST16 && constantpool_mem_p (op)"))) + +(define_constraint "U" + "Memory that is not in a literal pool." + (ior (and (match_code "mem") + (match_test "! constantpool_mem_p (op)")) + (and (match_code "reg") + (match_test "reload_in_progress + && REGNO (op) >= FIRST_PSEUDO_REGISTER")))) diff --git a/gcc/config/xtensa/crti.asm b/gcc/config/xtensa/crti.asm new file mode 100644 index 000000000..cbe91b0e7 --- /dev/null +++ b/gcc/config/xtensa/crti.asm @@ -0,0 +1,51 @@ +# Start .init and .fini sections. +# Copyright (C) 2003, 2009 Free Software Foundation, Inc. +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. + +# This file just makes a stack frame for the contents of the .fini and +# .init sections. Users may put any desired instructions in those +# sections. 
+ +#include "xtensa-config.h" + + .section .init + .globl _init + .type _init,@function + .align 4 +_init: +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + entry sp, 64 +#else + addi sp, sp, -32 + s32i a0, sp, 0 +#endif + + .section .fini + .globl _fini + .type _fini,@function + .align 4 +_fini: +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + entry sp, 64 +#else + addi sp, sp, -32 + s32i a0, sp, 0 +#endif diff --git a/gcc/config/xtensa/crtn.asm b/gcc/config/xtensa/crtn.asm new file mode 100644 index 000000000..413cfa0ac --- /dev/null +++ b/gcc/config/xtensa/crtn.asm @@ -0,0 +1,46 @@ +# End of .init and .fini sections. +# Copyright (C) 2003, 2009 Free Software Foundation, Inc. +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. + + +# This file just makes sure that the .fini and .init sections do in +# fact return. Users may put any desired instructions in those sections. +# This file is the last thing linked into any executable. + +#include "xtensa-config.h" + + .section .init +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + retw +#else + l32i a0, sp, 0 + addi sp, sp, 32 + ret +#endif + + .section .fini +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + retw +#else + l32i a0, sp, 0 + addi sp, sp, 32 + ret +#endif diff --git a/gcc/config/xtensa/elf.h b/gcc/config/xtensa/elf.h new file mode 100644 index 000000000..54a9c8f19 --- /dev/null +++ b/gcc/config/xtensa/elf.h @@ -0,0 +1,104 @@ +/* Xtensa/Elf configuration. + Derived from the configuration for GCC for Intel i386 running Linux. + Copyright (C) 2001, 2003, 2006, 2007, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#define TARGET_SECTION_TYPE_FLAGS xtensa_multibss_section_type_flags + +/* Don't assume anything about the header files. 
*/ +#define NO_IMPLICIT_EXTERN_C + +#undef ASM_APP_ON +#define ASM_APP_ON "#APP\n" + +#undef ASM_APP_OFF +#define ASM_APP_OFF "#NO_APP\n" + +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (Xtensa/ELF)", stderr); + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "short unsigned int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 16 + +#undef ASM_SPEC +#define ASM_SPEC \ + "%{mtext-section-literals:--text-section-literals} \ + %{mno-text-section-literals:--no-text-section-literals} \ + %{mtarget-align:--target-align} \ + %{mno-target-align:--no-target-align} \ + %{mlongcalls:--longcalls} \ + %{mno-longcalls:--no-longcalls}" + +#undef LIB_SPEC +#define LIB_SPEC "-lc -lsim -lc -lhandlers-sim -lhal" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "crt1-sim%O%s crt0%O%s crti%O%s crtbegin%O%s _vectors%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + %{static:-static}}}" + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* Avoid dots for compatibility with VxWorks. */ +#undef NO_DOLLAR_IN_LABEL +#define NO_DOT_IN_LABEL + +/* Do not force "-fpic" for this target. */ +#define XTENSA_ALWAYS_PIC 0 + +#undef DBX_REGISTER_NUMBER + +/* Search for headers in $tooldir/arch/include and for libraries and + startfiles in $tooldir/arch/lib. */ +#define GCC_DRIVER_HOST_INITIALIZATION \ +do \ +{ \ + char *tooldir, *archdir; \ + tooldir = concat (tooldir_base_prefix, spec_machine, \ + dir_separator_str, NULL); \ + if (!IS_ABSOLUTE_PATH (tooldir)) \ + tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \ + spec_version, dir_separator_str, tooldir, NULL); \ + archdir = concat (tooldir, "arch", dir_separator_str, NULL); \ + add_prefix (&startfile_prefixes, \ + concat (archdir, "lib", dir_separator_str, NULL), \ + "GCC", PREFIX_PRIORITY_LAST, 0, 1); \ + add_prefix (&include_prefixes, archdir, \ + "GCC", PREFIX_PRIORITY_LAST, 0, 0); \ + } \ +while (0) diff --git a/gcc/config/xtensa/elf.opt b/gcc/config/xtensa/elf.opt new file mode 100644 index 000000000..bdeac15b2 --- /dev/null +++ b/gcc/config/xtensa/elf.opt @@ -0,0 +1,30 @@ +; Xtensa ELF (bare metal) options. + +; Copyright (C) 2011 +; Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. + +rdynamic +Driver + +; This comment is to ensure we retain the blank line above. 
diff --git a/gcc/config/xtensa/ieee754-df.S b/gcc/config/xtensa/ieee754-df.S new file mode 100644 index 000000000..9b46889bd --- /dev/null +++ b/gcc/config/xtensa/ieee754-df.S @@ -0,0 +1,2388 @@ +/* IEEE-754 double-precision functions for Xtensa + Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef __XTENSA_EB__ +#define xh a2 +#define xl a3 +#define yh a4 +#define yl a5 +#else +#define xh a3 +#define xl a2 +#define yh a5 +#define yl a4 +#endif + +/* Warning! The branch displacements for some Xtensa branch instructions + are quite small, and this code has been carefully laid out to keep + branch targets in range. If you change anything, be sure to check that + the assembler is not relaxing anything to branch over a jump. */ + +#ifdef L_negdf2 + + .align 4 + .global __negdf2 + .type __negdf2, @function +__negdf2: + leaf_entry sp, 16 + movi a4, 0x80000000 + xor xh, xh, a4 + leaf_return + +#endif /* L_negdf2 */ + +#ifdef L_addsubdf3 + + /* Addition */ +__adddf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Ladd_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall yh, a6, 1f + /* If x is a NaN, return it. Otherwise, return y. */ + slli a7, xh, 12 + or a7, a7, xl + beqz a7, .Ladd_ynan_or_inf +1: leaf_return + +.Ladd_ynan_or_inf: + /* Return y. */ + mov xh, yh + mov xl, yl + leaf_return + +.Ladd_opposite_signs: + /* Operand signs differ. Do a subtraction. */ + slli a7, a6, 11 + xor yh, yh, a7 + j .Lsub_same_sign + + .align 4 + .global __adddf3 + .type __adddf3, @function +__adddf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Check if the two operands have the same sign. */ + xor a7, xh, yh + bltz a7, .Ladd_opposite_signs + +.Ladd_same_sign: + /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ + ball xh, a6, .Ladd_xnan_or_inf + ball yh, a6, .Ladd_ynan_or_inf + + /* Compare the exponents. The smaller operand will be shifted + right by the exponent difference and added to the larger + one. */ + extui a7, xh, 20, 12 + extui a8, yh, 20, 12 + bltu a7, a8, .Ladd_shiftx + +.Ladd_shifty: + /* Check if the smaller (or equal) exponent is zero. */ + bnone yh, a6, .Ladd_yexpzero + + /* Replace yh sign/exponent with 0x001. */ + or yh, yh, a6 + slli yh, yh, 11 + srli yh, yh, 11 + +.Ladd_yexpdiff: + /* Compute the exponent difference. Optimize for difference < 32. 
*/ + sub a10, a7, a8 + bgeui a10, 32, .Ladd_bigshifty + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out of yl are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, yl, a9 + src yl, yh, yl + srl yh, yh + +.Ladd_addy: + /* Do the 64-bit addition. */ + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: + /* Check if the add overflowed into the exponent. */ + extui a10, xh, 20, 12 + beq a10, a7, .Ladd_round + mov a8, a7 + j .Ladd_carry + +.Ladd_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0", and increment the apparent exponent + because subnormals behave as if they had the minimum (nonzero) + exponent. Test for the case when both exponents are zero. */ + slli yh, yh, 12 + srli yh, yh, 12 + bnone xh, a6, .Ladd_bothexpzero + addi a8, a8, 1 + j .Ladd_yexpdiff + +.Ladd_bothexpzero: + /* Both exponents are zero. Handle this as a special case. There + is no need to shift or round, and the normal code for handling + a carry into the exponent field will not work because it + assumes there is an implicit "1.0" that needs to be added. */ + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: leaf_return + +.Ladd_bigshifty: + /* Exponent difference > 64 -- just return the bigger value. */ + bgeui a10, 64, 1b + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out are saved in a9 for rounding the result. */ + ssr a10 + sll a11, yl /* lost bits shifted out of yl */ + src a9, yh, yl + srl yl, yh + movi yh, 0 + beqz a11, .Ladd_addy + or a9, a9, a10 /* any positive, nonzero value will work */ + j .Ladd_addy + +.Ladd_xexpzero: + /* Same as "yexpzero" except skip handling the case when both + exponents are zero. */ + slli xh, xh, 12 + srli xh, xh, 12 + addi a7, a7, 1 + j .Ladd_xexpdiff + +.Ladd_shiftx: + /* Same thing as the "shifty" code, but with x and y swapped. Also, + because the exponent difference is always nonzero in this version, + the shift sequence can use SLL and skip loading a constant zero. */ + bnone xh, a6, .Ladd_xexpzero + + or xh, xh, a6 + slli xh, xh, 11 + srli xh, xh, 11 + +.Ladd_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Ladd_bigshiftx + + ssr a10 + sll a9, xl + src xl, xh, xl + srl xh, xh + +.Ladd_addx: + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: + /* Check if the add overflowed into the exponent. */ + extui a10, xh, 20, 12 + bne a10, a8, .Ladd_carry + +.Ladd_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi xl, xl, 1 + beqz xl, .Ladd_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_bigshiftx: + /* Mostly the same thing as "bigshifty".... */ + bgeui a10, 64, .Ladd_returny + + ssr a10 + sll a11, xl + src a9, xh, xl + srl xl, xh + movi xh, 0 + beqz a11, .Ladd_addx + or a9, a9, a10 + j .Ladd_addx + +.Ladd_returny: + mov xh, yh + mov xl, yl + leaf_return + +.Ladd_carry: + /* The addition has overflowed into the exponent field, so the + value needs to be renormalized. The mantissa of the result + can be recovered by subtracting the original exponent and + adding 0x100000 (which is the explicit "1.0" for the + mantissa of the non-shifted operand -- the "1.0" for the + shifted operand was already added). The mantissa can then + be shifted right by one bit. 
The explicit "1.0" of the + shifted mantissa then needs to be replaced by the exponent, + incremented by one to account for the normalizing shift. + It is faster to combine these operations: do the shift first + and combine the additions and subtractions. If x is the + original exponent, the result is: + shifted mantissa - (x << 19) + (1 << 19) + (x << 20) + or: + shifted mantissa + ((x + 1) << 19) + Note that the exponent is incremented here by leaving the + explicit "1.0" of the mantissa in the exponent field. */ + + /* Shift xh/xl right by one bit. Save the lsb of xl. */ + mov a10, xl + ssai 1 + src xl, xh, xl + srl xh, xh + + /* See explanation above. The original exponent is in a8. */ + addi a8, a8, 1 + slli a8, a8, 19 + add xh, xh, a8 + + /* Return an Infinity if the exponent overflowed. */ + ball xh, a6, .Ladd_infinity + + /* Same thing as the "round" code except the msb of the leftover + fraction is bit 0 of a10, with the rest of the fraction in a9. */ + bbci.l a10, 0, 1f + addi xl, xl, 1 + beqz xl, .Ladd_roundcarry + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_infinity: + /* Clear the mantissa. */ + movi xl, 0 + srli xh, xh, 20 + slli xh, xh, 20 + + /* The sign bit may have been lost in a carry-out. Put it back. */ + slli a8, a8, 1 + or xh, xh, a8 + leaf_return + +.Ladd_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Ladd_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. */ + leaf_return + + + /* Subtraction */ +__subdf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Lsub_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall yh, a6, 1f + /* Both x and y are either NaN or Inf, so the result is NaN. */ + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: leaf_return + +.Lsub_ynan_or_inf: + /* Negate y and return it. */ + slli a7, a6, 11 + xor xh, yh, a7 + mov xl, yl + leaf_return + +.Lsub_opposite_signs: + /* Operand signs differ. Do an addition. */ + slli a7, a6, 11 + xor yh, yh, a7 + j .Ladd_same_sign + + .align 4 + .global __subdf3 + .type __subdf3, @function +__subdf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Check if the two operands have the same sign. */ + xor a7, xh, yh + bltz a7, .Lsub_opposite_signs + +.Lsub_same_sign: + /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ + ball xh, a6, .Lsub_xnan_or_inf + ball yh, a6, .Lsub_ynan_or_inf + + /* Compare the operands. In contrast to addition, the entire + value matters here. */ + extui a7, xh, 20, 11 + extui a8, yh, 20, 11 + bltu xh, yh, .Lsub_xsmaller + beq xh, yh, .Lsub_compare_low + +.Lsub_ysmaller: + /* Check if the smaller (or equal) exponent is zero. */ + bnone yh, a6, .Lsub_yexpzero + + /* Replace yh sign/exponent with 0x001. */ + or yh, yh, a6 + slli yh, yh, 11 + srli yh, yh, 11 + +.Lsub_yexpdiff: + /* Compute the exponent difference. Optimize for difference < 32. */ + sub a10, a7, a8 + bgeui a10, 32, .Lsub_bigshifty + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out of yl are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, yl, a9 + src yl, yh, yl + srl yh, yh + +.Lsub_suby: + /* Do the 64-bit subtraction. 
*/ + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from xh/xl. */ + neg a9, a9 + beqz a9, 1f + addi a5, xh, -1 + moveqz xh, a5, xl + addi xl, xl, -1 +1: + /* Check if the subtract underflowed into the exponent. */ + extui a10, xh, 20, 11 + beq a10, a7, .Lsub_round + j .Lsub_borrow + +.Lsub_compare_low: + /* The high words are equal. Compare the low words. */ + bltu xl, yl, .Lsub_xsmaller + bltu yl, xl, .Lsub_ysmaller + /* The operands are equal. Return 0.0. */ + movi xh, 0 + movi xl, 0 +1: leaf_return + +.Lsub_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0". Unless x is also a subnormal, increment + y's apparent exponent because subnormals behave as if they had + the minimum (nonzero) exponent. */ + slli yh, yh, 12 + srli yh, yh, 12 + bnone xh, a6, .Lsub_yexpdiff + addi a8, a8, 1 + j .Lsub_yexpdiff + +.Lsub_bigshifty: + /* Exponent difference > 64 -- just return the bigger value. */ + bgeui a10, 64, 1b + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out are saved in a9 for rounding the result. */ + ssr a10 + sll a11, yl /* lost bits shifted out of yl */ + src a9, yh, yl + srl yl, yh + movi yh, 0 + beqz a11, .Lsub_suby + or a9, a9, a10 /* any positive, nonzero value will work */ + j .Lsub_suby + +.Lsub_xsmaller: + /* Same thing as the "ysmaller" code, but with x and y swapped and + with y negated. */ + bnone xh, a6, .Lsub_xexpzero + + or xh, xh, a6 + slli xh, xh, 11 + srli xh, xh, 11 + +.Lsub_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Lsub_bigshiftx + + ssr a10 + movi a9, 0 + src a9, xl, a9 + src xl, xh, xl + srl xh, xh + + /* Negate y. */ + slli a11, a6, 11 + xor yh, yh, a11 + +.Lsub_subx: + sub xl, yl, xl + sub xh, yh, xh + bgeu yl, xl, 1f + addi xh, xh, -1 +1: + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from xh/xl. */ + neg a9, a9 + beqz a9, 1f + addi a5, xh, -1 + moveqz xh, a5, xl + addi xl, xl, -1 +1: + /* Check if the subtract underflowed into the exponent. */ + extui a10, xh, 20, 11 + bne a10, a8, .Lsub_borrow + +.Lsub_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi xl, xl, 1 + beqz xl, .Lsub_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Lsub_exactlyhalf +1: leaf_return + +.Lsub_xexpzero: + /* Same as "yexpzero". */ + slli xh, xh, 12 + srli xh, xh, 12 + bnone yh, a6, .Lsub_xexpdiff + addi a7, a7, 1 + j .Lsub_xexpdiff + +.Lsub_bigshiftx: + /* Mostly the same thing as "bigshifty", but with the sign bit of the + shifted value set so that the subsequent subtraction flips the + sign of y. */ + bgeui a10, 64, .Lsub_returny + + ssr a10 + sll a11, xl + src a9, xh, xl + srl xl, xh + slli xh, a6, 11 /* set sign bit of xh */ + beqz a11, .Lsub_subx + or a9, a9, a10 + j .Lsub_subx + +.Lsub_returny: + /* Negate and return y. */ + slli a7, a6, 11 + xor xh, yh, a7 + mov xl, yl + leaf_return + +.Lsub_borrow: + /* The subtraction has underflowed into the exponent field, so the + value needs to be renormalized. Shift the mantissa left as + needed to remove any leading zeros and adjust the exponent + accordingly. If the exponent is not large enough to remove + all the leading zeros, the result will be a subnormal value. 
*/ + + slli a8, xh, 12 + beqz a8, .Lsub_xhzero + do_nsau a6, a8, a7, a11 + srli a8, a8, 12 + bge a6, a10, .Lsub_subnormal + addi a6, a6, 1 + +.Lsub_shift_lt32: + /* Shift the mantissa (a8/xl/a9) left by a6. */ + ssl a6 + src a8, a8, xl + src xl, xl, a9 + sll a9, a9 + + /* Combine the shifted mantissa with the sign and exponent, + decrementing the exponent by a6. (The exponent has already + been decremented by one due to the borrow from the subtraction, + but adding the mantissa will increment the exponent by one.) */ + srli xh, xh, 20 + sub xh, xh, a6 + slli xh, xh, 20 + add xh, xh, a8 + j .Lsub_round + +.Lsub_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Lsub_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. */ + leaf_return + +.Lsub_xhzero: + /* When normalizing the result, all the mantissa bits in the high + word are zero. Shift by "20 + (leading zero count of xl) + 1". */ + do_nsau a6, xl, a7, a11 + addi a6, a6, 21 + blt a10, a6, .Lsub_subnormal + +.Lsub_normalize_shift: + bltui a6, 32, .Lsub_shift_lt32 + + ssl a6 + src a8, xl, a9 + sll xl, a9 + movi a9, 0 + + srli xh, xh, 20 + sub xh, xh, a6 + slli xh, xh, 20 + add xh, xh, a8 + j .Lsub_round + +.Lsub_subnormal: + /* The exponent is too small to shift away all the leading zeros. + Set a6 to the current exponent (which has already been + decremented by the borrow) so that the exponent of the result + will be zero. Do not add 1 to a6 in this case, because: (1) + adding the mantissa will not increment the exponent, so there is + no need to subtract anything extra from the exponent to + compensate, and (2) the effective exponent of a subnormal is 1 + not 0 so the shift amount must be 1 smaller than normal. */ + mov a6, a10 + j .Lsub_normalize_shift + +#endif /* L_addsubdf3 */ + +#ifdef L_muldf3 + + /* Multiplication */ +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + +__muldf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Lmul_xexpzero: + /* Clear the sign bit of x. */ + slli xh, xh, 1 + srli xh, xh, 1 + + /* If x is zero, return zero. */ + or a10, xh, xl + beqz a10, .Lmul_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + beqz xh, .Lmul_xh_zero + do_nsau a10, xh, a11, a12 + addi a10, a10, -11 + ssl a10 + src xh, xh, xl + sll xl, xl + movi a8, 1 + sub a8, a8, a10 + j .Lmul_xnormalized +.Lmul_xh_zero: + do_nsau a10, xl, a11, a12 + addi a10, a10, -11 + movi a8, -31 + sub a8, a8, a10 + ssl a10 + bltz a10, .Lmul_xl_srl + sll xh, xl + movi xl, 0 + j .Lmul_xnormalized +.Lmul_xl_srl: + srl xh, xl + sll xl, xl + j .Lmul_xnormalized + +.Lmul_yexpzero: + /* Clear the sign bit of y. */ + slli yh, yh, 1 + srli yh, yh, 1 + + /* If y is zero, return zero. */ + or a10, yh, yl + beqz a10, .Lmul_return_zero + + /* Normalize y. Adjust the exponent in a9. 
*/ + beqz yh, .Lmul_yh_zero + do_nsau a10, yh, a11, a12 + addi a10, a10, -11 + ssl a10 + src yh, yh, yl + sll yl, yl + movi a9, 1 + sub a9, a9, a10 + j .Lmul_ynormalized +.Lmul_yh_zero: + do_nsau a10, yl, a11, a12 + addi a10, a10, -11 + movi a9, -31 + sub a9, a9, a10 + ssl a10 + bltz a10, .Lmul_yl_srl + sll yh, yl + movi yl, 0 + j .Lmul_ynormalized +.Lmul_yl_srl: + srl yh, yl + sll yl, yl + j .Lmul_ynormalized + +.Lmul_return_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + j .Lmul_done + +.Lmul_xnan_or_inf: + /* If y is zero, return NaN. */ + bnez yl, 1f + slli a8, yh, 1 + bnez a8, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 + j .Lmul_done +1: + /* If y is NaN, return y. */ + bnall yh, a6, .Lmul_returnx + slli a8, yh, 12 + or a8, a8, yl + beqz a8, .Lmul_returnx + +.Lmul_returny: + mov xh, yh + mov xl, yl + +.Lmul_returnx: + /* Set the sign bit and return. */ + extui a7, a7, 31, 1 + slli xh, xh, 1 + ssai 1 + src xh, a7, xh + j .Lmul_done + +.Lmul_ynan_or_inf: + /* If x is zero, return NaN. */ + bnez xl, .Lmul_returny + slli a8, xh, 1 + bnez a8, .Lmul_returny + movi a7, 0x80000 /* make it a quiet NaN */ + or xh, yh, a7 + j .Lmul_done + + .align 4 + .global __muldf3 + .type __muldf3, @function +__muldf3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 64 +#else + leaf_entry sp, 32 +#endif + movi a6, 0x7ff00000 + + /* Get the sign of the result. */ + xor a7, xh, yh + + /* Check for NaN and infinity. */ + ball xh, a6, .Lmul_xnan_or_inf + ball yh, a6, .Lmul_ynan_or_inf + + /* Extract the exponents. */ + extui a8, xh, 20, 11 + extui a9, yh, 20, 11 + + beqz a8, .Lmul_xexpzero +.Lmul_xnormalized: + beqz a9, .Lmul_yexpzero +.Lmul_ynormalized: + + /* Add the exponents. */ + add a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0x1fffff + or xh, xh, a6 + and xh, xh, a10 + or yh, yh, a6 + and yh, yh, a10 + + /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. + The least-significant word of the result is thrown away except + that if it is nonzero, the lsb of a6 is set to 1. */ +#if XCHAL_HAVE_MUL32_HIGH + + /* Compute a6 with any carry-outs in a10. */ + movi a10, 0 + mull a6, xl, yh + mull a11, xh, yl + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + muluh a11, xl, yl + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + /* If the low word of the result is nonzero, set the lsb of a6. */ + mull a11, xl, yl + beqz a11, 1f + movi a9, 1 + or a6, a6, a9 +1: + /* Compute xl with any carry-outs in a9. */ + movi a9, 0 + mull a11, xh, yh + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + muluh a11, xh, yl + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + muluh xl, xl, yh + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + /* Compute xh. */ + muluh xh, xh, yh + add xh, xh, a9 + +#else /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Break the inputs into 16-bit chunks and compute 16 32-bit partial + products. 
These partial products are: + + 0 xll * yll + + 1 xll * ylh + 2 xlh * yll + + 3 xll * yhl + 4 xlh * ylh + 5 xhl * yll + + 6 xll * yhh + 7 xlh * yhl + 8 xhl * ylh + 9 xhh * yll + + 10 xlh * yhh + 11 xhl * yhl + 12 xhh * ylh + + 13 xhl * yhh + 14 xhh * yhl + + 15 xhh * yhh + + where the input chunks are (hh, hl, lh, ll). If using the Mul16 + or Mul32 multiplier options, these input chunks must be stored in + separate registers. For Mac16, the UMUL.AA.* opcodes can specify + that the inputs come from either half of the registers, so there + is no need to shift them out ahead of time. If there is no + multiply hardware, the 16-bit chunks can be extracted when setting + up the arguments to the separate multiply function. */ + + /* Save a7 since it is needed to hold a temporary value. */ + s32i a7, sp, 4 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Calling a separate multiply function will clobber a0 and requires + use of a8 as a temporary, so save those values now. (The function + uses a custom ABI so nothing else needs to be saved.) */ + s32i a0, sp, 0 + s32i a8, sp, 8 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define xlh a12 +#define ylh a13 +#define xhh a14 +#define yhh a15 + + /* Get the high halves of the inputs into registers. */ + srli xlh, xl, 16 + srli ylh, yl, 16 + srli xhh, xh, 16 + srli yhh, yh, 16 + +#define xll xl +#define yll yl +#define xhl xh +#define yhl yh + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. */ + extui xl, xl, 0, 16 + extui xh, xh, 0, 16 + extui yl, yl, 0, 16 + extui yh, yh, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a10 with carry-out in a9. */ + do_mul(a10, xl, l, yl, h) /* pp 1 */ + do_mul(a11, xl, h, yl, l) /* pp 2 */ + movi a9, 0 + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + /* Initialize a6 with a9/a10 shifted into position. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a6, a9, a10 + + /* Compute the low word into a10. 
*/ + do_mul(a11, xl, l, yl, l) /* pp 0 */ + sll a10, a10 + add a10, a10, a11 + bgeu a10, a11, 1f + addi a6, a6, 1 +1: + /* Compute the contributions of pp0-5 to a6, with carry-outs in a9. + This is good enough to determine the low half of a6, so that any + nonzero bits from the low word of the result can be collapsed + into a6, freeing up a register. */ + movi a9, 0 + do_mul(a11, xl, l, yh, l) /* pp 3 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + do_mul(a11, xl, h, yl, h) /* pp 4 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + do_mul(a11, xh, l, yl, l) /* pp 5 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Collapse any nonzero bits from the low word into a6. */ + beqz a10, 1f + movi a11, 1 + or a6, a6, a11 +1: + /* Add pp6-9 into a11 with carry-outs in a10. */ + do_mul(a7, xl, l, yh, h) /* pp 6 */ + do_mul(a11, xh, h, yl, l) /* pp 9 */ + movi a10, 0 + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + do_mul(a7, xl, h, yh, l) /* pp 7 */ + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + do_mul(a7, xh, l, yl, h) /* pp 8 */ + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + /* Shift a10/a11 into position, and add low half of a11 to a6. */ + src a10, a10, a11 + add a10, a10, a9 + sll a11, a11 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + /* Add pp10-12 into xl with carry-outs in a9. */ + movi a9, 0 + do_mul(xl, xl, h, yh, h) /* pp 10 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + do_mul(a10, xh, l, yh, l) /* pp 11 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + do_mul(a10, xh, h, yl, h) /* pp 12 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + /* Add pp13-14 into a11 with carry-outs in a10. */ + do_mul(a11, xh, l, yh, h) /* pp 13 */ + do_mul(a7, xh, h, yh, l) /* pp 14 */ + movi a10, 0 + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + /* Shift a10/a11 into position, and add low half of a11 to a6. */ + src a10, a10, a11 + add a10, a10, a9 + sll a11, a11 + add xl, xl, a11 + bgeu xl, a11, 1f + addi a10, a10, 1 +1: + /* Compute xh. */ + do_mul(xh, xh, h, yh, h) /* pp 15 */ + add xh, xh, a10 + + /* Restore values saved on the stack during the multiplication. */ + l32i a7, sp, 4 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + l32i a0, sp, 0 + l32i a8, sp, 8 +#endif +#endif /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Shift left by 12 bits, unless there was a carry-out from the + multiply, in which case, shift by 11 bits and increment the + exponent. Note: It is convenient to use the constant 0x3ff + instead of 0x400 when removing the extra exponent bias (so that + it is easy to construct 0x7fe for the overflow check). Reverse + the logic here to decrement the exponent sum by one unless there + was a carry-out. */ + movi a4, 11 + srli a5, xh, 21 - 12 + bnez a5, 1f + addi a4, a4, 1 + addi a8, a8, -1 +1: ssl a4 + src xh, xh, xl + src xl, xl, a6 + sll a6, a6 + + /* Subtract the extra bias from the exponent sum (plus one to account + for the explicit "1.0" of the mantissa that will be added to the + exponent in the final result). */ + movi a4, 0x3ff + sub a8, a8, a4 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..7fd are OK here. */ + slli a4, a4, 1 /* 0x7fe */ + bgeu a8, a4, .Lmul_overflow + +.Lmul_round: + /* Round. */ + bgez a6, .Lmul_rounded + addi xl, xl, 1 + beqz xl, .Lmul_roundcarry + slli a6, a6, 1 + beqz a6, .Lmul_exactlyhalf + +.Lmul_rounded: + /* Add the exponent to the mantissa. 
*/ + slli a8, a8, 20 + add xh, xh, a8 + +.Lmul_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or xh, xh, a7 + +.Lmul_done: +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +.Lmul_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + j .Lmul_rounded + +.Lmul_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow is OK -- it will be added to the exponent. */ + j .Lmul_rounded + +.Lmul_overflow: + bltz a8, .Lmul_underflow + /* Return +/- Infinity. */ + addi a8, a4, 1 /* 0x7ff */ + slli xh, a8, 20 + movi xl, 0 + j .Lmul_addsign + +.Lmul_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + mov a9, a6 + ssr a8 + bgeui a8, 32, .Lmul_bigshift + + /* Shift xh/xl right. Any bits that are shifted out of xl are saved + in a6 (combined with the shifted-out bits currently in a6) for + rounding the result. */ + sll a6, xl + src xl, xh, xl + srl xh, xh + j 1f + +.Lmul_bigshift: + bgeui a8, 64, .Lmul_flush_to_zero + sll a10, xl /* lost bits shifted out of xl */ + src a6, xh, xl + srl xl, xh + movi xh, 0 + or a9, a9, a10 + + /* Set the exponent to zero. */ +1: movi a8, 0 + + /* Pack any nonzero bits shifted out into a6. */ + beqz a9, .Lmul_round + movi a9, 1 + or a6, a6, a9 + j .Lmul_round + +.Lmul_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + j .Lmul_done + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ +#endif /* L_muldf3 */ + +#ifdef L_divdf3 + + /* Division */ +__divdf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Ldiv_yexpzero: + /* Clear the sign bit of y. */ + slli yh, yh, 1 + srli yh, yh, 1 + + /* Check for division by zero. */ + or a10, yh, yl + beqz a10, .Ldiv_yzero + + /* Normalize y. Adjust the exponent in a9. 
*/ + beqz yh, .Ldiv_yh_zero + do_nsau a10, yh, a11, a9 + addi a10, a10, -11 + ssl a10 + src yh, yh, yl + sll yl, yl + movi a9, 1 + sub a9, a9, a10 + j .Ldiv_ynormalized +.Ldiv_yh_zero: + do_nsau a10, yl, a11, a9 + addi a10, a10, -11 + movi a9, -31 + sub a9, a9, a10 + ssl a10 + bltz a10, .Ldiv_yl_srl + sll yh, yl + movi yl, 0 + j .Ldiv_ynormalized +.Ldiv_yl_srl: + srl yh, yl + sll yl, yl + j .Ldiv_ynormalized + +.Ldiv_yzero: + /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ + slli xh, xh, 1 + srli xh, xh, 1 + or xl, xl, xh + srli xh, a7, 31 + slli xh, xh, 31 + or xh, xh, a6 + bnez xl, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: movi xl, 0 + leaf_return + +.Ldiv_xexpzero: + /* Clear the sign bit of x. */ + slli xh, xh, 1 + srli xh, xh, 1 + + /* If x is zero, return zero. */ + or a10, xh, xl + beqz a10, .Ldiv_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + beqz xh, .Ldiv_xh_zero + do_nsau a10, xh, a11, a8 + addi a10, a10, -11 + ssl a10 + src xh, xh, xl + sll xl, xl + movi a8, 1 + sub a8, a8, a10 + j .Ldiv_xnormalized +.Ldiv_xh_zero: + do_nsau a10, xl, a11, a8 + addi a10, a10, -11 + movi a8, -31 + sub a8, a8, a10 + ssl a10 + bltz a10, .Ldiv_xl_srl + sll xh, xl + movi xl, 0 + j .Ldiv_xnormalized +.Ldiv_xl_srl: + srl xh, xl + sll xl, xl + j .Ldiv_xnormalized + +.Ldiv_return_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + leaf_return + +.Ldiv_xnan_or_inf: + /* Set the sign bit of the result. */ + srli a7, yh, 31 + slli a7, a7, 31 + xor xh, xh, a7 + /* If y is NaN or Inf, return NaN. */ + bnall yh, a6, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: leaf_return + +.Ldiv_ynan_or_inf: + /* If y is Infinity, return zero. */ + slli a8, yh, 12 + or a8, a8, yl + beqz a8, .Ldiv_return_zero + /* y is NaN; return it. */ + mov xh, yh + mov xl, yl + leaf_return + +.Ldiv_highequal1: + bltu xl, yl, 2f + j 3f + + .align 4 + .global __divdf3 + .type __divdf3, @function +__divdf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Get the sign of the result. */ + xor a7, xh, yh + + /* Check for NaN and infinity. */ + ball xh, a6, .Ldiv_xnan_or_inf + ball yh, a6, .Ldiv_ynan_or_inf + + /* Extract the exponents. */ + extui a8, xh, 20, 11 + extui a9, yh, 20, 11 + + beqz a9, .Ldiv_yexpzero +.Ldiv_ynormalized: + beqz a8, .Ldiv_xexpzero +.Ldiv_xnormalized: + + /* Subtract the exponents. */ + sub a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0x1fffff + or xh, xh, a6 + and xh, xh, a10 + or yh, yh, a6 + and yh, yh, a10 + + /* Set SAR for left shift by one. */ + ssai (32 - 1) + + /* The first digit of the mantissa division must be a one. + Shift x (and adjust the exponent) as needed to make this true. */ + bltu yh, xh, 3f + beq yh, xh, .Ldiv_highequal1 +2: src xh, xh, xl + sll xl, xl + addi a8, a8, -1 +3: + /* Do the first subtraction and shift. */ + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + src xh, xh, xl + sll xl, xl + + /* Put the quotient into a10/a11. */ + movi a10, 0 + movi a11, 1 + + /* Divide one bit at a time for 52 bits. */ + movi a9, 52 +#if XCHAL_HAVE_LOOPS + loop a9, .Ldiv_loopend +#endif +.Ldiv_loop: + /* Shift the quotient << 1. */ + src a10, a10, a11 + sll a11, a11 + + /* Is this digit a 0 or 1? */ + bltu xh, yh, 3f + beq xh, yh, .Ldiv_highequal2 + + /* Output a 1 and subtract. */ +2: addi a11, a11, 1 + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + + /* Shift the dividend << 1. 
*/ +3: src xh, xh, xl + sll xl, xl + +#if !XCHAL_HAVE_LOOPS + addi a9, a9, -1 + bnez a9, .Ldiv_loop +#endif +.Ldiv_loopend: + + /* Add the exponent bias (less one to account for the explicit "1.0" + of the mantissa that will be added to the exponent in the final + result). */ + movi a9, 0x3fe + add a8, a8, a9 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..7fd are OK here. */ + addmi a9, a9, 0x400 /* 0x7fe */ + bgeu a8, a9, .Ldiv_overflow + +.Ldiv_round: + /* Round. The remainder (<< 1) is in xh/xl. */ + bltu xh, yh, .Ldiv_rounded + beq xh, yh, .Ldiv_highequal3 +.Ldiv_roundup: + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + +.Ldiv_rounded: + mov xl, a11 + /* Add the exponent to the mantissa. */ + slli a8, a8, 20 + add xh, a10, a8 + +.Ldiv_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or xh, xh, a7 + leaf_return + +.Ldiv_highequal2: + bgeu xl, yl, 2b + j 3b + +.Ldiv_highequal3: + bltu xl, yl, .Ldiv_rounded + bne xl, yl, .Ldiv_roundup + + /* Remainder is exactly half the divisor. Round even. */ + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + srli a11, a11, 1 + slli a11, a11, 1 + j .Ldiv_rounded + +.Ldiv_overflow: + bltz a8, .Ldiv_underflow + /* Return +/- Infinity. */ + addi a8, a9, 1 /* 0x7ff */ + slli xh, a8, 20 + movi xl, 0 + j .Ldiv_addsign + +.Ldiv_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + ssr a8 + bgeui a8, 32, .Ldiv_bigshift + + /* Shift a10/a11 right. Any bits that are shifted out of a11 are + saved in a6 for rounding the result. */ + sll a6, a11 + src a11, a10, a11 + srl a10, a10 + j 1f + +.Ldiv_bigshift: + bgeui a8, 64, .Ldiv_flush_to_zero + sll a9, a11 /* lost bits shifted out of a11 */ + src a6, a10, a11 + srl a11, a10 + movi a10, 0 + or xl, xl, a9 + + /* Set the exponent to zero. */ +1: movi a8, 0 + + /* Pack any nonzero remainder (in xh/xl) into a6. */ + or xh, xh, xl + beqz xh, 1f + movi a9, 1 + or a6, a6, a9 + + /* Round a10/a11 based on the bits shifted out into a6. */ +1: bgez a6, .Ldiv_rounded + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + slli a6, a6, 1 + bnez a6, .Ldiv_rounded + srli a11, a11, 1 + slli a11, a11, 1 + j .Ldiv_rounded + +.Ldiv_roundcarry: + /* a11 is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi a10, a10, 1 + /* Overflow to the exponent field is OK. */ + j .Ldiv_rounded + +.Ldiv_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + leaf_return + +#endif /* L_divdf3 */ + +#ifdef L_cmpdf2 + + /* Equal and Not Equal */ + + .align 4 + .global __eqdf2 + .global __nedf2 + .set __nedf2, __eqdf2 + .type __eqdf2, @function +__eqdf2: + leaf_entry sp, 16 + bne xl, yl, 2f + bne xh, yh, 4f + + /* The values are equal but NaN != NaN. Check the exponent. */ + movi a6, 0x7ff00000 + ball xh, a6, 3f + + /* Equal. */ + movi a2, 0 + leaf_return + + /* Not equal. */ +2: movi a2, 1 + leaf_return + + /* Check if the mantissas are nonzero. */ +3: slli a7, xh, 12 + or a7, a7, xl + j 5f + + /* Check if x and y are zero with different signs. */ +4: or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl /* xl == yl here */ + + /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa + or x when exponent(x) = 0x7ff and x == y. 
*/ +5: movi a2, 0 + movi a3, 1 + movnez a2, a3, a7 + leaf_return + + + /* Greater Than */ + + .align 4 + .global __gtdf2 + .type __gtdf2, @function +__gtdf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Lle_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 0 + leaf_return + + + /* Less Than or Equal */ + + .align 4 + .global __ledf2 + .type __ledf2, @function +__ledf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Lle_cmp + movi a2, 1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 1 + leaf_return + +.Lle_cmp: + /* Check if x and y have different signs. */ + xor a7, xh, yh + bltz a7, .Lle_diff_signs + + /* Check if x is negative. */ + bltz xh, .Lle_xneg + + /* Check if x <= y. */ + bltu xh, yh, 4f + bne xh, yh, 5f + bltu yl, xl, 5f +4: movi a2, 0 + leaf_return + +.Lle_xneg: + /* Check if y <= x. */ + bltu yh, xh, 4b + bne yh, xh, 5f + bgeu xl, yl, 4b +5: movi a2, 1 + leaf_return + +.Lle_diff_signs: + bltz xh, 4b + + /* Check if both x and y are zero. */ + or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl + or a7, a7, yl + movi a2, 1 + movi a3, 0 + moveqz a2, a3, a7 + leaf_return + + + /* Greater Than or Equal */ + + .align 4 + .global __gedf2 + .type __gedf2, @function +__gedf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Llt_cmp + movi a2, -1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, -1 + leaf_return + + + /* Less Than */ + + .align 4 + .global __ltdf2 + .type __ltdf2, @function +__ltdf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Llt_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 0 + leaf_return + +.Llt_cmp: + /* Check if x and y have different signs. */ + xor a7, xh, yh + bltz a7, .Llt_diff_signs + + /* Check if x is negative. */ + bltz xh, .Llt_xneg + + /* Check if x < y. */ + bltu xh, yh, 4f + bne xh, yh, 5f + bgeu xl, yl, 5f +4: movi a2, -1 + leaf_return + +.Llt_xneg: + /* Check if y < x. */ + bltu yh, xh, 4b + bne yh, xh, 5f + bltu yl, xl, 4b +5: movi a2, 0 + leaf_return + +.Llt_diff_signs: + bgez xh, 5b + + /* Check if both x and y are nonzero. */ + or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl + or a7, a7, yl + movi a2, 0 + movi a3, -1 + movnez a2, a3, a7 + leaf_return + + + /* Unordered */ + + .align 4 + .global __unorddf2 + .type __unorddf2, @function +__unorddf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 3f +1: ball yh, a6, 4f +2: movi a2, 0 + leaf_return + +3: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 1 + leaf_return + +4: slli a7, yh, 12 + or a7, a7, yl + beqz a7, 2b + movi a2, 1 + leaf_return + +#endif /* L_cmpdf2 */ + +#ifdef L_fixdfsi + + .align 4 + .global __fixdfsi + .type __fixdfsi, @function +__fixdfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixdfsi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. 
*/ + extui a4, xh, 20, 11 + extui a5, a6, 19, 10 /* 0x3fe */ + sub a4, a4, a5 + bgei a4, 32, .Lfixdfsi_maxint + blti a4, 1, .Lfixdfsi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src a5, a7, xl + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixdfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixdfsi_maxint + + /* Translate NaN to +maxint. */ + movi xh, 0 + +.Lfixdfsi_maxint: + slli a4, a6, 11 /* 0x80000000 */ + addi a5, a4, -1 /* 0x7fffffff */ + movgez a4, a5, xh + mov a2, a4 + leaf_return + +.Lfixdfsi_zero: + movi a2, 0 + leaf_return + +#endif /* L_fixdfsi */ + +#ifdef L_fixdfdi + + .align 4 + .global __fixdfdi + .type __fixdfdi, @function +__fixdfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixdfdi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */ + extui a4, xh, 20, 11 + extui a5, a6, 19, 10 /* 0x3fe */ + sub a4, a4, a5 + bgei a4, 64, .Lfixdfdi_maxint + blti a4, 1, .Lfixdfdi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src xh, a7, xl + sll xl, xl + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixdfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixdfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixdfdi_smallshift: + src xl, xh, xl + srl xh, xh + j .Lfixdfdi_shifted + +.Lfixdfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixdfdi_maxint + + /* Translate NaN to +maxint. */ + movi xh, 0 + +.Lfixdfdi_maxint: + slli a7, a6, 11 /* 0x80000000 */ + bgez xh, 1f + mov xh, a7 + movi xl, 0 + leaf_return + +1: addi xh, a7, -1 /* 0x7fffffff */ + movi xl, -1 + leaf_return + +.Lfixdfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +#endif /* L_fixdfdi */ + +#ifdef L_fixunsdfsi + + .align 4 + .global __fixunsdfsi + .type __fixunsdfsi, @function +__fixunsdfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixunsdfsi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */ + extui a4, xh, 20, 11 + extui a5, a6, 20, 10 /* 0x3ff */ + sub a4, a4, a5 + bgei a4, 32, .Lfixunsdfsi_maxint + bltz a4, .Lfixunsdfsi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src a5, a7, xl + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 32, .Lfixunsdfsi_bigexp + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixunsdfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixunsdfsi_maxint + + /* Translate NaN to 0xffffffff. */ + movi a2, -1 + leaf_return + +.Lfixunsdfsi_maxint: + slli a4, a6, 11 /* 0x80000000 */ + movi a5, -1 /* 0xffffffff */ + movgez a4, a5, xh + mov a2, a4 + leaf_return + +.Lfixunsdfsi_zero: + movi a2, 0 + leaf_return + +.Lfixunsdfsi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz xh, 1f + mov a2, a5 /* no shift needed */ + leaf_return + + /* Return 0x80000000 if negative. 
*/ +1: slli a2, a6, 11 + leaf_return + +#endif /* L_fixunsdfsi */ + +#ifdef L_fixunsdfdi + + .align 4 + .global __fixunsdfdi + .type __fixunsdfdi, @function +__fixunsdfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixunsdfdi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */ + extui a4, xh, 20, 11 + extui a5, a6, 20, 10 /* 0x3ff */ + sub a4, a4, a5 + bgei a4, 64, .Lfixunsdfdi_maxint + bltz a4, .Lfixunsdfdi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src xh, a7, xl + sll xl, xl + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 64, .Lfixunsdfdi_bigexp + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixunsdfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixunsdfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixunsdfdi_smallshift: + src xl, xh, xl + srl xh, xh + j .Lfixunsdfdi_shifted + +.Lfixunsdfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixunsdfdi_maxint + + /* Translate NaN to 0xffffffff.... */ +1: movi xh, -1 + movi xl, -1 + leaf_return + +.Lfixunsdfdi_maxint: + bgez xh, 1b +2: slli xh, a6, 11 /* 0x80000000 */ + movi xl, 0 + leaf_return + +.Lfixunsdfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +.Lfixunsdfdi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a7, 2b + leaf_return /* no shift needed */ + +#endif /* L_fixunsdfdi */ + +#ifdef L_floatsidf + + .align 4 + .global __floatunsidf + .type __floatunsidf, @function +__floatunsidf: + leaf_entry sp, 16 + beqz a2, .Lfloatsidf_return_zero + + /* Set the sign to zero and jump to the floatsidf code. */ + movi a7, 0 + j .Lfloatsidf_normalize + + .align 4 + .global __floatsidf + .type __floatsidf, @function +__floatsidf: + leaf_entry sp, 16 + + /* Check for zero. */ + beqz a2, .Lfloatsidf_return_zero + + /* Save the sign. */ + extui a7, a2, 31, 1 + + /* Get the absolute value. */ +#if XCHAL_HAVE_ABS + abs a2, a2 +#else + neg a4, a2 + movltz a2, a4, a2 +#endif + +.Lfloatsidf_normalize: + /* Normalize with the first 1 bit in the msb. */ + do_nsau a4, a2, a5, a6 + ssl a4 + sll a5, a2 + + /* Shift the mantissa into position. */ + srli xh, a5, 11 + slli xl, a5, (32 - 11) + + /* Set the exponent. */ + movi a5, 0x41d /* 0x3fe + 31 */ + sub a5, a5, a4 + slli a5, a5, 20 + add xh, xh, a5 + + /* Add the sign and return. */ + slli a7, a7, 31 + or xh, xh, a7 + leaf_return + +.Lfloatsidf_return_zero: + movi a3, 0 + leaf_return + +#endif /* L_floatsidf */ + +#ifdef L_floatdidf + + .align 4 + .global __floatundidf + .type __floatundidf, @function +__floatundidf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Set the sign to zero and jump to the floatdidf code. */ + movi a7, 0 + j .Lfloatdidf_normalize + + .align 4 + .global __floatdidf + .type __floatdidf, @function +__floatdidf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Save the sign. */ + extui a7, xh, 31, 1 + + /* Get the absolute value. */ + bgez xh, .Lfloatdidf_normalize + neg xl, xl + neg xh, xh + beqz xl, .Lfloatdidf_normalize + addi xh, xh, -1 + +.Lfloatdidf_normalize: + /* Normalize with the first 1 bit in the msb of xh. 
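The do_nsau macro, defined in lib1funcs.asm later in this patch, counts leading zero bits; for xh = 0x00001234 it returns 19, so xh:xl is shifted left by 19 bits to bring the most significant 1 up to bit 31 of xh.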
*/ + beqz xh, .Lfloatdidf_bigshift + do_nsau a4, xh, a5, a6 + ssl a4 + src xh, xh, xl + sll xl, xl + +.Lfloatdidf_shifted: + /* Shift the mantissa into position, with rounding bits in a6. */ + ssai 11 + sll a6, xl + src xl, xh, xl + srl xh, xh + + /* Set the exponent. */ + movi a5, 0x43d /* 0x3fe + 63 */ + sub a5, a5, a4 + slli a5, a5, 20 + add xh, xh, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or xh, xh, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, 2f + addi xl, xl, 1 + beqz xl, .Lfloatdidf_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a6, a6, 1 + beqz a6, .Lfloatdidf_exactlyhalf +2: leaf_return + +.Lfloatdidf_bigshift: + /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ + do_nsau a4, xl, a5, a6 + ssl a4 + sll xh, xl + movi xl, 0 + addi a4, a4, 32 + j .Lfloatdidf_shifted + +.Lfloatdidf_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Lfloatdidf_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. */ + leaf_return + +#endif /* L_floatdidf */ + +#ifdef L_truncdfsf2 + + .align 4 + .global __truncdfsf2 + .type __truncdfsf2, @function +__truncdfsf2: + leaf_entry sp, 16 + + /* Adjust the exponent bias. */ + movi a4, (0x3ff - 0x7f) << 20 + sub a5, xh, a4 + + /* Check for underflow. */ + xor a6, xh, a5 + bltz a6, .Ltrunc_underflow + extui a6, a5, 20, 11 + beqz a6, .Ltrunc_underflow + + /* Check for overflow. */ + movi a4, 255 + bge a6, a4, .Ltrunc_overflow + + /* Shift a5/xl << 3 into a5/a4. */ + ssai (32 - 3) + src a5, a5, xl + sll a4, xl + +.Ltrunc_addsign: + /* Add the sign bit. */ + extui a6, xh, 31, 1 + slli a6, a6, 31 + or a2, a6, a5 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a4, 1f + addi a2, a2, 1 + /* Overflow to the exponent is OK. The answer will be correct. */ + + /* Check if the leftover fraction is exactly 1/2. */ + slli a4, a4, 1 + beqz a4, .Ltrunc_exactlyhalf +1: leaf_return + +.Ltrunc_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +.Ltrunc_overflow: + /* Check if exponent == 0x7ff. */ + movi a4, 0x7ff00000 + bnall xh, a4, 1f + + /* Check if mantissa is nonzero. */ + slli a5, xh, 12 + or a5, a5, xl + beqz a5, 1f + + /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */ + srli a4, a4, 1 + +1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */ + /* Add the sign bit. */ + extui a6, xh, 31, 1 + ssai 1 + src a2, a6, a4 + leaf_return + +.Ltrunc_underflow: + /* Find shift count for a subnormal. Flush to zero if >= 32. */ + extui a6, xh, 20, 11 + movi a5, 0x3ff - 0x7f + sub a6, a5, a6 + addi a6, a6, 1 + bgeui a6, 32, 1f + + /* Replace the exponent with an explicit "1.0". */ + slli a5, a5, 13 /* 0x700000 */ + or a5, a5, xh + slli a5, a5, 11 + srli a5, a5, 11 + + /* Shift the mantissa left by 3 bits (into a5/a4). */ + ssai (32 - 3) + src a5, a5, xl + sll a4, xl + + /* Shift right by a6. */ + ssr a6 + sll a7, a4 + src a4, a5, a4 + srl a5, a5 + beqz a7, .Ltrunc_addsign + or a4, a4, a6 /* any positive, nonzero value will work */ + j .Ltrunc_addsign + + /* Return +/- zero. */ +1: extui a2, xh, 31, 1 + slli a2, a2, 31 + leaf_return + +#endif /* L_truncdfsf2 */ + +#ifdef L_extendsfdf2 + + .align 4 + .global __extendsfdf2 + .type __extendsfdf2, @function +__extendsfdf2: + leaf_entry sp, 16 + + /* Save the sign bit and then shift it off. 
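Aside from the NaN case handled below, the widening is exact: every single-precision value is representable in double precision, so the rest of the routine just repositions bits. The exponent is rebiased by 0x3ff - 0x7f and the 23-bit mantissa gains 29 low zero bits; 1.0f (0x3f800000), for instance, becomes 0x3ff00000:00000000.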
*/ + extui a5, a2, 31, 1 + slli a5, a5, 31 + slli a4, a2, 1 + + /* Extract and check the exponent. */ + extui a6, a2, 23, 8 + beqz a6, .Lextend_expzero + addi a6, a6, 1 + beqi a6, 256, .Lextend_nan_or_inf + + /* Shift >> 3 into a4/xl. */ + srli a4, a4, 4 + slli xl, a2, (32 - 3) + + /* Adjust the exponent bias. */ + movi a6, (0x3ff - 0x7f) << 20 + add a4, a4, a6 + + /* Add the sign bit. */ + or xh, a4, a5 + leaf_return + +.Lextend_nan_or_inf: + movi a4, 0x7ff00000 + + /* Check for NaN. */ + slli a7, a2, 9 + beqz a7, 1f + + slli a6, a6, 11 /* 0x80000 */ + or a4, a4, a6 + + /* Add the sign and return. */ +1: or xh, a4, a5 + movi xl, 0 + leaf_return + +.Lextend_expzero: + beqz a4, 1b + + /* Normalize it to have 8 zero bits before the first 1 bit. */ + do_nsau a7, a4, a2, a3 + addi a7, a7, -8 + ssl a7 + sll a4, a4 + + /* Shift >> 3 into a4/xl. */ + slli xl, a4, (32 - 3) + srli a4, a4, 3 + + /* Set the exponent. */ + movi a6, 0x3fe - 0x7f + sub a6, a6, a7 + slli a6, a6, 20 + add a4, a4, a6 + + /* Add the sign and return. */ + or xh, a4, a5 + leaf_return + +#endif /* L_extendsfdf2 */ + + diff --git a/gcc/config/xtensa/ieee754-sf.S b/gcc/config/xtensa/ieee754-sf.S new file mode 100644 index 000000000..d75be0e5a --- /dev/null +++ b/gcc/config/xtensa/ieee754-sf.S @@ -0,0 +1,1757 @@ +/* IEEE-754 single-precision functions for Xtensa + Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef __XTENSA_EB__ +#define xh a2 +#define xl a3 +#define yh a4 +#define yl a5 +#else +#define xh a3 +#define xl a2 +#define yh a5 +#define yl a4 +#endif + +/* Warning! The branch displacements for some Xtensa branch instructions + are quite small, and this code has been carefully laid out to keep + branch targets in range. If you change anything, be sure to check that + the assembler is not relaxing anything to branch over a jump. */ + +#ifdef L_negsf2 + + .align 4 + .global __negsf2 + .type __negsf2, @function +__negsf2: + leaf_entry sp, 16 + movi a4, 0x80000000 + xor a2, a2, a4 + leaf_return + +#endif /* L_negsf2 */ + +#ifdef L_addsubsf3 + + /* Addition */ +__addsf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Ladd_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall a3, a6, 1f + /* If x is a NaN, return it. Otherwise, return y. */ + slli a7, a2, 9 + beqz a7, .Ladd_ynan_or_inf +1: leaf_return + +.Ladd_ynan_or_inf: + /* Return y. 
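At this point x is known to be finite or an infinity of the same sign, never a NaN, while y is a NaN or an infinity, so returning y gives the IEEE result in every case.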
*/ + mov a2, a3 + leaf_return + +.Ladd_opposite_signs: + /* Operand signs differ. Do a subtraction. */ + slli a7, a6, 8 + xor a3, a3, a7 + j .Lsub_same_sign + + .align 4 + .global __addsf3 + .type __addsf3, @function +__addsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Check if the two operands have the same sign. */ + xor a7, a2, a3 + bltz a7, .Ladd_opposite_signs + +.Ladd_same_sign: + /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ + ball a2, a6, .Ladd_xnan_or_inf + ball a3, a6, .Ladd_ynan_or_inf + + /* Compare the exponents. The smaller operand will be shifted + right by the exponent difference and added to the larger + one. */ + extui a7, a2, 23, 9 + extui a8, a3, 23, 9 + bltu a7, a8, .Ladd_shiftx + +.Ladd_shifty: + /* Check if the smaller (or equal) exponent is zero. */ + bnone a3, a6, .Ladd_yexpzero + + /* Replace y sign/exponent with 0x008. */ + or a3, a3, a6 + slli a3, a3, 8 + srli a3, a3, 8 + +.Ladd_yexpdiff: + /* Compute the exponent difference. */ + sub a10, a7, a8 + + /* Exponent difference > 32 -- just return the bigger value. */ + bgeui a10, 32, 1f + + /* Shift y right by the exponent difference. Any bits that are + shifted out of y are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, a3, a9 + srl a3, a3 + + /* Do the addition. */ + add a2, a2, a3 + + /* Check if the add overflowed into the exponent. */ + extui a10, a2, 23, 9 + beq a10, a7, .Ladd_round + mov a8, a7 + j .Ladd_carry + +.Ladd_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0", and increment the apparent exponent + because subnormals behave as if they had the minimum (nonzero) + exponent. Test for the case when both exponents are zero. */ + slli a3, a3, 9 + srli a3, a3, 9 + bnone a2, a6, .Ladd_bothexpzero + addi a8, a8, 1 + j .Ladd_yexpdiff + +.Ladd_bothexpzero: + /* Both exponents are zero. Handle this as a special case. There + is no need to shift or round, and the normal code for handling + a carry into the exponent field will not work because it + assumes there is an implicit "1.0" that needs to be added. */ + add a2, a2, a3 +1: leaf_return + +.Ladd_xexpzero: + /* Same as "yexpzero" except skip handling the case when both + exponents are zero. */ + slli a2, a2, 9 + srli a2, a2, 9 + addi a7, a7, 1 + j .Ladd_xexpdiff + +.Ladd_shiftx: + /* Same thing as the "shifty" code, but with x and y swapped. Also, + because the exponent difference is always nonzero in this version, + the shift sequence can use SLL and skip loading a constant zero. */ + bnone a2, a6, .Ladd_xexpzero + + or a2, a2, a6 + slli a2, a2, 8 + srli a2, a2, 8 + +.Ladd_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Ladd_returny + + ssr a10 + sll a9, a2 + srl a2, a2 + + add a2, a2, a3 + + /* Check if the add overflowed into the exponent. */ + extui a10, a2, 23, 9 + bne a10, a8, .Ladd_carry + +.Ladd_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi a2, a2, 1 + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_returny: + mov a2, a3 + leaf_return + +.Ladd_carry: + /* The addition has overflowed into the exponent field, so the + value needs to be renormalized. The mantissa of the result + can be recovered by subtracting the original exponent and + adding 0x800000 (which is the explicit "1.0" for the + mantissa of the non-shifted operand -- the "1.0" for the + shifted operand was already added). The mantissa can then + be shifted right by one bit. 
The explicit "1.0" of the + shifted mantissa then needs to be replaced by the exponent, + incremented by one to account for the normalizing shift. + It is faster to combine these operations: do the shift first + and combine the additions and subtractions. If x is the + original exponent, the result is: + shifted mantissa - (x << 22) + (1 << 22) + (x << 23) + or: + shifted mantissa + ((x + 1) << 22) + Note that the exponent is incremented here by leaving the + explicit "1.0" of the mantissa in the exponent field. */ + + /* Shift x right by one bit. Save the lsb. */ + mov a10, a2 + srli a2, a2, 1 + + /* See explanation above. The original exponent is in a8. */ + addi a8, a8, 1 + slli a8, a8, 22 + add a2, a2, a8 + + /* Return an Infinity if the exponent overflowed. */ + ball a2, a6, .Ladd_infinity + + /* Same thing as the "round" code except the msb of the leftover + fraction is bit 0 of a10, with the rest of the fraction in a9. */ + bbci.l a10, 0, 1f + addi a2, a2, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_infinity: + /* Clear the mantissa. */ + srli a2, a2, 23 + slli a2, a2, 23 + + /* The sign bit may have been lost in a carry-out. Put it back. */ + slli a8, a8, 1 + or a2, a2, a8 + leaf_return + +.Ladd_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + + + /* Subtraction */ +__subsf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Lsub_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall a3, a6, 1f + /* Both x and y are either NaN or Inf, so the result is NaN. */ + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Lsub_ynan_or_inf: + /* Negate y and return it. */ + slli a7, a6, 8 + xor a2, a3, a7 + leaf_return + +.Lsub_opposite_signs: + /* Operand signs differ. Do an addition. */ + slli a7, a6, 8 + xor a3, a3, a7 + j .Ladd_same_sign + + .align 4 + .global __subsf3 + .type __subsf3, @function +__subsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Check if the two operands have the same sign. */ + xor a7, a2, a3 + bltz a7, .Lsub_opposite_signs + +.Lsub_same_sign: + /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ + ball a2, a6, .Lsub_xnan_or_inf + ball a3, a6, .Lsub_ynan_or_inf + + /* Compare the operands. In contrast to addition, the entire + value matters here. */ + extui a7, a2, 23, 8 + extui a8, a3, 23, 8 + bltu a2, a3, .Lsub_xsmaller + +.Lsub_ysmaller: + /* Check if the smaller (or equal) exponent is zero. */ + bnone a3, a6, .Lsub_yexpzero + + /* Replace y sign/exponent with 0x008. */ + or a3, a3, a6 + slli a3, a3, 8 + srli a3, a3, 8 + +.Lsub_yexpdiff: + /* Compute the exponent difference. */ + sub a10, a7, a8 + + /* Exponent difference > 32 -- just return the bigger value. */ + bgeui a10, 32, 1f + + /* Shift y right by the exponent difference. Any bits that are + shifted out of y are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, a3, a9 + srl a3, a3 + + sub a2, a2, a3 + + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from a2. */ + neg a9, a9 + addi a10, a2, -1 + movnez a2, a10, a9 + + /* Check if the subtract underflowed into the exponent. */ + extui a10, a2, 23, 8 + beq a10, a7, .Lsub_round + j .Lsub_borrow + +.Lsub_yexpzero: + /* Return zero if the inputs are equal. 
(For the non-subnormal + case, subtracting the "1.0" will cause a borrow from the exponent + and this case can be detected when handling the borrow.) */ + beq a2, a3, .Lsub_return_zero + + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0". Unless x is also a subnormal, increment + y's apparent exponent because subnormals behave as if they had + the minimum (nonzero) exponent. */ + slli a3, a3, 9 + srli a3, a3, 9 + bnone a2, a6, .Lsub_yexpdiff + addi a8, a8, 1 + j .Lsub_yexpdiff + +.Lsub_returny: + /* Negate and return y. */ + slli a7, a6, 8 + xor a2, a3, a7 +1: leaf_return + +.Lsub_xsmaller: + /* Same thing as the "ysmaller" code, but with x and y swapped and + with y negated. */ + bnone a2, a6, .Lsub_xexpzero + + or a2, a2, a6 + slli a2, a2, 8 + srli a2, a2, 8 + +.Lsub_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Lsub_returny + + ssr a10 + movi a9, 0 + src a9, a2, a9 + srl a2, a2 + + /* Negate y. */ + slli a11, a6, 8 + xor a3, a3, a11 + + sub a2, a3, a2 + + neg a9, a9 + addi a10, a2, -1 + movnez a2, a10, a9 + + /* Check if the subtract underflowed into the exponent. */ + extui a10, a2, 23, 8 + bne a10, a8, .Lsub_borrow + +.Lsub_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi a2, a2, 1 + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Lsub_exactlyhalf +1: leaf_return + +.Lsub_xexpzero: + /* Same as "yexpzero". */ + beq a2, a3, .Lsub_return_zero + slli a2, a2, 9 + srli a2, a2, 9 + bnone a3, a6, .Lsub_xexpdiff + addi a7, a7, 1 + j .Lsub_xexpdiff + +.Lsub_return_zero: + movi a2, 0 + leaf_return + +.Lsub_borrow: + /* The subtraction has underflowed into the exponent field, so the + value needs to be renormalized. Shift the mantissa left as + needed to remove any leading zeros and adjust the exponent + accordingly. If the exponent is not large enough to remove + all the leading zeros, the result will be a subnormal value. */ + + slli a8, a2, 9 + beqz a8, .Lsub_xzero + do_nsau a6, a8, a7, a11 + srli a8, a8, 9 + bge a6, a10, .Lsub_subnormal + addi a6, a6, 1 + +.Lsub_normalize_shift: + /* Shift the mantissa (a8/a9) left by a6. */ + ssl a6 + src a8, a8, a9 + sll a9, a9 + + /* Combine the shifted mantissa with the sign and exponent, + decrementing the exponent by a6. (The exponent has already + been decremented by one due to the borrow from the subtraction, + but adding the mantissa will increment the exponent by one.) */ + srli a2, a2, 23 + sub a2, a2, a6 + slli a2, a2, 23 + add a2, a2, a8 + j .Lsub_round + +.Lsub_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +.Lsub_xzero: + /* If there was a borrow from the exponent, and the mantissa and + guard digits are all zero, then the inputs were equal and the + result should be zero. */ + beqz a9, .Lsub_return_zero + + /* Only the guard digit is nonzero. Shift by min(24, a10). */ + addi a11, a10, -24 + movi a6, 24 + movltz a6, a10, a11 + j .Lsub_normalize_shift + +.Lsub_subnormal: + /* The exponent is too small to shift away all the leading zeros. + Set a6 to the current exponent (which has already been + decremented by the borrow) so that the exponent of the result + will be zero. Do not add 1 to a6 in this case, because: (1) + adding the mantissa will not increment the exponent, so there is + no need to subtract anything extra from the exponent to + compensate, and (2) the effective exponent of a subnormal is 1 + not 0 so the shift amount must be 1 smaller than normal. 
*/ + mov a6, a10 + j .Lsub_normalize_shift + +#endif /* L_addsubsf3 */ + +#ifdef L_mulsf3 + + /* Multiplication */ +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + +__mulsf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Lmul_xexpzero: + /* Clear the sign bit of x. */ + slli a2, a2, 1 + srli a2, a2, 1 + + /* If x is zero, return zero. */ + beqz a2, .Lmul_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + do_nsau a10, a2, a11, a12 + addi a10, a10, -8 + ssl a10 + sll a2, a2 + movi a8, 1 + sub a8, a8, a10 + j .Lmul_xnormalized + +.Lmul_yexpzero: + /* Clear the sign bit of y. */ + slli a3, a3, 1 + srli a3, a3, 1 + + /* If y is zero, return zero. */ + beqz a3, .Lmul_return_zero + + /* Normalize y. Adjust the exponent in a9. */ + do_nsau a10, a3, a11, a12 + addi a10, a10, -8 + ssl a10 + sll a3, a3 + movi a9, 1 + sub a9, a9, a10 + j .Lmul_ynormalized + +.Lmul_return_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + j .Lmul_done + +.Lmul_xnan_or_inf: + /* If y is zero, return NaN. */ + slli a8, a3, 1 + bnez a8, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 + j .Lmul_done +1: + /* If y is NaN, return y. */ + bnall a3, a6, .Lmul_returnx + slli a8, a3, 9 + beqz a8, .Lmul_returnx + +.Lmul_returny: + mov a2, a3 + +.Lmul_returnx: + /* Set the sign bit and return. */ + extui a7, a7, 31, 1 + slli a2, a2, 1 + ssai 1 + src a2, a7, a2 + j .Lmul_done + +.Lmul_ynan_or_inf: + /* If x is zero, return NaN. */ + slli a8, a2, 1 + bnez a8, .Lmul_returny + movi a7, 0x400000 /* make it a quiet NaN */ + or a2, a3, a7 + j .Lmul_done + + .align 4 + .global __mulsf3 + .type __mulsf3, @function +__mulsf3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 64 +#else + leaf_entry sp, 32 +#endif + movi a6, 0x7f800000 + + /* Get the sign of the result. */ + xor a7, a2, a3 + + /* Check for NaN and infinity. */ + ball a2, a6, .Lmul_xnan_or_inf + ball a3, a6, .Lmul_ynan_or_inf + + /* Extract the exponents. */ + extui a8, a2, 23, 8 + extui a9, a3, 23, 8 + + beqz a8, .Lmul_xexpzero +.Lmul_xnormalized: + beqz a9, .Lmul_yexpzero +.Lmul_ynormalized: + + /* Add the exponents. */ + add a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0xffffff + or a2, a2, a6 + and a2, a2, a10 + or a3, a3, a6 + and a3, a3, a10 + + /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */ + +#if XCHAL_HAVE_MUL32_HIGH + + mull a6, a2, a3 + muluh a2, a2, a3 + +#else + + /* Break the inputs into 16-bit chunks and compute 4 32-bit partial + products. These partial products are: + + 0 xl * yl + + 1 xl * yh + 2 xh * yl + + 3 xh * yh + + If using the Mul16 or Mul32 multiplier options, these input + chunks must be stored in separate registers. For Mac16, the + UMUL.AA.* opcodes can specify that the inputs come from either + half of the registers, so there is no need to shift them out + ahead of time. If there is no multiply hardware, the 16-bit + chunks can be extracted when setting up the arguments to the + separate multiply function. 
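As a rough C sketch of this decomposition (illustrative only, not part of the upstream sources; the function name is made up and the <stdint.h> fixed-width types are assumed):

      #include <stdint.h>

      // 32x32 -> 64 bit unsigned multiply built from four 16x16 partial
      // products, mirroring pp0..pp3 in the assembly below.
      uint64_t umul32x32 (uint32_t x, uint32_t y)
      {
        uint32_t xlo = x & 0xffff, xhi = x >> 16;
        uint32_t ylo = y & 0xffff, yhi = y >> 16;
        uint64_t pp0 = (uint64_t) xlo * ylo;
        uint64_t pp1 = (uint64_t) xlo * yhi;
        uint64_t pp2 = (uint64_t) xhi * ylo;
        uint64_t pp3 = (uint64_t) xhi * yhi;
        return (pp3 << 32) + ((pp1 + pp2) << 16) + pp0;
      }

The sum pp1 + pp2 can exceed 32 bits, which is why the assembly keeps an explicit carry in a9 when it adds the two middle products.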
*/ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Calling a separate multiply function will clobber a0 and requires + use of a8 as a temporary, so save those values now. (The function + uses a custom ABI so nothing else needs to be saved.) */ + s32i a0, sp, 0 + s32i a8, sp, 4 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define a2h a4 +#define a3h a5 + + /* Get the high halves of the inputs into registers. */ + srli a2h, a2, 16 + srli a3h, a3, 16 + +#define a2l a2 +#define a3l a3 + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. */ + extui a2, a2, 0, 16 + extui a3, a3, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a6 with carry-out in a9. */ + do_mul(a6, a2, l, a3, h) /* pp 1 */ + do_mul(a11, a2, h, a3, l) /* pp 2 */ + movi a9, 0 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Shift the high half of a9/a6 into position in a9. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a9, a9, a6 + + /* Compute the low word into a6. */ + do_mul(a11, a2, l, a3, l) /* pp 0 */ + sll a6, a6 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Compute the high word into a2. */ + do_mul(a2, a2, h, a3, h) /* pp 3 */ + add a2, a2, a9 + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Restore values saved on the stack during the multiplication. */ + l32i a0, sp, 0 + l32i a8, sp, 4 +#endif +#endif /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Shift left by 9 bits, unless there was a carry-out from the + multiply, in which case, shift by 8 bits and increment the + exponent. */ + movi a4, 9 + srli a5, a2, 24 - 9 + beqz a5, 1f + addi a4, a4, -1 + addi a8, a8, 1 +1: ssl a4 + src a2, a2, a6 + sll a6, a6 + + /* Subtract the extra bias from the exponent sum (plus one to account + for the explicit "1.0" of the mantissa that will be added to the + exponent in the final result). */ + movi a4, 0x80 + sub a8, a8, a4 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..fd are OK here. */ + movi a4, 0xfe + bgeu a8, a4, .Lmul_overflow + +.Lmul_round: + /* Round. 
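This is round to nearest, ties to even: a6 holds the product bits shifted out below the significand; if its top bit is set the significand is incremented, and when every lower bit is zero (an exact tie) the low bit is cleared again at .Lmul_exactlyhalf.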
*/ + bgez a6, .Lmul_rounded + addi a2, a2, 1 + slli a6, a6, 1 + beqz a6, .Lmul_exactlyhalf + +.Lmul_rounded: + /* Add the exponent to the mantissa. */ + slli a8, a8, 23 + add a2, a2, a8 + +.Lmul_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or a2, a2, a7 + +.Lmul_done: +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +.Lmul_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + j .Lmul_rounded + +.Lmul_overflow: + bltz a8, .Lmul_underflow + /* Return +/- Infinity. */ + movi a8, 0xff + slli a2, a8, 23 + j .Lmul_addsign + +.Lmul_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + mov a9, a6 + ssr a8 + bgeui a8, 32, .Lmul_flush_to_zero + + /* Shift a2 right. Any bits that are shifted out of a2 are saved + in a6 (combined with the shifted-out bits currently in a6) for + rounding the result. */ + sll a6, a2 + srl a2, a2 + + /* Set the exponent to zero. */ + movi a8, 0 + + /* Pack any nonzero bits shifted out into a6. */ + beqz a9, .Lmul_round + movi a9, 1 + or a6, a6, a9 + j .Lmul_round + +.Lmul_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + j .Lmul_done + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ +#endif /* L_mulsf3 */ + +#ifdef L_divsf3 + + /* Division */ +__divsf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Ldiv_yexpzero: + /* Clear the sign bit of y. */ + slli a3, a3, 1 + srli a3, a3, 1 + + /* Check for division by zero. */ + beqz a3, .Ldiv_yzero + + /* Normalize y. Adjust the exponent in a9. */ + do_nsau a10, a3, a4, a5 + addi a10, a10, -8 + ssl a10 + sll a3, a3 + movi a9, 1 + sub a9, a9, a10 + j .Ldiv_ynormalized + +.Ldiv_yzero: + /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ + slli a4, a2, 1 + srli a4, a4, 1 + srli a2, a7, 31 + slli a2, a2, 31 + or a2, a2, a6 + bnez a4, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Ldiv_xexpzero: + /* Clear the sign bit of x. 
*/ + slli a2, a2, 1 + srli a2, a2, 1 + + /* If x is zero, return zero. */ + beqz a2, .Ldiv_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + do_nsau a10, a2, a4, a5 + addi a10, a10, -8 + ssl a10 + sll a2, a2 + movi a8, 1 + sub a8, a8, a10 + j .Ldiv_xnormalized + +.Ldiv_return_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + leaf_return + +.Ldiv_xnan_or_inf: + /* Set the sign bit of the result. */ + srli a7, a3, 31 + slli a7, a7, 31 + xor a2, a2, a7 + /* If y is NaN or Inf, return NaN. */ + bnall a3, a6, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Ldiv_ynan_or_inf: + /* If y is Infinity, return zero. */ + slli a8, a3, 9 + beqz a8, .Ldiv_return_zero + /* y is NaN; return it. */ + mov a2, a3 + leaf_return + + .align 4 + .global __divsf3 + .type __divsf3, @function +__divsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Get the sign of the result. */ + xor a7, a2, a3 + + /* Check for NaN and infinity. */ + ball a2, a6, .Ldiv_xnan_or_inf + ball a3, a6, .Ldiv_ynan_or_inf + + /* Extract the exponents. */ + extui a8, a2, 23, 8 + extui a9, a3, 23, 8 + + beqz a9, .Ldiv_yexpzero +.Ldiv_ynormalized: + beqz a8, .Ldiv_xexpzero +.Ldiv_xnormalized: + + /* Subtract the exponents. */ + sub a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0xffffff + or a2, a2, a6 + and a2, a2, a10 + or a3, a3, a6 + and a3, a3, a10 + + /* The first digit of the mantissa division must be a one. + Shift x (and adjust the exponent) as needed to make this true. */ + bltu a3, a2, 1f + slli a2, a2, 1 + addi a8, a8, -1 +1: + /* Do the first subtraction and shift. */ + sub a2, a2, a3 + slli a2, a2, 1 + + /* Put the quotient into a10. */ + movi a10, 1 + + /* Divide one bit at a time for 23 bits. */ + movi a9, 23 +#if XCHAL_HAVE_LOOPS + loop a9, .Ldiv_loopend +#endif +.Ldiv_loop: + /* Shift the quotient << 1. */ + slli a10, a10, 1 + + /* Is this digit a 0 or 1? */ + bltu a2, a3, 1f + + /* Output a 1 and subtract. */ + addi a10, a10, 1 + sub a2, a2, a3 + + /* Shift the dividend << 1. */ +1: slli a2, a2, 1 + +#if !XCHAL_HAVE_LOOPS + addi a9, a9, -1 + bnez a9, .Ldiv_loop +#endif +.Ldiv_loopend: + + /* Add the exponent bias (less one to account for the explicit "1.0" + of the mantissa that will be added to the exponent in the final + result). */ + addi a8, a8, 0x7e + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..fd are OK here. */ + movi a4, 0xfe + bgeu a8, a4, .Ldiv_overflow + +.Ldiv_round: + /* Round. The remainder (<< 1) is in a2. */ + bltu a2, a3, .Ldiv_rounded + addi a10, a10, 1 + beq a2, a3, .Ldiv_exactlyhalf + +.Ldiv_rounded: + /* Add the exponent to the mantissa. */ + slli a8, a8, 23 + add a2, a10, a8 + +.Ldiv_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or a2, a2, a7 + leaf_return + +.Ldiv_overflow: + bltz a8, .Ldiv_underflow + /* Return +/- Infinity. */ + addi a8, a4, 1 /* 0xff */ + slli a2, a8, 23 + j .Ldiv_addsign + +.Ldiv_exactlyhalf: + /* Remainder is exactly half the divisor. Round even. */ + srli a10, a10, 1 + slli a10, a10, 1 + j .Ldiv_rounded + +.Ldiv_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + ssr a8 + bgeui a8, 32, .Ldiv_flush_to_zero + + /* Shift a10 right. 
Any bits that are shifted out of a10 are + saved in a6 for rounding the result. */ + sll a6, a10 + srl a10, a10 + + /* Set the exponent to zero. */ + movi a8, 0 + + /* Pack any nonzero remainder (in a2) into a6. */ + beqz a2, 1f + movi a9, 1 + or a6, a6, a9 + + /* Round a10 based on the bits shifted out into a6. */ +1: bgez a6, .Ldiv_rounded + addi a10, a10, 1 + slli a6, a6, 1 + bnez a6, .Ldiv_rounded + srli a10, a10, 1 + slli a10, a10, 1 + j .Ldiv_rounded + +.Ldiv_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + leaf_return + +#endif /* L_divsf3 */ + +#ifdef L_cmpsf2 + + /* Equal and Not Equal */ + + .align 4 + .global __eqsf2 + .global __nesf2 + .set __nesf2, __eqsf2 + .type __eqsf2, @function +__eqsf2: + leaf_entry sp, 16 + bne a2, a3, 4f + + /* The values are equal but NaN != NaN. Check the exponent. */ + movi a6, 0x7f800000 + ball a2, a6, 3f + + /* Equal. */ + movi a2, 0 + leaf_return + + /* Not equal. */ +2: movi a2, 1 + leaf_return + + /* Check if the mantissas are nonzero. */ +3: slli a7, a2, 9 + j 5f + + /* Check if x and y are zero with different signs. */ +4: or a7, a2, a3 + slli a7, a7, 1 + + /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa + or x when exponent(x) = 0x7f8 and x == y. */ +5: movi a2, 0 + movi a3, 1 + movnez a2, a3, a7 + leaf_return + + + /* Greater Than */ + + .align 4 + .global __gtsf2 + .type __gtsf2, @function +__gtsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Lle_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 0 + leaf_return + + + /* Less Than or Equal */ + + .align 4 + .global __lesf2 + .type __lesf2, @function +__lesf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Lle_cmp + movi a2, 1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 1 + leaf_return + +.Lle_cmp: + /* Check if x and y have different signs. */ + xor a7, a2, a3 + bltz a7, .Lle_diff_signs + + /* Check if x is negative. */ + bltz a2, .Lle_xneg + + /* Check if x <= y. */ + bltu a3, a2, 5f +4: movi a2, 0 + leaf_return + +.Lle_xneg: + /* Check if y <= x. */ + bgeu a2, a3, 4b +5: movi a2, 1 + leaf_return + +.Lle_diff_signs: + bltz a2, 4b + + /* Check if both x and y are zero. */ + or a7, a2, a3 + slli a7, a7, 1 + movi a2, 1 + movi a3, 0 + moveqz a2, a3, a7 + leaf_return + + + /* Greater Than or Equal */ + + .align 4 + .global __gesf2 + .type __gesf2, @function +__gesf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Llt_cmp + movi a2, -1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, -1 + leaf_return + + + /* Less Than */ + + .align 4 + .global __ltsf2 + .type __ltsf2, @function +__ltsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Llt_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 0 + leaf_return + +.Llt_cmp: + /* Check if x and y have different signs. */ + xor a7, a2, a3 + bltz a7, .Llt_diff_signs + + /* Check if x is negative. */ + bltz a2, .Llt_xneg + + /* Check if x < y. 
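Because IEEE values of the same sign order monotonically by their raw bit patterns once NaNs have been filtered out, a single unsigned compare decides this: for two nonnegative values x < y exactly when the word for x is below the word for y, while for two negative values (the .Llt_xneg path) the order is reversed.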
*/ + bgeu a2, a3, 5f +4: movi a2, -1 + leaf_return + +.Llt_xneg: + /* Check if y < x. */ + bltu a3, a2, 4b +5: movi a2, 0 + leaf_return + +.Llt_diff_signs: + bgez a2, 5b + + /* Check if both x and y are nonzero. */ + or a7, a2, a3 + slli a7, a7, 1 + movi a2, 0 + movi a3, -1 + movnez a2, a3, a7 + leaf_return + + + /* Unordered */ + + .align 4 + .global __unordsf2 + .type __unordsf2, @function +__unordsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 3f +1: ball a3, a6, 4f +2: movi a2, 0 + leaf_return + +3: slli a7, a2, 9 + beqz a7, 1b + movi a2, 1 + leaf_return + +4: slli a7, a3, 9 + beqz a7, 2b + movi a2, 1 + leaf_return + +#endif /* L_cmpsf2 */ + +#ifdef L_fixsfsi + + .align 4 + .global __fixsfsi + .type __fixsfsi, @function +__fixsfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixsfsi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7e + bgei a4, 32, .Lfixsfsi_maxint + blti a4, 1, .Lfixsfsi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli a5, a7, 8 + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixsfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixsfsi_maxint + + /* Translate NaN to +maxint. */ + movi a2, 0 + +.Lfixsfsi_maxint: + slli a4, a6, 8 /* 0x80000000 */ + addi a5, a4, -1 /* 0x7fffffff */ + movgez a4, a5, a2 + mov a2, a4 + leaf_return + +.Lfixsfsi_zero: + movi a2, 0 + leaf_return + +#endif /* L_fixsfsi */ + +#ifdef L_fixsfdi + + .align 4 + .global __fixsfdi + .type __fixsfdi, @function +__fixsfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixsfdi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7e + bgei a4, 64, .Lfixsfdi_maxint + blti a4, 1, .Lfixsfdi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli xh, a7, 8 + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixsfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixsfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixsfdi_smallshift: + movi xl, 0 + sll xl, xh + srl xh, xh + j .Lfixsfdi_shifted + +.Lfixsfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixsfdi_maxint + + /* Translate NaN to +maxint. */ + movi a2, 0 + +.Lfixsfdi_maxint: + slli a7, a6, 8 /* 0x80000000 */ + bgez a2, 1f + mov xh, a7 + movi xl, 0 + leaf_return + +1: addi xh, a7, -1 /* 0x7fffffff */ + movi xl, -1 + leaf_return + +.Lfixsfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +#endif /* L_fixsfdi */ + +#ifdef L_fixunssfsi + + .align 4 + .global __fixunssfsi + .type __fixunssfsi, @function +__fixunssfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixunssfsi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7f + bgei a4, 32, .Lfixunssfsi_maxint + bltz a4, .Lfixunssfsi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli a5, a7, 8 + + /* Shift back to the right, based on the exponent. 
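For example, 3.0f is 0x40400000: its exponent field is 0x80, so exp - 0x7f = 1 and, after the << 8 alignment above, the significand 0xc0000000 is shifted right by 30 bits, leaving 3.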
*/ + addi a4, a4, 1 + beqi a4, 32, .Lfixunssfsi_bigexp + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixunssfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixunssfsi_maxint + + /* Translate NaN to 0xffffffff. */ + movi a2, -1 + leaf_return + +.Lfixunssfsi_maxint: + slli a4, a6, 8 /* 0x80000000 */ + movi a5, -1 /* 0xffffffff */ + movgez a4, a5, a2 + mov a2, a4 + leaf_return + +.Lfixunssfsi_zero: + movi a2, 0 + leaf_return + +.Lfixunssfsi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a2, 1f + mov a2, a5 /* no shift needed */ + leaf_return + + /* Return 0x80000000 if negative. */ +1: slli a2, a6, 8 + leaf_return + +#endif /* L_fixunssfsi */ + +#ifdef L_fixunssfdi + + .align 4 + .global __fixunssfdi + .type __fixunssfdi, @function +__fixunssfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixunssfdi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7f + bgei a4, 64, .Lfixunssfdi_maxint + bltz a4, .Lfixunssfdi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli xh, a7, 8 + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 64, .Lfixunssfdi_bigexp + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixunssfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixunssfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixunssfdi_smallshift: + movi xl, 0 + src xl, xh, xl + srl xh, xh + j .Lfixunssfdi_shifted + +.Lfixunssfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixunssfdi_maxint + + /* Translate NaN to 0xffffffff.... */ +1: movi xh, -1 + movi xl, -1 + leaf_return + +.Lfixunssfdi_maxint: + bgez a2, 1b +2: slli xh, a6, 8 /* 0x80000000 */ + movi xl, 0 + leaf_return + +.Lfixunssfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +.Lfixunssfdi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a7, 2b + movi xl, 0 + leaf_return /* no shift needed */ + +#endif /* L_fixunssfdi */ + +#ifdef L_floatsisf + + .align 4 + .global __floatunsisf + .type __floatunsisf, @function +__floatunsisf: + leaf_entry sp, 16 + beqz a2, .Lfloatsisf_return + + /* Set the sign to zero and jump to the floatsisf code. */ + movi a7, 0 + j .Lfloatsisf_normalize + + .align 4 + .global __floatsisf + .type __floatsisf, @function +__floatsisf: + leaf_entry sp, 16 + + /* Check for zero. */ + beqz a2, .Lfloatsisf_return + + /* Save the sign. */ + extui a7, a2, 31, 1 + + /* Get the absolute value. */ +#if XCHAL_HAVE_ABS + abs a2, a2 +#else + neg a4, a2 + movltz a2, a4, a2 +#endif + +.Lfloatsisf_normalize: + /* Normalize with the first 1 bit in the msb. */ + do_nsau a4, a2, a5, a6 + ssl a4 + sll a5, a2 + + /* Shift the mantissa into position, with rounding bits in a6. */ + srli a2, a5, 8 + slli a6, a5, (32 - 8) + + /* Set the exponent. */ + movi a5, 0x9d /* 0x7e + 31 */ + sub a5, a5, a4 + slli a5, a5, 23 + add a2, a2, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or a2, a2, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, .Lfloatsisf_return + addi a2, a2, 1 /* Overflow to the exponent is OK. */ + + /* Check if the leftover fraction is exactly 1/2. 
*/ + slli a6, a6, 1 + beqz a6, .Lfloatsisf_exactlyhalf + +.Lfloatsisf_return: + leaf_return + +.Lfloatsisf_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +#endif /* L_floatsisf */ + +#ifdef L_floatdisf + + .align 4 + .global __floatundisf + .type __floatundisf, @function +__floatundisf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Set the sign to zero and jump to the floatdisf code. */ + movi a7, 0 + j .Lfloatdisf_normalize + + .align 4 + .global __floatdisf + .type __floatdisf, @function +__floatdisf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Save the sign. */ + extui a7, xh, 31, 1 + + /* Get the absolute value. */ + bgez xh, .Lfloatdisf_normalize + neg xl, xl + neg xh, xh + beqz xl, .Lfloatdisf_normalize + addi xh, xh, -1 + +.Lfloatdisf_normalize: + /* Normalize with the first 1 bit in the msb of xh. */ + beqz xh, .Lfloatdisf_bigshift + do_nsau a4, xh, a5, a6 + ssl a4 + src xh, xh, xl + sll xl, xl + +.Lfloatdisf_shifted: + /* Shift the mantissa into position, with rounding bits in a6. */ + ssai 8 + sll a5, xl + src a6, xh, xl + srl xh, xh + beqz a5, 1f + movi a5, 1 + or a6, a6, a5 +1: + /* Set the exponent. */ + movi a5, 0xbd /* 0x7e + 63 */ + sub a5, a5, a4 + slli a5, a5, 23 + add a2, xh, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or a2, a2, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, 2f + addi a2, a2, 1 /* Overflow to the exponent is OK. */ + + /* Check if the leftover fraction is exactly 1/2. */ + slli a6, a6, 1 + beqz a6, .Lfloatdisf_exactlyhalf +2: leaf_return + +.Lfloatdisf_bigshift: + /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ + do_nsau a4, xl, a5, a6 + ssl a4 + sll xh, xl + movi xl, 0 + addi a4, a4, 32 + j .Lfloatdisf_shifted + +.Lfloatdisf_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +#endif /* L_floatdisf */ diff --git a/gcc/config/xtensa/lib1funcs.asm b/gcc/config/xtensa/lib1funcs.asm new file mode 100644 index 000000000..071b91711 --- /dev/null +++ b/gcc/config/xtensa/lib1funcs.asm @@ -0,0 +1,845 @@ +/* Assembly functions for the Xtensa version of libgcc1. + Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 + Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "xtensa-config.h" + +/* Define macros for the ABS and ADDX* instructions to handle cases + where they are not included in the Xtensa processor configuration. 
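Each fallback expands to the same arithmetic as the missing instruction, e.g. do_addx2 computes dst = (as << 1) + at using an slli and an add, at the cost of one extra instruction and a temporary register.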
*/ + + .macro do_abs dst, src, tmp +#if XCHAL_HAVE_ABS + abs \dst, \src +#else + neg \tmp, \src + movgez \tmp, \src, \src + mov \dst, \tmp +#endif + .endm + + .macro do_addx2 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx2 \dst, \as, \at +#else + slli \tmp, \as, 1 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx4 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx4 \dst, \as, \at +#else + slli \tmp, \as, 2 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx8 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx8 \dst, \as, \at +#else + slli \tmp, \as, 3 + add \dst, \tmp, \at +#endif + .endm + +/* Define macros for leaf function entry and return, supporting either the + standard register windowed ABI or the non-windowed call0 ABI. These + macros do not allocate any extra stack space, so they only work for + leaf functions that do not need to spill anything to the stack. */ + + .macro leaf_entry reg, size +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + entry \reg, \size +#else + /* do nothing */ +#endif + .endm + + .macro leaf_return +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + retw +#else + ret +#endif + .endm + + +#ifdef L_mulsi3 + .align 4 + .global __mulsi3 + .type __mulsi3, @function +__mulsi3: + leaf_entry sp, 16 + +#if XCHAL_HAVE_MUL32 + mull a2, a2, a3 + +#elif XCHAL_HAVE_MUL16 + or a4, a2, a3 + srai a4, a4, 16 + bnez a4, .LMUL16 + mul16u a2, a2, a3 + leaf_return +.LMUL16: + srai a4, a2, 16 + srai a5, a3, 16 + mul16u a7, a4, a3 + mul16u a6, a5, a2 + mul16u a4, a2, a3 + add a7, a7, a6 + slli a7, a7, 16 + add a2, a7, a4 + +#elif XCHAL_HAVE_MAC16 + mul.aa.hl a2, a3 + mula.aa.lh a2, a3 + rsr a5, ACCLO + umul.aa.ll a2, a3 + rsr a4, ACCLO + slli a5, a5, 16 + add a2, a4, a5 + +#else /* !MUL32 && !MUL16 && !MAC16 */ + + /* Multiply one bit at a time, but unroll the loop 4x to better + exploit the addx instructions and avoid overhead. + Peel the first iteration to save a cycle on init. */ + + /* Avoid negative numbers. */ + xor a5, a2, a3 /* Top bit is 1 if one input is negative. */ + do_abs a3, a3, a6 + do_abs a2, a2, a6 + + /* Swap so the second argument is smaller. */ + sub a7, a2, a3 + mov a4, a3 + movgez a4, a2, a7 /* a4 = max (a2, a3) */ + movltz a3, a2, a7 /* a3 = min (a2, a3) */ + + movi a2, 0 + extui a6, a3, 0, 1 + movnez a2, a4, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + neg a3, a2 + movltz a2, a3, a5 + leaf_return + + .align 4 +.Lmult_main_loop: + srli a3, a3, 4 + slli a4, a4, 4 + + add a7, a4, a2 + extui a6, a3, 0, 1 + movnez a2, a7, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + + neg a3, a2 + movltz a2, a3, a5 + +#endif /* !MUL32 && !MUL16 && !MAC16 */ + + leaf_return + .size __mulsi3, . 
- __mulsi3 + +#endif /* L_mulsi3 */ + + +#ifdef L_umulsidi3 + +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + + .align 4 + .global __umulsidi3 + .type __umulsidi3, @function +__umulsidi3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 48 +#else + leaf_entry sp, 16 +#endif + +#ifdef __XTENSA_EB__ +#define wh a2 +#define wl a3 +#else +#define wh a3 +#define wl a2 +#endif /* __XTENSA_EB__ */ + + /* This code is taken from the mulsf3 routine in ieee754-sf.S. + See more comments there. */ + +#if XCHAL_HAVE_MUL32_HIGH + mull a6, a2, a3 + muluh wh, a2, a3 + mov wl, a6 + +#else /* ! MUL32_HIGH */ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* a0 and a8 will be clobbered by calling the multiply function + but a8 is not used here and need not be saved. */ + s32i a0, sp, 0 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define a2h a4 +#define a3h a5 + + /* Get the high halves of the inputs into registers. */ + srli a2h, a2, 16 + srli a3h, a3, 16 + +#define a2l a2 +#define a3l a3 + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. */ + extui a2, a2, 0, 16 + extui a3, a3, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a6 with carry-out in a9. */ + do_mul(a6, a2, l, a3, h) /* pp 1 */ + do_mul(a11, a2, h, a3, l) /* pp 2 */ + movi a9, 0 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Shift the high half of a9/a6 into position in a9. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a9, a9, a6 + + /* Compute the low word into a6. */ + do_mul(a11, a2, l, a3, l) /* pp 0 */ + sll a6, a6 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Compute the high word into wh. */ + do_mul(wh, a2, h, a3, h) /* pp 3 */ + add wh, wh, a9 + mov wl, a6 + +#endif /* !MUL32_HIGH */ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Restore the original return address. 
*/ + l32i a0, sp, 0 +#endif +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ + + .size __umulsidi3, . - __umulsidi3 + +#endif /* L_umulsidi3 */ + + +/* Define a macro for the NSAU (unsigned normalize shift amount) + instruction, which computes the number of leading zero bits, + to handle cases where it is not included in the Xtensa processor + configuration. */ + + .macro do_nsau cnt, val, tmp, a +#if XCHAL_HAVE_NSA + nsau \cnt, \val +#else + mov \a, \val + movi \cnt, 0 + extui \tmp, \a, 16, 16 + bnez \tmp, 0f + movi \cnt, 16 + slli \a, \a, 16 +0: + extui \tmp, \a, 24, 8 + bnez \tmp, 1f + addi \cnt, \cnt, 8 + slli \a, \a, 8 +1: + movi \tmp, __nsau_data + extui \a, \a, 24, 8 + add \tmp, \tmp, \a + l8ui \tmp, \tmp, 0 + add \cnt, \cnt, \tmp +#endif /* !XCHAL_HAVE_NSA */ + .endm + +#ifdef L_clz + .section .rodata + .align 4 + .global __nsau_data + .type __nsau_data, @object +__nsau_data: +#if !XCHAL_HAVE_NSA + .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 + .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +#endif /* !XCHAL_HAVE_NSA */ + .size __nsau_data, . - __nsau_data + .hidden __nsau_data +#endif /* L_clz */ + + +#ifdef L_clzsi2 + .align 4 + .global __clzsi2 + .type __clzsi2, @function +__clzsi2: + leaf_entry sp, 16 + do_nsau a2, a2, a3, a4 + leaf_return + .size __clzsi2, . 
- __clzsi2 + +#endif /* L_clzsi2 */ + + +#ifdef L_ctzsi2 + .align 4 + .global __ctzsi2 + .type __ctzsi2, @function +__ctzsi2: + leaf_entry sp, 16 + neg a3, a2 + and a3, a3, a2 + do_nsau a2, a3, a4, a5 + neg a2, a2 + addi a2, a2, 31 + leaf_return + .size __ctzsi2, . - __ctzsi2 + +#endif /* L_ctzsi2 */ + + +#ifdef L_ffssi2 + .align 4 + .global __ffssi2 + .type __ffssi2, @function +__ffssi2: + leaf_entry sp, 16 + neg a3, a2 + and a3, a3, a2 + do_nsau a2, a3, a4, a5 + neg a2, a2 + addi a2, a2, 32 + leaf_return + .size __ffssi2, . - __ffssi2 + +#endif /* L_ffssi2 */ + + +#ifdef L_udivsi3 + .align 4 + .global __udivsi3 + .type __udivsi3, @function +__udivsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + quou a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor <= 1 */ + + mov a6, a2 /* keep dividend in a6 */ + do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment quotient if dividend >= divisor */ +.Lreturn: + leaf_return + +.Lle_one: + beqz a3, .Lerror /* if divisor == 1, return the dividend */ + leaf_return + +.Lspecial: + /* return dividend >= divisor */ + bltu a6, a3, .Lreturn0 + movi a2, 1 + leaf_return + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __udivsi3, . - __udivsi3 + +#endif /* L_udivsi3 */ + + +#ifdef L_divsi3 + .align 4 + .global __divsi3 + .type __divsi3, @function +__divsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + quos a2, a2, a3 +#else + xor a7, a2, a3 /* sign = dividend ^ divisor */ + do_abs a6, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment if udividend >= udivisor */ +.Lreturn: + neg a5, a2 + movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */ + leaf_return + +.Lle_one: + beqz a3, .Lerror + neg a2, a6 /* if udivisor == 1, then return... */ + movgez a2, a6, a7 /* (sign < 0) ? 
-udividend : udividend */ + leaf_return + +.Lspecial: + bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */ + movi a2, 1 + movi a4, -1 + movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */ + leaf_return + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __divsi3, . - __divsi3 + +#endif /* L_divsi3 */ + + +#ifdef L_umodsi3 + .align 4 + .global __umodsi3 + .type __umodsi3, @function +__umodsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + remu a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */ + + do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract once more if dividend >= divisor */ +.Lreturn: + leaf_return + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __umodsi3, . - __umodsi3 + +#endif /* L_umodsi3 */ + + +#ifdef L_modsi3 + .align 4 + .global __modsi3 + .type __modsi3, @function +__modsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + rems a2, a2, a3 +#else + mov a7, a2 /* save original (signed) dividend */ + do_abs a2, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract again if udividend >= udivisor */ +.Lreturn: + bgez a7, .Lpositive + neg a2, a2 /* if (dividend < 0), return -udividend */ +.Lpositive: + leaf_return + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __modsi3, . 
- __modsi3 + +#endif /* L_modsi3 */ + + +#ifdef __XTENSA_EB__ +#define uh a2 +#define ul a3 +#else +#define uh a3 +#define ul a2 +#endif /* __XTENSA_EB__ */ + + +#ifdef L_ashldi3 + .align 4 + .global __ashldi3 + .type __ashldi3, @function +__ashldi3: + leaf_entry sp, 16 + ssl a4 + bgei a4, 32, .Llow_only + src uh, uh, ul + sll ul, ul + leaf_return + +.Llow_only: + sll uh, ul + movi ul, 0 + leaf_return + .size __ashldi3, . - __ashldi3 + +#endif /* L_ashldi3 */ + + +#ifdef L_ashrdi3 + .align 4 + .global __ashrdi3 + .type __ashrdi3, @function +__ashrdi3: + leaf_entry sp, 16 + ssr a4 + bgei a4, 32, .Lhigh_only + src ul, uh, ul + sra uh, uh + leaf_return + +.Lhigh_only: + sra ul, uh + srai uh, uh, 31 + leaf_return + .size __ashrdi3, . - __ashrdi3 + +#endif /* L_ashrdi3 */ + + +#ifdef L_lshrdi3 + .align 4 + .global __lshrdi3 + .type __lshrdi3, @function +__lshrdi3: + leaf_entry sp, 16 + ssr a4 + bgei a4, 32, .Lhigh_only1 + src ul, uh, ul + srl uh, uh + leaf_return + +.Lhigh_only1: + srl ul, uh + movi uh, 0 + leaf_return + .size __lshrdi3, . - __lshrdi3 + +#endif /* L_lshrdi3 */ + + +#include "ieee754-df.S" +#include "ieee754-sf.S" diff --git a/gcc/config/xtensa/lib2funcs.S b/gcc/config/xtensa/lib2funcs.S new file mode 100644 index 000000000..65134e24c --- /dev/null +++ b/gcc/config/xtensa/lib2funcs.S @@ -0,0 +1,186 @@ +/* Assembly functions for libgcc2. + Copyright (C) 2001, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "xtensa-config.h" + +/* __xtensa_libgcc_window_spill: This function flushes out all but the + current register window. This is used to set up the stack so that + arbitrary frames can be accessed. */ + + .align 4 + .global __xtensa_libgcc_window_spill + .type __xtensa_libgcc_window_spill,@function +__xtensa_libgcc_window_spill: + entry sp, 32 + movi a2, 0 + syscall + retw + .size __xtensa_libgcc_window_spill, .-__xtensa_libgcc_window_spill + + +/* __xtensa_nonlocal_goto: This code does all the hard work of a + nonlocal goto on Xtensa. It is here in the library to avoid the + code size bloat of generating it in-line. There are two + arguments: + + a2 = frame pointer for the procedure containing the label + a3 = goto handler address + + This function never returns to its caller but instead goes directly + to the address of the specified goto handler. */ + + .align 4 + .global __xtensa_nonlocal_goto + .type __xtensa_nonlocal_goto,@function +__xtensa_nonlocal_goto: + entry sp, 32 + + /* Flush registers. 
*/ + mov a5, a2 + movi a2, 0 + syscall + mov a2, a5 + + /* Because the save area for a0-a3 is stored one frame below + the one identified by a2, the only way to restore those + registers is to unwind the stack. If alloca() were never + called, we could just unwind until finding the sp value + matching a2. However, a2 is a frame pointer, not a stack + pointer, and may not be encountered during the unwinding. + The solution is to unwind until going _past_ the value + given by a2. This involves keeping three stack pointer + values during the unwinding: + + next = sp of frame N-1 + cur = sp of frame N + prev = sp of frame N+1 + + When next > a2, the desired save area is stored relative + to prev. At this point, cur will be the same as a2 + except in the alloca() case. + + Besides finding the values to be restored to a0-a3, we also + need to find the current window size for the target + function. This can be extracted from the high bits of the + return address, initially in a0. As the unwinding + proceeds, the window size is taken from the value of a0 + saved _two_ frames below the current frame. */ + + addi a5, sp, -16 /* a5 = prev - save area */ + l32i a6, a5, 4 + addi a6, a6, -16 /* a6 = cur - save area */ + mov a8, a0 /* a8 = return address (for window size) */ + j .Lfirstframe + +.Lnextframe: + l32i a8, a5, 0 /* next return address (for window size) */ + mov a5, a6 /* advance prev */ + addi a6, a7, -16 /* advance cur */ +.Lfirstframe: + l32i a7, a6, 4 /* a7 = next */ + bgeu a2, a7, .Lnextframe + + /* At this point, prev (a5) points to the save area with the saved + values of a0-a3. Copy those values into the save area at the + current sp so they will be reloaded when the return from this + function underflows. We don't have to worry about exceptions + while updating the current save area, because the windows have + already been flushed. */ + + addi a4, sp, -16 /* a4 = save area of this function */ + l32i a6, a5, 0 + l32i a7, a5, 4 + s32i a6, a4, 0 + s32i a7, a4, 4 + l32i a6, a5, 8 + l32i a7, a5, 12 + s32i a6, a4, 8 + s32i a7, a4, 12 + + /* Set return address to goto handler. Use the window size bits + from the return address two frames below the target. */ + extui a8, a8, 30, 2 /* get window size from return addr. */ + slli a3, a3, 2 /* get goto handler addr. << 2 */ + ssai 2 + src a0, a8, a3 /* combine them with a funnel shift */ + + retw + .size __xtensa_nonlocal_goto, .-__xtensa_nonlocal_goto + + +/* __xtensa_sync_caches: This function is called after writing a trampoline + on the stack to force all the data writes to memory and invalidate the + instruction cache. a2 is the address of the new trampoline. + + After the trampoline data is written out, it must be flushed out of + the data cache into memory. We use DHWB in case we have a writeback + cache. At least one DHWB instruction is needed for each data cache + line which may be touched by the trampoline. An ISYNC instruction + must follow the DHWBs. + + We have to flush the i-cache to make sure that the new values get used. + At least one IHI instruction is needed for each i-cache line which may + be touched by the trampoline. An ISYNC instruction is also needed to + make sure that the modified instructions are loaded into the instruction + fetch buffer. */ + +/* Use the maximum trampoline size. Flushing a bit extra is OK. 
*/ +#define TRAMPOLINE_SIZE 60 + + .text + .align 4 + .global __xtensa_sync_caches + .type __xtensa_sync_caches,@function +__xtensa_sync_caches: + entry sp, 32 +#if XCHAL_DCACHE_SIZE > 0 + /* Flush the trampoline from the data cache. */ + extui a4, a2, 0, XCHAL_DCACHE_LINEWIDTH + addi a4, a4, TRAMPOLINE_SIZE + addi a4, a4, (1 << XCHAL_DCACHE_LINEWIDTH) - 1 + srli a4, a4, XCHAL_DCACHE_LINEWIDTH + mov a3, a2 +.Ldcache_loop: + dhwb a3, 0 + addi a3, a3, (1 << XCHAL_DCACHE_LINEWIDTH) + addi a4, a4, -1 + bnez a4, .Ldcache_loop + isync +#endif +#if XCHAL_ICACHE_SIZE > 0 + /* Invalidate the corresponding lines in the instruction cache. */ + extui a4, a2, 0, XCHAL_ICACHE_LINEWIDTH + addi a4, a4, TRAMPOLINE_SIZE + addi a4, a4, (1 << XCHAL_ICACHE_LINEWIDTH) - 1 + srli a4, a4, XCHAL_ICACHE_LINEWIDTH +.Licache_loop: + ihi a2, 0 + addi a2, a2, (1 << XCHAL_ICACHE_LINEWIDTH) + addi a4, a4, -1 + bnez a4, .Licache_loop +#endif + isync + retw + .size __xtensa_sync_caches, .-__xtensa_sync_caches diff --git a/gcc/config/xtensa/libgcc-xtensa.ver b/gcc/config/xtensa/libgcc-xtensa.ver new file mode 100644 index 000000000..43e7d4fc7 --- /dev/null +++ b/gcc/config/xtensa/libgcc-xtensa.ver @@ -0,0 +1,3 @@ +GCC_4.3.0 { + __umulsidi3 +} diff --git a/gcc/config/xtensa/linux-unwind.h b/gcc/config/xtensa/linux-unwind.h new file mode 100644 index 000000000..245649728 --- /dev/null +++ b/gcc/config/xtensa/linux-unwind.h @@ -0,0 +1,97 @@ +/* DWARF2 EH unwinding support for Xtensa. + Copyright (C) 2008, 2009, 2012 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2-xtensa.c for the structs. + Don't use this at all if inhibit_libc is used. */ + +#ifndef inhibit_libc + +#include <signal.h> +#include <sys/ucontext.h> + +/* Encoded bytes for Xtensa instructions: + movi a2, __NR_rt_sigreturn + syscall + entry (first byte only) + Some of the bytes are endian-dependent. 
*/ + +#define MOVI_BYTE0 0x22 +#define MOVI_BYTE2 225 /* __NR_rt_sigreturn */ +#define SYSC_BYTE0 0 +#define SYSC_BYTE2 0 + +#ifdef __XTENSA_EB__ +#define MOVI_BYTE1 0x0a +#define SYSC_BYTE1 0x05 +#define ENTRY_BYTE 0x6c +#else +#define MOVI_BYTE1 0xa0 +#define SYSC_BYTE1 0x50 +#define ENTRY_BYTE 0x36 +#endif + +#define MD_FALLBACK_FRAME_STATE_FOR xtensa_fallback_frame_state + +static _Unwind_Reason_Code +xtensa_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned char *pc = context->ra; + struct sigcontext *sc; + + struct rt_sigframe { + siginfo_t info; + struct ucontext uc; + } *rt_; + + /* movi a2, __NR_rt_sigreturn; syscall */ + if (pc[0] != MOVI_BYTE0 + || pc[1] != MOVI_BYTE1 + || pc[2] != MOVI_BYTE2 + || pc[3] != SYSC_BYTE0 + || pc[4] != SYSC_BYTE1 + || pc[5] != SYSC_BYTE2) + return _URC_END_OF_STACK; + + rt_ = context->sp; + sc = &rt_->uc.uc_mcontext; + fs->signal_regs = (_Unwind_Word *) sc->sc_a; + + /* If the signal arrived just before an ENTRY instruction, find the return + address and pretend the signal arrived before executing the CALL. */ + if (*(unsigned char *) sc->sc_pc == ENTRY_BYTE) + { + unsigned callinc = (sc->sc_ps >> 16) & 3; + fs->signal_ra = ((sc->sc_a[callinc << 2] & XTENSA_RA_FIELD_MASK) + | context->ra_high_bits) - 3; + } + else + fs->signal_ra = sc->sc_pc; + + fs->signal_frame = 1; + return _URC_NO_REASON; +} + +#endif /* ifdef inhibit_libc */ diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h new file mode 100644 index 000000000..83d2a9767 --- /dev/null +++ b/gcc/config/xtensa/linux.h @@ -0,0 +1,71 @@ +/* Xtensa Linux configuration. + Derived from the configuration for GCC for Intel i386 running Linux. + Copyright (C) 2001, 2002, 2003, 2006, 2007, 2008, 2010, 2011 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS() + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (Xtensa GNU/Linux with ELF)", stderr); + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "long int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef ASM_SPEC +#define ASM_SPEC \ + "%{mtext-section-literals:--text-section-literals} \ + %{mno-text-section-literals:--no-text-section-literals} \ + %{mtarget-align:--target-align} \ + %{mno-target-align:--no-target-align} \ + %{mlongcalls:--longcalls} \ + %{mno-longcalls:--no-longcalls}" + +#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " LINUX_DYNAMIC_LINKER "} \ + %{static:-static}}" + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." 
+ +/* Always enable "-fpic" for Xtensa Linux. */ +#define XTENSA_ALWAYS_PIC 1 + +#undef DBX_REGISTER_NUMBER + +#define MD_UNWIND_SUPPORT "config/xtensa/linux-unwind.h" + diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md new file mode 100644 index 000000000..27f058de7 --- /dev/null +++ b/gcc/config/xtensa/predicates.md @@ -0,0 +1,175 @@ +;; Predicate definitions for Xtensa. +;; Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_predicate "add_operand" + (ior (and (match_code "const_int") + (match_test "xtensa_simm8 (INTVAL (op)) + || xtensa_simm8x256 (INTVAL (op))")) + (match_operand 0 "register_operand"))) + +(define_predicate "addsubx_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 + || INTVAL (op) == 4 + || INTVAL (op) == 8"))) + +(define_predicate "arith_operand" + (ior (and (match_code "const_int") + (match_test "xtensa_simm8 (INTVAL (op))")) + (match_operand 0 "register_operand"))) + +;; Non-immediate operand excluding the constant pool. +(define_predicate "nonimmed_operand" + (ior (and (match_operand 0 "memory_operand") + (match_test "!constantpool_mem_p (op)")) + (match_operand 0 "register_operand"))) + +;; Memory operand excluding the constant pool. +(define_predicate "mem_operand" + (and (match_operand 0 "memory_operand") + (match_test "!constantpool_mem_p (op)"))) + +;; Memory operand in the constant pool. +(define_predicate "constantpool_operand" + (match_test "constantpool_mem_p (op)")) + +(define_predicate "mask_operand" + (ior (and (match_code "const_int") + (match_test "xtensa_mask_immediate (INTVAL (op))")) + (match_operand 0 "register_operand"))) + +(define_predicate "extui_fldsz_operand" + (and (match_code "const_int") + (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) + +(define_predicate "sext_operand" + (if_then_else (match_test "TARGET_SEXT") + (match_operand 0 "nonimmed_operand") + (match_operand 0 "mem_operand"))) + +(define_predicate "sext_fldsz_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) + +(define_predicate "lsbitnum_operand" + (and (match_code "const_int") + (match_test "BITS_BIG_ENDIAN + ? 
(INTVAL (op) == BITS_PER_WORD - 1) + : (INTVAL (op) == 0)"))) + +(define_predicate "branch_operand" + (ior (and (match_code "const_int") + (match_test "xtensa_b4const_or_zero (INTVAL (op))")) + (match_operand 0 "register_operand"))) + +(define_predicate "ubranch_operand" + (ior (and (match_code "const_int") + (match_test "xtensa_b4constu (INTVAL (op))")) + (match_operand 0 "register_operand"))) + +(define_predicate "call_insn_operand" + (match_code "const_int,const,symbol_ref,reg") +{ + if ((GET_CODE (op) == REG) + && (op != arg_pointer_rtx) + && ((REGNO (op) < FRAME_POINTER_REGNUM) + || (REGNO (op) > LAST_VIRTUAL_REGISTER))) + return true; + + if (CONSTANT_ADDRESS_P (op)) + { + /* Direct calls only allowed to static functions with PIC. */ + if (flag_pic) + { + tree callee, callee_sec, caller_sec; + + if (GET_CODE (op) != SYMBOL_REF + || !SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_EXTERNAL_P (op)) + return false; + + /* Don't attempt a direct call if the callee is known to be in + a different section, since there's a good chance it will be + out of range. */ + + if (flag_function_sections + || DECL_ONE_ONLY (current_function_decl)) + return false; + caller_sec = DECL_SECTION_NAME (current_function_decl); + callee = SYMBOL_REF_DECL (op); + if (callee) + { + if (DECL_ONE_ONLY (callee)) + return false; + callee_sec = DECL_SECTION_NAME (callee); + if (((caller_sec == NULL_TREE) ^ (callee_sec == NULL_TREE)) + || (caller_sec != NULL_TREE + && strcmp (TREE_STRING_POINTER (caller_sec), + TREE_STRING_POINTER (callee_sec)) != 0)) + return false; + } + else if (caller_sec != NULL_TREE) + return false; + } + return true; + } + + return false; +}) + +(define_predicate "move_operand" + (ior + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "memory_operand") + (match_test "!constantpool_mem_p (op) + || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) + (ior (and (match_code "const_int") + (match_test "GET_MODE_CLASS (mode) == MODE_INT + && xtensa_simm12b (INTVAL (op))")) + (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "TARGET_CONST16 && CONSTANT_P (op) + && GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))))) + +;; Accept the floating point constant 1 in the appropriate mode. 
+(define_predicate "const_float_1_operand" + (match_code "const_double") +{ + REAL_VALUE_TYPE d; + REAL_VALUE_FROM_CONST_DOUBLE (d, op); + return REAL_VALUES_EQUAL (d, dconst1); +}) + +(define_predicate "fpmem_offset_operand" + (and (match_code "const_int") + (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) + +(define_predicate "branch_operator" + (match_code "eq,ne,lt,ge")) + +(define_predicate "ubranch_operator" + (match_code "ltu,geu")) + +(define_predicate "boolean_operator" + (match_code "eq,ne")) + +(define_predicate "xtensa_cstoresi_operator" + (match_code "eq,ne,gt,ge,lt,le")) + +(define_predicate "tls_symbol_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) diff --git a/gcc/config/xtensa/t-elf b/gcc/config/xtensa/t-elf new file mode 100644 index 000000000..7d6cd1a3a --- /dev/null +++ b/gcc/config/xtensa/t-elf @@ -0,0 +1,6 @@ +# Build CRT files and libgcc with the "longcalls" option +CRTSTUFF_T_CFLAGS += -mlongcalls +CRTSTUFF_T_CFLAGS_S += -mlongcalls +TARGET_LIBGCC2_CFLAGS += -mlongcalls + +EXTRA_MULTILIB_PARTS = crti.o crtn.o crtbegin.o crtend.o diff --git a/gcc/config/xtensa/t-linux b/gcc/config/xtensa/t-linux new file mode 100644 index 000000000..7d535e155 --- /dev/null +++ b/gcc/config/xtensa/t-linux @@ -0,0 +1,3 @@ +EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o + +SHLIB_MAPFILES += $(srcdir)/config/xtensa/libgcc-xtensa.ver diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa new file mode 100644 index 000000000..c3d98ae30 --- /dev/null +++ b/gcc/config/xtensa/t-xtensa @@ -0,0 +1,42 @@ +# Copyright (C) 2002, 2003, 2006, 2007, 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. 
+ +LIB1ASMSRC = xtensa/lib1funcs.asm +LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ + _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ + _ashldi3 _ashrdi3 _lshrdi3 \ + _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ + _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \ + _floatdisf _floatundisf \ + _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \ + _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \ + _floatdidf _floatundidf \ + _truncdfsf2 _extendsfdf2 + +LIB2FUNCS_EXTRA = $(srcdir)/config/xtensa/lib2funcs.S +LIB2ADDEH = $(srcdir)/config/xtensa/unwind-dw2-xtensa.c \ + $(srcdir)/unwind-dw2-fde.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c + +$(T)crti.o: $(srcdir)/config/xtensa/crti.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/xtensa/crti.asm +$(T)crtn.o: $(srcdir)/config/xtensa/crtn.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/xtensa/crtn.asm + +$(out_object_file): gt-xtensa.h diff --git a/gcc/config/xtensa/unwind-dw2-xtensa.c b/gcc/config/xtensa/unwind-dw2-xtensa.c new file mode 100644 index 000000000..9544f65ab --- /dev/null +++ b/gcc/config/xtensa/unwind-dw2-xtensa.c @@ -0,0 +1,546 @@ +/* DWARF2 exception handling and frame unwinding for Xtensa. + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "tconfig.h" +#include "tsystem.h" +#include "coretypes.h" +#include "tm.h" +#include "dwarf2.h" +#include "unwind.h" +#ifdef __USING_SJLJ_EXCEPTIONS__ +# define NO_SIZE_OF_ENCODED_VALUE +#endif +#include "unwind-pe.h" +#include "unwind-dw2-fde.h" +#include "unwind-dw2-xtensa.h" + +#ifndef __USING_SJLJ_EXCEPTIONS__ + +/* The standard CIE and FDE structures work fine for Xtensa but the + variable-size register window save areas are not a good fit for the rest + of the standard DWARF unwinding mechanism. Nor is that mechanism + necessary, since the register save areas are always in fixed locations + in each stack frame. This file is a stripped down and customized version + of the standard DWARF unwinding code. It needs to be customized to have + builtin logic for finding the save areas and also to track the stack + pointer value (besides the CFA) while unwinding since the primary save + area is located below the stack pointer. It is stripped down to reduce + code size and ease the maintenance burden of tracking changes in the + standard version of the code. 
*/ + +#ifndef DWARF_REG_TO_UNWIND_COLUMN +#define DWARF_REG_TO_UNWIND_COLUMN(REGNO) (REGNO) +#endif + +#define XTENSA_RA_FIELD_MASK 0x3FFFFFFF + +/* This is the register and unwind state for a particular frame. This + provides the information necessary to unwind up past a frame and return + to its caller. */ +struct _Unwind_Context +{ + /* Track register window save areas of 4 registers each, instead of + keeping separate addresses for the individual registers. */ + _Unwind_Word *reg[4]; + + void *cfa; + void *sp; + void *ra; + + /* Cache the 2 high bits to replace the window size in return addresses. */ + _Unwind_Word ra_high_bits; + + void *lsda; + struct dwarf_eh_bases bases; + /* Signal frame context. */ +#define SIGNAL_FRAME_BIT ((~(_Unwind_Word) 0 >> 1) + 1) + _Unwind_Word flags; + /* 0 for now, can be increased when further fields are added to + struct _Unwind_Context. */ + _Unwind_Word version; +}; + + +/* Read unaligned data from the instruction buffer. */ + +union unaligned +{ + void *p; +} __attribute__ ((packed)); + +static void uw_update_context (struct _Unwind_Context *, _Unwind_FrameState *); +static _Unwind_Reason_Code uw_frame_state_for (struct _Unwind_Context *, + _Unwind_FrameState *); + +static inline void * +read_pointer (const void *p) { const union unaligned *up = p; return up->p; } + +static inline _Unwind_Word +_Unwind_IsSignalFrame (struct _Unwind_Context *context) +{ + return (context->flags & SIGNAL_FRAME_BIT) ? 1 : 0; +} + +static inline void +_Unwind_SetSignalFrame (struct _Unwind_Context *context, int val) +{ + if (val) + context->flags |= SIGNAL_FRAME_BIT; + else + context->flags &= ~SIGNAL_FRAME_BIT; +} + +/* Get the value of register INDEX as saved in CONTEXT. */ + +inline _Unwind_Word +_Unwind_GetGR (struct _Unwind_Context *context, int index) +{ + _Unwind_Word *ptr; + + index = DWARF_REG_TO_UNWIND_COLUMN (index); + ptr = context->reg[index >> 2] + (index & 3); + + return *ptr; +} + +/* Get the value of the CFA as saved in CONTEXT. */ + +_Unwind_Word +_Unwind_GetCFA (struct _Unwind_Context *context) +{ + return (_Unwind_Ptr) context->cfa; +} + +/* Overwrite the saved value for register INDEX in CONTEXT with VAL. */ + +inline void +_Unwind_SetGR (struct _Unwind_Context *context, int index, _Unwind_Word val) +{ + _Unwind_Word *ptr; + + index = DWARF_REG_TO_UNWIND_COLUMN (index); + ptr = context->reg[index >> 2] + (index & 3); + + *ptr = val; +} + +/* Retrieve the return address for CONTEXT. */ + +inline _Unwind_Ptr +_Unwind_GetIP (struct _Unwind_Context *context) +{ + return (_Unwind_Ptr) context->ra; +} + +/* Retrieve the return address and flag whether that IP is before + or after first not yet fully executed instruction. */ + +inline _Unwind_Ptr +_Unwind_GetIPInfo (struct _Unwind_Context *context, int *ip_before_insn) +{ + *ip_before_insn = _Unwind_IsSignalFrame (context); + return (_Unwind_Ptr) context->ra; +} + +/* Overwrite the return address for CONTEXT with VAL. 
*/ + +inline void +_Unwind_SetIP (struct _Unwind_Context *context, _Unwind_Ptr val) +{ + context->ra = (void *) val; +} + +void * +_Unwind_GetLanguageSpecificData (struct _Unwind_Context *context) +{ + return context->lsda; +} + +_Unwind_Ptr +_Unwind_GetRegionStart (struct _Unwind_Context *context) +{ + return (_Unwind_Ptr) context->bases.func; +} + +void * +_Unwind_FindEnclosingFunction (void *pc) +{ + struct dwarf_eh_bases bases; + const struct dwarf_fde *fde = _Unwind_Find_FDE (pc-1, &bases); + if (fde) + return bases.func; + else + return NULL; +} + +_Unwind_Ptr +_Unwind_GetDataRelBase (struct _Unwind_Context *context) +{ + return (_Unwind_Ptr) context->bases.dbase; +} + +_Unwind_Ptr +_Unwind_GetTextRelBase (struct _Unwind_Context *context) +{ + return (_Unwind_Ptr) context->bases.tbase; +} + +#ifdef MD_UNWIND_SUPPORT +#include MD_UNWIND_SUPPORT +#endif + +/* Extract any interesting information from the CIE for the translation + unit F belongs to. Return a pointer to the byte after the augmentation, + or NULL if we encountered an undecipherable augmentation. */ + +static const unsigned char * +extract_cie_info (const struct dwarf_cie *cie, struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + const unsigned char *aug = cie->augmentation; + const unsigned char *p = aug + strlen ((const char *)aug) + 1; + const unsigned char *ret = NULL; + _uleb128_t utmp; + _sleb128_t stmp; + + /* g++ v2 "eh" has pointer immediately following augmentation string, + so it must be handled first. */ + if (aug[0] == 'e' && aug[1] == 'h') + { + fs->eh_ptr = read_pointer (p); + p += sizeof (void *); + aug += 2; + } + + /* Immediately following the augmentation are the code and + data alignment and return address column. */ + p = read_uleb128 (p, &utmp); + p = read_sleb128 (p, &stmp); + if (cie->version == 1) + fs->retaddr_column = *p++; + else + { + p = read_uleb128 (p, &utmp); + fs->retaddr_column = (_Unwind_Word)utmp; + } + fs->lsda_encoding = DW_EH_PE_omit; + + /* If the augmentation starts with 'z', then a uleb128 immediately + follows containing the length of the augmentation field following + the size. */ + if (*aug == 'z') + { + p = read_uleb128 (p, &utmp); + ret = p + utmp; + + fs->saw_z = 1; + ++aug; + } + + /* Iterate over recognized augmentation subsequences. */ + while (*aug != '\0') + { + /* "L" indicates a byte showing how the LSDA pointer is encoded. */ + if (aug[0] == 'L') + { + fs->lsda_encoding = *p++; + aug += 1; + } + + /* "R" indicates a byte indicating how FDE addresses are encoded. */ + else if (aug[0] == 'R') + { + fs->fde_encoding = *p++; + aug += 1; + } + + /* "P" indicates a personality routine in the CIE augmentation. */ + else if (aug[0] == 'P') + { + _Unwind_Ptr personality; + + p = read_encoded_value (context, *p, p + 1, &personality); + fs->personality = (_Unwind_Personality_Fn) personality; + aug += 1; + } + + /* "S" indicates a signal frame. */ + else if (aug[0] == 'S') + { + fs->signal_frame = 1; + aug += 1; + } + + /* Otherwise we have an unknown augmentation string. + Bail unless we saw a 'z' prefix. */ + else + return ret; + } + + return ret ? ret : p; +} + +/* Given the _Unwind_Context CONTEXT for a stack frame, look up the FDE for + its caller and decode it into FS. This function also sets the + lsda member of CONTEXT, as it is really information + about the caller's frame. 
*/ + +static _Unwind_Reason_Code +uw_frame_state_for (struct _Unwind_Context *context, _Unwind_FrameState *fs) +{ + const struct dwarf_fde *fde; + const struct dwarf_cie *cie; + const unsigned char *aug; + int window_size; + _Unwind_Word *ra_ptr; + + memset (fs, 0, sizeof (*fs)); + context->lsda = 0; + + fde = _Unwind_Find_FDE (context->ra + _Unwind_IsSignalFrame (context) - 1, + &context->bases); + if (fde == NULL) + { +#ifdef MD_FALLBACK_FRAME_STATE_FOR + _Unwind_Reason_Code reason; + /* Couldn't find frame unwind info for this function. Try a + target-specific fallback mechanism. This will necessarily + not provide a personality routine or LSDA. */ + reason = MD_FALLBACK_FRAME_STATE_FOR (context, fs); + if (reason != _URC_END_OF_STACK) + return reason; +#endif + /* The frame was not recognized and handled by the fallback function, + but it is not really the end of the stack. Fall through here and + unwind it anyway. */ + } + else + { + cie = get_cie (fde); + if (extract_cie_info (cie, context, fs) == NULL) + /* CIE contained unknown augmentation. */ + return _URC_FATAL_PHASE1_ERROR; + + /* Locate augmentation for the fde. */ + aug = (const unsigned char *) fde + sizeof (*fde); + aug += 2 * size_of_encoded_value (fs->fde_encoding); + if (fs->saw_z) + { + _uleb128_t i; + aug = read_uleb128 (aug, &i); + } + if (fs->lsda_encoding != DW_EH_PE_omit) + { + _Unwind_Ptr lsda; + + aug = read_encoded_value (context, fs->lsda_encoding, aug, &lsda); + context->lsda = (void *) lsda; + } + } + + /* Check for the end of the stack. This needs to be checked after + the MD_FALLBACK_FRAME_STATE_FOR check for signal frames because + the contents of context->reg[0] are undefined at a signal frame, + and register a0 may appear to be zero. (The return address in + context->ra comes from register a4 or a8). */ + ra_ptr = context->reg[0]; + if (ra_ptr && *ra_ptr == 0) + return _URC_END_OF_STACK; + + /* Find the window size from the high bits of the return address. */ + if (ra_ptr) + window_size = (*ra_ptr >> 30) * 4; + else + window_size = 8; + + fs->retaddr_column = window_size; + + return _URC_NO_REASON; +} + +static void +uw_update_context_1 (struct _Unwind_Context *context, _Unwind_FrameState *fs) +{ + struct _Unwind_Context orig_context = *context; + _Unwind_Word *sp, *cfa, *next_cfa; + int i; + + if (fs->signal_regs) + { + cfa = (_Unwind_Word *) fs->signal_regs[1]; + next_cfa = (_Unwind_Word *) cfa[-3]; + + for (i = 0; i < 4; i++) + context->reg[i] = fs->signal_regs + (i << 2); + } + else + { + int window_size = fs->retaddr_column >> 2; + + sp = (_Unwind_Word *) orig_context.sp; + cfa = (_Unwind_Word *) orig_context.cfa; + next_cfa = (_Unwind_Word *) cfa[-3]; + + /* Registers a0-a3 are in the save area below sp. */ + context->reg[0] = sp - 4; + + /* Find the extra save area below next_cfa. */ + for (i = 1; i < window_size; i++) + context->reg[i] = next_cfa - 4 * (1 + window_size - i); + + /* Remaining registers rotate from previous save areas. */ + for (i = window_size; i < 4; i++) + context->reg[i] = orig_context.reg[i - window_size]; + } + + context->sp = cfa; + context->cfa = next_cfa; + + _Unwind_SetSignalFrame (context, fs->signal_frame); +} + +/* CONTEXT describes the unwind state for a frame, and FS describes the FDE + of its caller. Update CONTEXT to refer to the caller as well. Note + that the lsda member is not updated here, but later in + uw_frame_state_for. 
*/ + +static void +uw_update_context (struct _Unwind_Context *context, _Unwind_FrameState *fs) +{ + uw_update_context_1 (context, fs); + + /* Compute the return address now, since the return address column + can change from frame to frame. */ + if (fs->signal_ra != 0) + context->ra = (void *) fs->signal_ra; + else + context->ra = (void *) ((_Unwind_GetGR (context, fs->retaddr_column) + & XTENSA_RA_FIELD_MASK) | context->ra_high_bits); +} + +static void +uw_advance_context (struct _Unwind_Context *context, _Unwind_FrameState *fs) +{ + uw_update_context (context, fs); +} + +/* Fill in CONTEXT for top-of-stack. The only valid registers at this + level will be the return address and the CFA. */ + +#define uw_init_context(CONTEXT) \ + do \ + { \ + __builtin_unwind_init (); \ + uw_init_context_1 (CONTEXT, __builtin_dwarf_cfa (), \ + __builtin_return_address (0)); \ + } \ + while (0) + +static void __attribute__((noinline)) +uw_init_context_1 (struct _Unwind_Context *context, void *outer_cfa, + void *outer_ra) +{ + void *ra = __builtin_return_address (0); + void *cfa = __builtin_dwarf_cfa (); + _Unwind_FrameState fs; + + memset (context, 0, sizeof (struct _Unwind_Context)); + context->ra = ra; + + memset (&fs, 0, sizeof (fs)); + fs.retaddr_column = 8; + context->sp = cfa; + context->cfa = outer_cfa; + context->ra_high_bits = + ((_Unwind_Word) uw_init_context_1) & ~XTENSA_RA_FIELD_MASK; + uw_update_context_1 (context, &fs); + + context->ra = outer_ra; +} + + +/* Install TARGET into CURRENT so that we can return to it. This is a + macro because __builtin_eh_return must be invoked in the context of + our caller. */ + +#define uw_install_context(CURRENT, TARGET) \ + do \ + { \ + long offset = uw_install_context_1 ((CURRENT), (TARGET)); \ + void *handler = __builtin_frob_return_addr ((TARGET)->ra); \ + __builtin_eh_return (offset, handler); \ + } \ + while (0) + +static long +uw_install_context_1 (struct _Unwind_Context *current, + struct _Unwind_Context *target) +{ + long i; + + /* The eh_return insn assumes a window size of 8, so don't bother copying + the save areas for registers a8-a15 since they won't be reloaded. */ + for (i = 0; i < 2; ++i) + { + void *c = current->reg[i]; + void *t = target->reg[i]; + + if (t && c && t != c) + memcpy (c, t, 4 * sizeof (_Unwind_Word)); + } + + return 0; +} + +static inline _Unwind_Ptr +uw_identify_context (struct _Unwind_Context *context) +{ + return _Unwind_GetCFA (context); +} + + +#include "unwind.inc" + +#if defined (USE_GAS_SYMVER) && defined (SHARED) && defined (USE_LIBUNWIND_EXCEPTIONS) +alias (_Unwind_Backtrace); +alias (_Unwind_DeleteException); +alias (_Unwind_FindEnclosingFunction); +alias (_Unwind_ForcedUnwind); +alias (_Unwind_GetDataRelBase); +alias (_Unwind_GetTextRelBase); +alias (_Unwind_GetCFA); +alias (_Unwind_GetGR); +alias (_Unwind_GetIP); +alias (_Unwind_GetLanguageSpecificData); +alias (_Unwind_GetRegionStart); +alias (_Unwind_RaiseException); +alias (_Unwind_Resume); +alias (_Unwind_Resume_or_Rethrow); +alias (_Unwind_SetGR); +alias (_Unwind_SetIP); +#endif + +#endif /* !USING_SJLJ_EXCEPTIONS */ diff --git a/gcc/config/xtensa/unwind-dw2-xtensa.h b/gcc/config/xtensa/unwind-dw2-xtensa.h new file mode 100644 index 000000000..d13b3264c --- /dev/null +++ b/gcc/config/xtensa/unwind-dw2-xtensa.h @@ -0,0 +1,50 @@ +/* DWARF2 frame unwind data structure for Xtensa. + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2007, 2008, + 2009 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* A target can override (perhaps for backward compatibility) how + many dwarf2 columns are unwound. */ +#ifndef DWARF_FRAME_REGISTERS +#define DWARF_FRAME_REGISTERS FIRST_PSEUDO_REGISTER +#endif + +/* Xtensa's variable-size register window save areas can be unwound without + any unwind info. This is a stripped down version of the standard DWARF + _Unwind_FrameState. */ +typedef struct +{ + /* The information we care about from the CIE/FDE. */ + _Unwind_Personality_Fn personality; + _Unwind_Word retaddr_column; + unsigned char fde_encoding; + unsigned char lsda_encoding; + unsigned char saw_z; + unsigned char signal_frame; + void *eh_ptr; + + /* Saved registers for a signal frame. */ + _Unwind_Word *signal_regs; + _Unwind_Word signal_ra; +} _Unwind_FrameState; + diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h new file mode 100644 index 000000000..0d1738f4e --- /dev/null +++ b/gcc/config/xtensa/xtensa-protos.h @@ -0,0 +1,74 @@ +/* Prototypes of target machine for GNU compiler for Xtensa. + Copyright 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef __XTENSA_PROTOS_H__ +#define __XTENSA_PROTOS_H__ + +/* Functions to test whether an immediate fits in a given field. */ +extern bool xtensa_simm8 (HOST_WIDE_INT); +extern bool xtensa_simm8x256 (HOST_WIDE_INT); +extern bool xtensa_simm12b (HOST_WIDE_INT); +extern bool xtensa_b4const_or_zero (HOST_WIDE_INT); +extern bool xtensa_b4constu (HOST_WIDE_INT); +extern bool xtensa_mask_immediate (HOST_WIDE_INT); +extern bool xtensa_mem_offset (unsigned, enum machine_mode); + +/* Functions within xtensa.c that we reference. 
*/ +#ifdef RTX_CODE +extern int xt_true_regnum (rtx); +extern int xtensa_valid_move (enum machine_mode, rtx *); +extern int smalloffset_mem_p (rtx); +extern int constantpool_mem_p (rtx); +extern void xtensa_extend_reg (rtx, rtx); +extern void xtensa_expand_conditional_branch (rtx *, enum machine_mode); +extern int xtensa_expand_conditional_move (rtx *, int); +extern int xtensa_expand_scc (rtx *, enum machine_mode); +extern int xtensa_expand_block_move (rtx *); +extern void xtensa_split_operand_pair (rtx *, enum machine_mode); +extern int xtensa_emit_move_sequence (rtx *, enum machine_mode); +extern rtx xtensa_copy_incoming_a7 (rtx); +extern void xtensa_expand_nonlocal_goto (rtx *); +extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); +extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); +extern void xtensa_emit_loop_end (rtx, rtx *); +extern char *xtensa_emit_branch (bool, bool, rtx *); +extern char *xtensa_emit_bit_branch (bool, bool, rtx *); +extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); +extern char *xtensa_emit_call (int, rtx *); +extern bool xtensa_tls_referenced_p (rtx); + +#ifdef TREE_CODE +extern void init_cumulative_args (CUMULATIVE_ARGS *, int); +#endif /* TREE_CODE */ + +extern void print_operand (FILE *, rtx, int); +extern void print_operand_address (FILE *, rtx); +extern void xtensa_output_literal (FILE *, rtx, enum machine_mode, int); +extern rtx xtensa_return_addr (int, rtx); +#endif /* RTX_CODE */ + +extern void xtensa_setup_frame_addresses (void); +extern int xtensa_dbx_register_number (int); +extern long compute_frame_size (int); +extern void xtensa_expand_prologue (void); +extern void order_regs_for_local_alloc (void); + +#endif /* !__XTENSA_PROTOS_H__ */ diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c new file mode 100644 index 000000000..e7c395be5 --- /dev/null +++ b/gcc/config/xtensa/xtensa.c @@ -0,0 +1,3715 @@ +/* Subroutines for insn-output.c for Tensilica's Xtensa architecture. + Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-flags.h" +#include "insn-attr.h" +#include "insn-codes.h" +#include "recog.h" +#include "output.h" +#include "tree.h" +#include "expr.h" +#include "flags.h" +#include "reload.h" +#include "tm_p.h" +#include "function.h" +#include "diagnostic-core.h" +#include "optabs.h" +#include "libfuncs.h" +#include "ggc.h" +#include "target.h" +#include "target-def.h" +#include "langhooks.h" +#include "gimple.h" +#include "df.h" + + +/* Enumeration for all of the relational tests, so that we can build + arrays indexed by the test type, and not worry about the order + of EQ, NE, etc. */ + +enum internal_test +{ + ITEST_EQ, + ITEST_NE, + ITEST_GT, + ITEST_GE, + ITEST_LT, + ITEST_LE, + ITEST_GTU, + ITEST_GEU, + ITEST_LTU, + ITEST_LEU, + ITEST_MAX +}; + +/* Array giving truth value on whether or not a given hard register + can support a given mode. */ +char xtensa_hard_regno_mode_ok[(int) MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER]; + +/* Current frame size calculated by compute_frame_size. */ +unsigned xtensa_current_frame_size; + +/* Largest block move to handle in-line. */ +#define LARGEST_MOVE_RATIO 15 + +/* Define the structure for the machine field in struct function. */ +struct GTY(()) machine_function +{ + int accesses_prev_frame; + bool need_a7_copy; + bool vararg_a7; + rtx vararg_a7_copy; + rtx set_frame_ptr_insn; +}; + +/* Vector, indexed by hard register number, which contains 1 for a + register that is allowable in a candidate for leaf function + treatment. */ + +const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1 +}; + +/* Map hard register number to register class */ +const enum reg_class xtensa_regno_to_class[FIRST_PSEUDO_REGISTER] = +{ + RL_REGS, SP_REG, RL_REGS, RL_REGS, + RL_REGS, RL_REGS, RL_REGS, GR_REGS, + RL_REGS, RL_REGS, RL_REGS, RL_REGS, + RL_REGS, RL_REGS, RL_REGS, RL_REGS, + AR_REGS, AR_REGS, BR_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + ACC_REG, +}; + +static void xtensa_option_override (void); +static enum internal_test map_test_to_internal_test (enum rtx_code); +static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); +static rtx gen_float_relational (enum rtx_code, rtx, rtx); +static rtx gen_conditional_move (enum rtx_code, enum machine_mode, rtx, rtx); +static rtx fixup_subreg_mem (rtx); +static struct machine_function * xtensa_init_machine_status (void); +static rtx xtensa_legitimize_tls_address (rtx); +static rtx xtensa_legitimize_address (rtx, rtx, enum machine_mode); +static bool xtensa_mode_dependent_address_p (const_rtx); +static bool xtensa_return_in_msb (const_tree); +static void printx (FILE *, signed int); +static void xtensa_function_epilogue (FILE *, HOST_WIDE_INT); +static rtx xtensa_builtin_saveregs (void); +static bool xtensa_legitimate_address_p (enum machine_mode, rtx, bool); +static unsigned int xtensa_multibss_section_type_flags (tree, const char *, + int) ATTRIBUTE_UNUSED; +static section *xtensa_select_rtx_section (enum machine_mode, rtx, + unsigned HOST_WIDE_INT); +static bool xtensa_rtx_costs (rtx, int, int, int *, bool); +static int xtensa_register_move_cost (enum 
machine_mode, reg_class_t, + reg_class_t); +static int xtensa_memory_move_cost (enum machine_mode, reg_class_t, bool); +static tree xtensa_build_builtin_va_list (void); +static bool xtensa_return_in_memory (const_tree, const_tree); +static tree xtensa_gimplify_va_arg_expr (tree, tree, gimple_seq *, + gimple_seq *); +static void xtensa_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static rtx xtensa_function_arg (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static rtx xtensa_function_incoming_arg (CUMULATIVE_ARGS *, + enum machine_mode, const_tree, bool); +static rtx xtensa_function_value (const_tree, const_tree, bool); +static rtx xtensa_libcall_value (enum machine_mode, const_rtx); +static bool xtensa_function_value_regno_p (const unsigned int); +static unsigned int xtensa_function_arg_boundary (enum machine_mode, + const_tree); +static void xtensa_init_builtins (void); +static tree xtensa_fold_builtin (tree, int, tree *, bool); +static rtx xtensa_expand_builtin (tree, rtx, rtx, enum machine_mode, int); +static void xtensa_va_start (tree, rtx); +static bool xtensa_frame_pointer_required (void); +static rtx xtensa_static_chain (const_tree, bool); +static void xtensa_asm_trampoline_template (FILE *); +static void xtensa_trampoline_init (rtx, tree, rtx); +static bool xtensa_output_addr_const_extra (FILE *, rtx); + +static reg_class_t xtensa_preferred_reload_class (rtx, reg_class_t); +static reg_class_t xtensa_preferred_output_reload_class (rtx, reg_class_t); +static reg_class_t xtensa_secondary_reload (bool, rtx, reg_class_t, + enum machine_mode, + struct secondary_reload_info *); + +static bool constantpool_address_p (const_rtx addr); + +static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] = + REG_ALLOC_ORDER; + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ + +static const struct default_options xtensa_option_optimization_table[] = + { + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + /* Reordering blocks for Xtensa is not a good idea unless the + compiler understands the range of conditional branches. + Currently all branch relaxation for Xtensa is handled in the + assembler, so GCC cannot do a good job of reordering blocks. + Do not enable reordering unless it is explicitly requested. */ + { OPT_LEVELS_ALL, OPT_freorder_blocks, NULL, 0 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + + +/* This macro generates the assembly code for function exit, + on machines that need it. If FUNCTION_EPILOGUE is not defined + then individual return instructions are generated for each + return statement. Args are same as for FUNCTION_PROLOGUE. */ + +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE xtensa_function_epilogue + +/* These hooks specify assembly directives for creating certain kinds + of integer object. 
*/ + +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION xtensa_select_rtx_section + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT) + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS xtensa_legitimize_address +#undef TARGET_MODE_DEPENDENT_ADDRESS_P +#define TARGET_MODE_DEPENDENT_ADDRESS_P xtensa_mode_dependent_address_p + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST xtensa_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS xtensa_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST xtensa_build_builtin_va_list + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START xtensa_va_start + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY xtensa_return_in_memory +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE xtensa_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE xtensa_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P xtensa_function_value_regno_p + +#undef TARGET_SPLIT_COMPLEX_ARG +#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE xtensa_function_arg_advance +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG xtensa_function_arg +#undef TARGET_FUNCTION_INCOMING_ARG +#define TARGET_FUNCTION_INCOMING_ARG xtensa_function_incoming_arg +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY xtensa_function_arg_boundary + +#undef TARGET_EXPAND_BUILTIN_SAVEREGS +#define TARGET_EXPAND_BUILTIN_SAVEREGS xtensa_builtin_saveregs +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR xtensa_gimplify_va_arg_expr + +#undef TARGET_RETURN_IN_MSB +#define TARGET_RETURN_IN_MSB xtensa_return_in_msb + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS xtensa_init_builtins +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN xtensa_fold_builtin +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN xtensa_expand_builtin + +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS xtensa_preferred_reload_class +#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS +#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS xtensa_preferred_output_reload_class + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD xtensa_secondary_reload + +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS (TARGET_THREADPTR && HAVE_AS_TLS) + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM xtensa_tls_referenced_p + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P xtensa_legitimate_address_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED xtensa_frame_pointer_required + +#undef TARGET_STATIC_CHAIN +#define TARGET_STATIC_CHAIN xtensa_static_chain +#undef 
TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE xtensa_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT xtensa_trampoline_init + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE xtensa_option_override +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE xtensa_option_optimization_table + +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA xtensa_output_addr_const_extra + +struct gcc_target targetm = TARGET_INITIALIZER; + + +/* Functions to test Xtensa immediate operand validity. */ + +bool +xtensa_simm8 (HOST_WIDE_INT v) +{ + return v >= -128 && v <= 127; +} + + +bool +xtensa_simm8x256 (HOST_WIDE_INT v) +{ + return (v & 255) == 0 && (v >= -32768 && v <= 32512); +} + + +bool +xtensa_simm12b (HOST_WIDE_INT v) +{ + return v >= -2048 && v <= 2047; +} + + +static bool +xtensa_uimm8 (HOST_WIDE_INT v) +{ + return v >= 0 && v <= 255; +} + + +static bool +xtensa_uimm8x2 (HOST_WIDE_INT v) +{ + return (v & 1) == 0 && (v >= 0 && v <= 510); +} + + +static bool +xtensa_uimm8x4 (HOST_WIDE_INT v) +{ + return (v & 3) == 0 && (v >= 0 && v <= 1020); +} + + +static bool +xtensa_b4const (HOST_WIDE_INT v) +{ + switch (v) + { + case -1: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 10: + case 12: + case 16: + case 32: + case 64: + case 128: + case 256: + return true; + } + return false; +} + + +bool +xtensa_b4const_or_zero (HOST_WIDE_INT v) +{ + if (v == 0) + return true; + return xtensa_b4const (v); +} + + +bool +xtensa_b4constu (HOST_WIDE_INT v) +{ + switch (v) + { + case 32768: + case 65536: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 10: + case 12: + case 16: + case 32: + case 64: + case 128: + case 256: + return true; + } + return false; +} + + +bool +xtensa_mask_immediate (HOST_WIDE_INT v) +{ +#define MAX_MASK_SIZE 16 + int mask_size; + + for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) + { + if ((v & 1) == 0) + return false; + v = v >> 1; + if (v == 0) + return true; + } + + return false; +} + + +/* This is just like the standard true_regnum() function except that it + works even when reg_renumber is not initialized. */ + +int +xt_true_regnum (rtx x) +{ + if (GET_CODE (x) == REG) + { + if (reg_renumber + && REGNO (x) >= FIRST_PSEUDO_REGISTER + && reg_renumber[REGNO (x)] >= 0) + return reg_renumber[REGNO (x)]; + return REGNO (x); + } + if (GET_CODE (x) == SUBREG) + { + int base = xt_true_regnum (SUBREG_REG (x)); + if (base >= 0 && base < FIRST_PSEUDO_REGISTER) + return base + subreg_regno_offset (REGNO (SUBREG_REG (x)), + GET_MODE (SUBREG_REG (x)), + SUBREG_BYTE (x), GET_MODE (x)); + } + return -1; +} + + +int +xtensa_valid_move (enum machine_mode mode, rtx *operands) +{ + /* Either the destination or source must be a register, and the + MAC16 accumulator doesn't count. */ + + if (register_operand (operands[0], mode)) + { + int dst_regnum = xt_true_regnum (operands[0]); + + /* The stack pointer can only be assigned with a MOVSP opcode. 
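+      The check below therefore only accepts an SImode register source
+      that is not the MAC16 accumulator; the move itself is presumably
+      emitted through the movsp pattern in the machine description.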
*/ + if (dst_regnum == STACK_POINTER_REGNUM) + return (mode == SImode + && register_operand (operands[1], mode) + && !ACC_REG_P (xt_true_regnum (operands[1]))); + + if (!ACC_REG_P (dst_regnum)) + return true; + } + if (register_operand (operands[1], mode)) + { + int src_regnum = xt_true_regnum (operands[1]); + if (!ACC_REG_P (src_regnum)) + return true; + } + return FALSE; +} + + +int +smalloffset_mem_p (rtx op) +{ + if (GET_CODE (op) == MEM) + { + rtx addr = XEXP (op, 0); + if (GET_CODE (addr) == REG) + return BASE_REG_P (addr, 0); + if (GET_CODE (addr) == PLUS) + { + rtx offset = XEXP (addr, 0); + HOST_WIDE_INT val; + if (GET_CODE (offset) != CONST_INT) + offset = XEXP (addr, 1); + if (GET_CODE (offset) != CONST_INT) + return FALSE; + + val = INTVAL (offset); + return (val & 3) == 0 && (val >= 0 && val <= 60); + } + } + return FALSE; +} + + +static bool +constantpool_address_p (const_rtx addr) +{ + const_rtx sym = addr; + + if (GET_CODE (addr) == CONST) + { + rtx offset; + + /* Only handle (PLUS (SYM, OFFSET)) form. */ + addr = XEXP (addr, 0); + if (GET_CODE (addr) != PLUS) + return false; + + /* Make sure the address is word aligned. */ + offset = XEXP (addr, 1); + if ((!CONST_INT_P (offset)) + || ((INTVAL (offset) & 3) != 0)) + return false; + + sym = XEXP (addr, 0); + } + + if ((GET_CODE (sym) == SYMBOL_REF) + && CONSTANT_POOL_ADDRESS_P (sym)) + return true; + return false; +} + + +int +constantpool_mem_p (rtx op) +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (GET_CODE (op) == MEM) + return constantpool_address_p (XEXP (op, 0)); + return FALSE; +} + + +/* Return TRUE if X is a thread-local symbol. */ + +static bool +xtensa_tls_symbol_p (rtx x) +{ + if (! TARGET_HAVE_TLS) + return false; + + return GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0; +} + + +void +xtensa_extend_reg (rtx dst, rtx src) +{ + rtx temp = gen_reg_rtx (SImode); + rtx shift = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (GET_MODE (src))); + + /* Generate paradoxical subregs as needed so that the modes match. */ + src = simplify_gen_subreg (SImode, src, GET_MODE (src), 0); + dst = simplify_gen_subreg (SImode, dst, GET_MODE (dst), 0); + + emit_insn (gen_ashlsi3 (temp, src, shift)); + emit_insn (gen_ashrsi3 (dst, temp, shift)); +} + + +bool +xtensa_mem_offset (unsigned v, enum machine_mode mode) +{ + switch (mode) + { + case BLKmode: + /* Handle the worst case for block moves. See xtensa_expand_block_move + where we emit an optimized block move operation if the block can be + moved in < "move_ratio" pieces. The worst case is when the block is + aligned but has a size of (3 mod 4) (does this happen?) so that the + last piece requires a byte load/store. */ + return (xtensa_uimm8 (v) + && xtensa_uimm8 (v + MOVE_MAX * LARGEST_MOVE_RATIO)); + + case QImode: + return xtensa_uimm8 (v); + + case HImode: + return xtensa_uimm8x2 (v); + + case DFmode: + return (xtensa_uimm8x4 (v) && xtensa_uimm8x4 (v + 4)); + + default: + break; + } + + return xtensa_uimm8x4 (v); +} + + +/* Make normal rtx_code into something we can index from an array. 
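+   For example, map_test_to_internal_test (GTU) yields ITEST_GTU, which
+   is then used to index the `info' table in gen_int_relational below.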
*/ + +static enum internal_test +map_test_to_internal_test (enum rtx_code test_code) +{ + enum internal_test test = ITEST_MAX; + + switch (test_code) + { + default: break; + case EQ: test = ITEST_EQ; break; + case NE: test = ITEST_NE; break; + case GT: test = ITEST_GT; break; + case GE: test = ITEST_GE; break; + case LT: test = ITEST_LT; break; + case LE: test = ITEST_LE; break; + case GTU: test = ITEST_GTU; break; + case GEU: test = ITEST_GEU; break; + case LTU: test = ITEST_LTU; break; + case LEU: test = ITEST_LEU; break; + } + + return test; +} + + +/* Generate the code to compare two integer values. The return value is + the comparison expression. */ + +static rtx +gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + rtx cmp0, /* first operand to compare */ + rtx cmp1, /* second operand to compare */ + int *p_invert /* whether branch needs to reverse test */) +{ + struct cmp_info + { + enum rtx_code test_code; /* test code to use in insn */ + bool (*const_range_p) (HOST_WIDE_INT); /* range check function */ + int const_add; /* constant to add (convert LE -> LT) */ + int reverse_regs; /* reverse registers in test */ + int invert_const; /* != 0 if invert value if cmp1 is constant */ + int invert_reg; /* != 0 if invert value if cmp1 is register */ + int unsignedp; /* != 0 for unsigned comparisons. */ + }; + + static struct cmp_info info[ (int)ITEST_MAX ] = { + + { EQ, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* EQ */ + { NE, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* NE */ + + { LT, xtensa_b4const_or_zero, 1, 1, 1, 0, 0 }, /* GT */ + { GE, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* GE */ + { LT, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* LT */ + { GE, xtensa_b4const_or_zero, 1, 1, 1, 0, 0 }, /* LE */ + + { LTU, xtensa_b4constu, 1, 1, 1, 0, 1 }, /* GTU */ + { GEU, xtensa_b4constu, 0, 0, 0, 0, 1 }, /* GEU */ + { LTU, xtensa_b4constu, 0, 0, 0, 0, 1 }, /* LTU */ + { GEU, xtensa_b4constu, 1, 1, 1, 0, 1 }, /* LEU */ + }; + + enum internal_test test; + enum machine_mode mode; + struct cmp_info *p_info; + + test = map_test_to_internal_test (test_code); + gcc_assert (test != ITEST_MAX); + + p_info = &info[ (int)test ]; + + mode = GET_MODE (cmp0); + if (mode == VOIDmode) + mode = GET_MODE (cmp1); + + /* Make sure we can handle any constants given to us. */ + if (GET_CODE (cmp1) == CONST_INT) + { + HOST_WIDE_INT value = INTVAL (cmp1); + unsigned HOST_WIDE_INT uvalue = (unsigned HOST_WIDE_INT)value; + + /* if the immediate overflows or does not fit in the immediate field, + spill it to a register */ + + if ((p_info->unsignedp ? + (uvalue + p_info->const_add > uvalue) : + (value + p_info->const_add > value)) != (p_info->const_add > 0)) + { + cmp1 = force_reg (mode, cmp1); + } + else if (!(p_info->const_range_p) (value + p_info->const_add)) + { + cmp1 = force_reg (mode, cmp1); + } + } + else if ((GET_CODE (cmp1) != REG) && (GET_CODE (cmp1) != SUBREG)) + { + cmp1 = force_reg (mode, cmp1); + } + + /* See if we need to invert the result. */ + *p_invert = ((GET_CODE (cmp1) == CONST_INT) + ? p_info->invert_const + : p_info->invert_reg); + + /* Comparison to constants, may involve adding 1 to change a LT into LE. + Comparison between two registers, may involve switching operands. 
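+   For example (a sketch of the table above): a register-register
+   `x > y' is emitted as `y < x' (the GT row selects LT with
+   reverse_regs set), while `x > 5' becomes `x < 6' with the branch
+   sense inverted by the caller (const_add == 1, invert_const == 1).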
*/ + if (GET_CODE (cmp1) == CONST_INT) + { + if (p_info->const_add != 0) + cmp1 = GEN_INT (INTVAL (cmp1) + p_info->const_add); + + } + else if (p_info->reverse_regs) + { + rtx temp = cmp0; + cmp0 = cmp1; + cmp1 = temp; + } + + return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); +} + + +/* Generate the code to compare two float values. The return value is + the comparison expression. */ + +static rtx +gen_float_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + rtx cmp0, /* first operand to compare */ + rtx cmp1 /* second operand to compare */) +{ + rtx (*gen_fn) (rtx, rtx, rtx); + rtx brtmp; + int reverse_regs, invert; + + switch (test_code) + { + case EQ: reverse_regs = 0; invert = 0; gen_fn = gen_seq_sf; break; + case NE: reverse_regs = 0; invert = 1; gen_fn = gen_seq_sf; break; + case LE: reverse_regs = 0; invert = 0; gen_fn = gen_sle_sf; break; + case GT: reverse_regs = 1; invert = 0; gen_fn = gen_slt_sf; break; + case LT: reverse_regs = 0; invert = 0; gen_fn = gen_slt_sf; break; + case GE: reverse_regs = 1; invert = 0; gen_fn = gen_sle_sf; break; + case UNEQ: reverse_regs = 0; invert = 0; gen_fn = gen_suneq_sf; break; + case LTGT: reverse_regs = 0; invert = 1; gen_fn = gen_suneq_sf; break; + case UNLE: reverse_regs = 0; invert = 0; gen_fn = gen_sunle_sf; break; + case UNGT: reverse_regs = 1; invert = 0; gen_fn = gen_sunlt_sf; break; + case UNLT: reverse_regs = 0; invert = 0; gen_fn = gen_sunlt_sf; break; + case UNGE: reverse_regs = 1; invert = 0; gen_fn = gen_sunle_sf; break; + case UNORDERED: + reverse_regs = 0; invert = 0; gen_fn = gen_sunordered_sf; break; + case ORDERED: + reverse_regs = 0; invert = 1; gen_fn = gen_sunordered_sf; break; + default: + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); + reverse_regs = 0; invert = 0; gen_fn = 0; /* avoid compiler warnings */ + } + + if (reverse_regs) + { + rtx temp = cmp0; + cmp0 = cmp1; + cmp1 = temp; + } + + brtmp = gen_rtx_REG (CCmode, FPCC_REGNUM); + emit_insn (gen_fn (brtmp, cmp0, cmp1)); + + return gen_rtx_fmt_ee (invert ? EQ : NE, VOIDmode, brtmp, const0_rtx); +} + + +void +xtensa_expand_conditional_branch (rtx *operands, enum machine_mode mode) +{ + enum rtx_code test_code = GET_CODE (operands[0]); + rtx cmp0 = operands[1]; + rtx cmp1 = operands[2]; + rtx cmp; + int invert; + rtx label1, label2; + + switch (mode) + { + case DFmode: + default: + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); + + case SImode: + invert = FALSE; + cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); + break; + + case SFmode: + if (!TARGET_HARD_FLOAT) + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, + cmp0, cmp1)); + invert = FALSE; + cmp = gen_float_relational (test_code, cmp0, cmp1); + break; + } + + /* Generate the branch. */ + + label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + label2 = pc_rtx; + + if (invert) + { + label2 = label1; + label1 = pc_rtx; + } + + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, + label1, + label2))); +} + + +static rtx +gen_conditional_move (enum rtx_code code, enum machine_mode mode, + rtx op0, rtx op1) +{ + if (mode == SImode) + { + rtx cmp; + + /* Jump optimization calls get_condition() which canonicalizes + comparisons like (GE x <const>) to (GT x <const-1>). + Transform those comparisons back to GE, since that is the + comparison supported in Xtensa. 
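+     (For example, get_condition () turns `x >= 0' into `x > -1';
+     the test just below maps GT/-1 back to GE/0.)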
We shouldn't have to + transform <LE x const> comparisons, because neither + xtensa_expand_conditional_branch() nor get_condition() will + produce them. */ + + if ((code == GT) && (op1 == constm1_rtx)) + { + code = GE; + op1 = const0_rtx; + } + cmp = gen_rtx_fmt_ee (code, VOIDmode, cc0_rtx, const0_rtx); + + if (boolean_operator (cmp, VOIDmode)) + { + /* Swap the operands to make const0 second. */ + if (op0 == const0_rtx) + { + op0 = op1; + op1 = const0_rtx; + } + + /* If not comparing against zero, emit a comparison (subtract). */ + if (op1 != const0_rtx) + { + op0 = expand_binop (SImode, sub_optab, op0, op1, + 0, 0, OPTAB_LIB_WIDEN); + op1 = const0_rtx; + } + } + else if (branch_operator (cmp, VOIDmode)) + { + /* Swap the operands to make const0 second. */ + if (op0 == const0_rtx) + { + op0 = op1; + op1 = const0_rtx; + + switch (code) + { + case LT: code = GE; break; + case GE: code = LT; break; + default: gcc_unreachable (); + } + } + + if (op1 != const0_rtx) + return 0; + } + else + return 0; + + return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + } + + if (TARGET_HARD_FLOAT && mode == SFmode) + return gen_float_relational (code, op0, op1); + + return 0; +} + + +int +xtensa_expand_conditional_move (rtx *operands, int isflt) +{ + rtx dest = operands[0]; + rtx cmp = operands[1]; + enum machine_mode cmp_mode = GET_MODE (XEXP (cmp, 0)); + rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx); + + if (!(cmp = gen_conditional_move (GET_CODE (cmp), cmp_mode, + XEXP (cmp, 0), XEXP (cmp, 1)))) + return 0; + + if (isflt) + gen_fn = (cmp_mode == SImode + ? gen_movsfcc_internal0 + : gen_movsfcc_internal1); + else + gen_fn = (cmp_mode == SImode + ? gen_movsicc_internal0 + : gen_movsicc_internal1); + + emit_insn (gen_fn (dest, XEXP (cmp, 0), operands[2], operands[3], cmp)); + return 1; +} + + +int +xtensa_expand_scc (rtx operands[4], enum machine_mode cmp_mode) +{ + rtx dest = operands[0]; + rtx cmp; + rtx one_tmp, zero_tmp; + rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx); + + if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode, + operands[2], operands[3]))) + return 0; + + one_tmp = gen_reg_rtx (SImode); + zero_tmp = gen_reg_rtx (SImode); + emit_insn (gen_movsi (one_tmp, const_true_rtx)); + emit_insn (gen_movsi (zero_tmp, const0_rtx)); + + gen_fn = (cmp_mode == SImode + ? gen_movsicc_internal0 + : gen_movsicc_internal1); + emit_insn (gen_fn (dest, XEXP (cmp, 0), one_tmp, zero_tmp, cmp)); + return 1; +} + + +/* Split OP[1] into OP[2,3] and likewise for OP[0] into OP[0,1]. MODE is + for the output, i.e., the input operands are twice as big as MODE. 
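+   A sketch: splitting a DImode register pair with MODE == SImode turns
+   (reg:DI a2) into (reg:SI a2) and (reg:SI a3), while a MEM operand is
+   split into two SImode MEMs at byte offsets 0 and 4.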
*/ + +void +xtensa_split_operand_pair (rtx operands[4], enum machine_mode mode) +{ + switch (GET_CODE (operands[1])) + { + case REG: + operands[3] = gen_rtx_REG (mode, REGNO (operands[1]) + 1); + operands[2] = gen_rtx_REG (mode, REGNO (operands[1])); + break; + + case MEM: + operands[3] = adjust_address (operands[1], mode, GET_MODE_SIZE (mode)); + operands[2] = adjust_address (operands[1], mode, 0); + break; + + case CONST_INT: + case CONST_DOUBLE: + split_double (operands[1], &operands[2], &operands[3]); + break; + + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[0])) + { + case REG: + operands[1] = gen_rtx_REG (mode, REGNO (operands[0]) + 1); + operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); + break; + + case MEM: + operands[1] = adjust_address (operands[0], mode, GET_MODE_SIZE (mode)); + operands[0] = adjust_address (operands[0], mode, 0); + break; + + default: + gcc_unreachable (); + } +} + + +/* Emit insns to move operands[1] into operands[0]. + Return 1 if we have written out everything that needs to be done to + do the move. Otherwise, return 0 and the caller will emit the move + normally. */ + +int +xtensa_emit_move_sequence (rtx *operands, enum machine_mode mode) +{ + rtx src = operands[1]; + + if (CONSTANT_P (src) + && (GET_CODE (src) != CONST_INT || ! xtensa_simm12b (INTVAL (src)))) + { + rtx dst = operands[0]; + + if (xtensa_tls_referenced_p (src)) + { + rtx addend = NULL; + + if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS) + { + addend = XEXP (XEXP (src, 0), 1); + src = XEXP (XEXP (src, 0), 0); + } + + src = xtensa_legitimize_tls_address (src); + if (addend) + { + src = gen_rtx_PLUS (mode, src, addend); + src = force_operand (src, dst); + } + emit_move_insn (dst, src); + return 1; + } + + if (! TARGET_CONST16) + { + src = force_const_mem (SImode, src); + operands[1] = src; + } + + /* PC-relative loads are always SImode, and CONST16 is only + supported in the movsi pattern, so add a SUBREG for any other + (smaller) mode. */ + + if (mode != SImode) + { + if (register_operand (dst, mode)) + { + emit_move_insn (simplify_gen_subreg (SImode, dst, mode, 0), src); + return 1; + } + else + { + src = force_reg (SImode, src); + src = gen_lowpart_SUBREG (mode, src); + operands[1] = src; + } + } + } + + if (!(reload_in_progress | reload_completed) + && !xtensa_valid_move (mode, operands)) + operands[1] = force_reg (mode, operands[1]); + + operands[1] = xtensa_copy_incoming_a7 (operands[1]); + + /* During reload we don't want to emit (subreg:X (mem:Y)) since that + instruction won't be recognized after reload, so we remove the + subreg and adjust mem accordingly. */ + if (reload_in_progress) + { + operands[0] = fixup_subreg_mem (operands[0]); + operands[1] = fixup_subreg_mem (operands[1]); + } + return 0; +} + + +static rtx +fixup_subreg_mem (rtx x) +{ + if (GET_CODE (x) == SUBREG + && GET_CODE (SUBREG_REG (x)) == REG + && REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER) + { + rtx temp = + gen_rtx_SUBREG (GET_MODE (x), + reg_equiv_mem [REGNO (SUBREG_REG (x))], + SUBREG_BYTE (x)); + x = alter_subreg (&temp); + } + return x; +} + + +/* Check if an incoming argument in a7 is expected to be used soon and + if OPND is a register or register pair that includes a7. If so, + create a new pseudo and copy a7 into that pseudo at the very + beginning of the function, followed by the special "set_frame_ptr" + unspec_volatile insn. The return value is either the original + operand, if it is not a7, or the new pseudo containing a copy of + the incoming argument. 
This is necessary because the register + allocator will ignore conflicts with a7 and may either assign some + other pseudo to a7 or use a7 as the hard_frame_pointer, clobbering + the incoming argument in a7. By copying the argument out of a7 as + the very first thing, and then immediately following that with an + unspec_volatile to keep the scheduler away, we should avoid any + problems. Putting the set_frame_ptr insn at the beginning, with + only the a7 copy before it, also makes it easier for the prologue + expander to initialize the frame pointer after the a7 copy and to + fix up the a7 copy to use the stack pointer instead of the frame + pointer. */ + +rtx +xtensa_copy_incoming_a7 (rtx opnd) +{ + rtx entry_insns = 0; + rtx reg, tmp; + enum machine_mode mode; + + if (!cfun->machine->need_a7_copy) + return opnd; + + /* This function should never be called again once a7 has been copied. */ + gcc_assert (!cfun->machine->set_frame_ptr_insn); + + mode = GET_MODE (opnd); + + /* The operand using a7 may come in a later instruction, so just return + the original operand if it doesn't use a7. */ + reg = opnd; + if (GET_CODE (reg) == SUBREG) + { + gcc_assert (SUBREG_BYTE (reg) == 0); + reg = SUBREG_REG (reg); + } + if (GET_CODE (reg) != REG + || REGNO (reg) > A7_REG + || REGNO (reg) + HARD_REGNO_NREGS (A7_REG, mode) <= A7_REG) + return opnd; + + /* 1-word args will always be in a7; 2-word args in a6/a7. */ + gcc_assert (REGNO (reg) + HARD_REGNO_NREGS (A7_REG, mode) - 1 == A7_REG); + + cfun->machine->need_a7_copy = false; + + /* Copy a7 to a new pseudo at the function entry. Use gen_raw_REG to + create the REG for a7 so that hard_frame_pointer_rtx is not used. */ + + start_sequence (); + tmp = gen_reg_rtx (mode); + + switch (mode) + { + case DFmode: + case DImode: + /* Copy the value out of A7 here but keep the first word in A6 until + after the set_frame_ptr insn. Otherwise, the register allocator + may decide to put "subreg (tmp, 0)" in A7 and clobber the incoming + value. */ + emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 4), + gen_raw_REG (SImode, A7_REG))); + break; + case SFmode: + emit_insn (gen_movsf_internal (tmp, gen_raw_REG (mode, A7_REG))); + break; + case SImode: + emit_insn (gen_movsi_internal (tmp, gen_raw_REG (mode, A7_REG))); + break; + case HImode: + emit_insn (gen_movhi_internal (tmp, gen_raw_REG (mode, A7_REG))); + break; + case QImode: + emit_insn (gen_movqi_internal (tmp, gen_raw_REG (mode, A7_REG))); + break; + default: + gcc_unreachable (); + } + + cfun->machine->set_frame_ptr_insn = emit_insn (gen_set_frame_ptr ()); + + /* For DF and DI mode arguments, copy the incoming value in A6 now. */ + if (mode == DFmode || mode == DImode) + emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 0), + gen_rtx_REG (SImode, A7_REG - 1))); + entry_insns = get_insns (); + end_sequence (); + + if (cfun->machine->vararg_a7) + { + /* This is called from within builtin_saveregs, which will insert the + saveregs code at the function entry, ahead of anything placed at + the function entry now. Instead, save the sequence to be inserted + at the beginning of the saveregs code. */ + cfun->machine->vararg_a7_copy = entry_insns; + } + else + { + /* Put entry_insns after the NOTE that starts the function. If + this is inside a start_sequence, make the outer-level insn + chain current, so the code is placed at the start of the + function. */ + push_topmost_sequence (); + /* Do not use entry_of_function() here. 
This is called from within + expand_function_start, when the CFG still holds GIMPLE. */ + emit_insn_after (entry_insns, get_insns ()); + pop_topmost_sequence (); + } + + return tmp; +} + + +/* Try to expand a block move operation to a sequence of RTL move + instructions. If not optimizing, or if the block size is not a + constant, or if the block is too large, the expansion fails and GCC + falls back to calling memcpy(). + + operands[0] is the destination + operands[1] is the source + operands[2] is the length + operands[3] is the alignment */ + +int +xtensa_expand_block_move (rtx *operands) +{ + static const enum machine_mode mode_from_align[] = + { + VOIDmode, QImode, HImode, VOIDmode, SImode, + }; + + rtx dst_mem = operands[0]; + rtx src_mem = operands[1]; + HOST_WIDE_INT bytes, align; + int num_pieces, move_ratio; + rtx temp[2]; + enum machine_mode mode[2]; + int amount[2]; + bool active[2]; + int phase = 0; + int next; + int offset_ld = 0; + int offset_st = 0; + rtx x; + + /* If this is not a fixed size move, just call memcpy. */ + if (!optimize || (GET_CODE (operands[2]) != CONST_INT)) + return 0; + + bytes = INTVAL (operands[2]); + align = INTVAL (operands[3]); + + /* Anything to move? */ + if (bytes <= 0) + return 0; + + if (align > MOVE_MAX) + align = MOVE_MAX; + + /* Decide whether to expand inline based on the optimization level. */ + move_ratio = 4; + if (optimize > 2) + move_ratio = LARGEST_MOVE_RATIO; + num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ + if (num_pieces > move_ratio) + return 0; + + x = XEXP (dst_mem, 0); + if (!REG_P (x)) + { + x = force_reg (Pmode, x); + dst_mem = replace_equiv_address (dst_mem, x); + } + + x = XEXP (src_mem, 0); + if (!REG_P (x)) + { + x = force_reg (Pmode, x); + src_mem = replace_equiv_address (src_mem, x); + } + + active[0] = active[1] = false; + + do + { + next = phase; + phase ^= 1; + + if (bytes > 0) + { + int next_amount; + + next_amount = (bytes >= 4 ? 4 : (bytes >= 2 ? 2 : 1)); + next_amount = MIN (next_amount, align); + + amount[next] = next_amount; + mode[next] = mode_from_align[next_amount]; + temp[next] = gen_reg_rtx (mode[next]); + + x = adjust_address (src_mem, mode[next], offset_ld); + emit_insn (gen_rtx_SET (VOIDmode, temp[next], x)); + + offset_ld += next_amount; + bytes -= next_amount; + active[next] = true; + } + + if (active[phase]) + { + active[phase] = false; + + x = adjust_address (dst_mem, mode[phase], offset_st); + emit_insn (gen_rtx_SET (VOIDmode, x, temp[phase])); + + offset_st += amount[phase]; + } + } + while (active[next]); + + return 1; +} + + +void +xtensa_expand_nonlocal_goto (rtx *operands) +{ + rtx goto_handler = operands[1]; + rtx containing_fp = operands[3]; + + /* Generate a call to "__xtensa_nonlocal_goto" (in libgcc); the code + is too big to generate in-line. */ + + if (GET_CODE (containing_fp) != REG) + containing_fp = force_reg (Pmode, containing_fp); + + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_nonlocal_goto"), + LCT_NORMAL, VOIDmode, 2, + containing_fp, Pmode, + goto_handler, Pmode); +} + + +static struct machine_function * +xtensa_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + + +/* Shift VAL of mode MODE left by COUNT bits. 
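+   A sketch: for a QImode VAL this amounts to `val & 0xff' followed by a
+   left shift by COUNT, placing the field at its position within the
+   containing SImode word for the atomic sequences below.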
*/ + +static inline rtx +xtensa_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count) +{ + val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)), + NULL_RTX, 1, OPTAB_DIRECT); + return expand_simple_binop (SImode, ASHIFT, val, count, + NULL_RTX, 1, OPTAB_DIRECT); +} + + +/* Structure to hold the initial parameters for a compare_and_swap operation + in HImode and QImode. */ + +struct alignment_context +{ + rtx memsi; /* SI aligned memory location. */ + rtx shift; /* Bit offset with regard to lsb. */ + rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */ + rtx modemaski; /* ~modemask */ +}; + + +/* Initialize structure AC for word access to HI and QI mode memory. */ + +static void +init_alignment_context (struct alignment_context *ac, rtx mem) +{ + enum machine_mode mode = GET_MODE (mem); + rtx byteoffset = NULL_RTX; + bool aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode)); + + if (aligned) + ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */ + else + { + /* Alignment is unknown. */ + rtx addr, align; + + /* Force the address into a register. */ + addr = force_reg (Pmode, XEXP (mem, 0)); + + /* Align it to SImode. */ + align = expand_simple_binop (Pmode, AND, addr, + GEN_INT (-GET_MODE_SIZE (SImode)), + NULL_RTX, 1, OPTAB_DIRECT); + /* Generate MEM. */ + ac->memsi = gen_rtx_MEM (SImode, align); + MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem); + set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER); + set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode)); + + byteoffset = expand_simple_binop (Pmode, AND, addr, + GEN_INT (GET_MODE_SIZE (SImode) - 1), + NULL_RTX, 1, OPTAB_DIRECT); + } + + /* Calculate shiftcount. */ + if (TARGET_BIG_ENDIAN) + { + ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode)); + if (!aligned) + ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset, + NULL_RTX, 1, OPTAB_DIRECT); + } + else + { + if (aligned) + ac->shift = NULL_RTX; + else + ac->shift = byteoffset; + } + + if (ac->shift != NULL_RTX) + { + /* Shift is the byte count, but we need the bitcount. */ + ac->shift = expand_simple_binop (SImode, MULT, ac->shift, + GEN_INT (BITS_PER_UNIT), + NULL_RTX, 1, OPTAB_DIRECT); + ac->modemask = expand_simple_binop (SImode, ASHIFT, + GEN_INT (GET_MODE_MASK (mode)), + ac->shift, + NULL_RTX, 1, OPTAB_DIRECT); + } + else + ac->modemask = GEN_INT (GET_MODE_MASK (mode)); + + ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask, NULL_RTX, 1); +} + + +/* Expand an atomic compare and swap operation for HImode and QImode. + MEM is the memory location, CMP the old value to compare MEM with + and NEW_RTX the value to set if CMP == MEM. */ + +void +xtensa_expand_compare_and_swap (rtx target, rtx mem, rtx cmp, rtx new_rtx) +{ + enum machine_mode mode = GET_MODE (mem); + struct alignment_context ac; + rtx tmp, cmpv, newv, val; + rtx oldval = gen_reg_rtx (SImode); + rtx res = gen_reg_rtx (SImode); + rtx csloop = gen_label_rtx (); + rtx csend = gen_label_rtx (); + + init_alignment_context (&ac, mem); + + if (ac.shift != NULL_RTX) + { + cmp = xtensa_expand_mask_and_shift (cmp, mode, ac.shift); + new_rtx = xtensa_expand_mask_and_shift (new_rtx, mode, ac.shift); + } + + /* Load the surrounding word into VAL with the MEM value masked out. */ + val = force_reg (SImode, expand_simple_binop (SImode, AND, ac.memsi, + ac.modemaski, NULL_RTX, 1, + OPTAB_DIRECT)); + emit_label (csloop); + + /* Patch CMP and NEW_RTX into VAL at correct position. 
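+     VAL was loaded above with the field bits masked out, so an IOR is
+     enough to merge in the shifted CMP and NEW_RTX values.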
*/ + cmpv = force_reg (SImode, expand_simple_binop (SImode, IOR, cmp, val, + NULL_RTX, 1, OPTAB_DIRECT)); + newv = force_reg (SImode, expand_simple_binop (SImode, IOR, new_rtx, val, + NULL_RTX, 1, OPTAB_DIRECT)); + + /* Jump to end if we're done. */ + emit_insn (gen_sync_compare_and_swapsi (res, ac.memsi, cmpv, newv)); + emit_cmp_and_jump_insns (res, cmpv, EQ, const0_rtx, SImode, true, csend); + + /* Check for changes outside mode. */ + emit_move_insn (oldval, val); + tmp = expand_simple_binop (SImode, AND, res, ac.modemaski, + val, 1, OPTAB_DIRECT); + if (tmp != val) + emit_move_insn (val, tmp); + + /* Loop internal if so. */ + emit_cmp_and_jump_insns (oldval, val, NE, const0_rtx, SImode, true, csloop); + + emit_label (csend); + + /* Return the correct part of the bitfield. */ + convert_move (target, + (ac.shift == NULL_RTX ? res + : expand_simple_binop (SImode, LSHIFTRT, res, ac.shift, + NULL_RTX, 1, OPTAB_DIRECT)), + 1); +} + + +/* Expand an atomic operation CODE of mode MODE (either HImode or QImode -- + the default expansion works fine for SImode). MEM is the memory location + and VAL the value to play with. If AFTER is true then store the value + MEM holds after the operation, if AFTER is false then store the value MEM + holds before the operation. If TARGET is zero then discard that value, else + store it to TARGET. */ + +void +xtensa_expand_atomic (enum rtx_code code, rtx target, rtx mem, rtx val, + bool after) +{ + enum machine_mode mode = GET_MODE (mem); + struct alignment_context ac; + rtx csloop = gen_label_rtx (); + rtx cmp, tmp; + rtx old = gen_reg_rtx (SImode); + rtx new_rtx = gen_reg_rtx (SImode); + rtx orig = NULL_RTX; + + init_alignment_context (&ac, mem); + + /* Prepare values before the compare-and-swap loop. */ + if (ac.shift != NULL_RTX) + val = xtensa_expand_mask_and_shift (val, mode, ac.shift); + switch (code) + { + case PLUS: + case MINUS: + orig = gen_reg_rtx (SImode); + convert_move (orig, val, 1); + break; + + case SET: + case IOR: + case XOR: + break; + + case MULT: /* NAND */ + case AND: + /* val = "11..1<val>11..1" */ + val = expand_simple_binop (SImode, XOR, val, ac.modemaski, + NULL_RTX, 1, OPTAB_DIRECT); + break; + + default: + gcc_unreachable (); + } + + /* Load full word. Subsequent loads are performed by S32C1I. */ + cmp = force_reg (SImode, ac.memsi); + + emit_label (csloop); + emit_move_insn (old, cmp); + + switch (code) + { + case PLUS: + case MINUS: + val = expand_simple_binop (SImode, code, old, orig, + NULL_RTX, 1, OPTAB_DIRECT); + val = expand_simple_binop (SImode, AND, val, ac.modemask, + NULL_RTX, 1, OPTAB_DIRECT); + /* FALLTHRU */ + case SET: + tmp = expand_simple_binop (SImode, AND, old, ac.modemaski, + NULL_RTX, 1, OPTAB_DIRECT); + tmp = expand_simple_binop (SImode, IOR, tmp, val, + new_rtx, 1, OPTAB_DIRECT); + break; + + case AND: + case IOR: + case XOR: + tmp = expand_simple_binop (SImode, code, old, val, + new_rtx, 1, OPTAB_DIRECT); + break; + + case MULT: /* NAND */ + tmp = expand_simple_binop (SImode, XOR, old, ac.modemask, + NULL_RTX, 1, OPTAB_DIRECT); + tmp = expand_simple_binop (SImode, AND, tmp, val, + new_rtx, 1, OPTAB_DIRECT); + break; + + default: + gcc_unreachable (); + } + + if (tmp != new_rtx) + emit_move_insn (new_rtx, tmp); + emit_insn (gen_sync_compare_and_swapsi (cmp, ac.memsi, old, new_rtx)); + emit_cmp_and_jump_insns (cmp, old, NE, const0_rtx, SImode, true, csloop); + + if (target) + { + tmp = (after ? new_rtx : cmp); + convert_move (target, + (ac.shift == NULL_RTX ? 
tmp + : expand_simple_binop (SImode, LSHIFTRT, tmp, ac.shift, + NULL_RTX, 1, OPTAB_DIRECT)), + 1); + } +} + + +void +xtensa_setup_frame_addresses (void) +{ + /* Set flag to cause TARGET_FRAME_POINTER_REQUIRED to return true. */ + cfun->machine->accesses_prev_frame = 1; + + emit_library_call + (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_libgcc_window_spill"), + LCT_NORMAL, VOIDmode, 0); +} + + +/* Emit the assembly for the end of a zero-cost loop. Normally we just emit + a comment showing where the end of the loop is. However, if there is a + label or a branch at the end of the loop then we need to place a nop + there. If the loop ends with a label we need the nop so that branches + targeting that label will target the nop (and thus remain in the loop), + instead of targeting the instruction after the loop (and thus exiting + the loop). If the loop ends with a branch, we need the nop in case the + branch is targeting a location inside the loop. When the branch + executes it will cause the loop count to be decremented even if it is + taken (because it is the last instruction in the loop), so we need to + nop after the branch to prevent the loop count from being decremented + when the branch is taken. */ + +void +xtensa_emit_loop_end (rtx insn, rtx *operands) +{ + char done = 0; + + for (insn = PREV_INSN (insn); insn && !done; insn = PREV_INSN (insn)) + { + switch (GET_CODE (insn)) + { + case NOTE: + case BARRIER: + break; + + case CODE_LABEL: + output_asm_insn (TARGET_DENSITY ? "nop.n" : "nop", operands); + done = 1; + break; + + default: + { + rtx body = PATTERN (insn); + + if (GET_CODE (body) == JUMP_INSN) + { + output_asm_insn (TARGET_DENSITY ? "nop.n" : "nop", operands); + done = 1; + } + else if ((GET_CODE (body) != USE) + && (GET_CODE (body) != CLOBBER)) + done = 1; + } + break; + } + } + + output_asm_insn ("# loop end for %0", operands); +} + + +char * +xtensa_emit_branch (bool inverted, bool immed, rtx *operands) +{ + static char result[64]; + enum rtx_code code; + const char *op; + + code = GET_CODE (operands[3]); + switch (code) + { + case EQ: op = inverted ? "ne" : "eq"; break; + case NE: op = inverted ? "eq" : "ne"; break; + case LT: op = inverted ? "ge" : "lt"; break; + case GE: op = inverted ? "lt" : "ge"; break; + case LTU: op = inverted ? "geu" : "ltu"; break; + case GEU: op = inverted ? "ltu" : "geu"; break; + default: gcc_unreachable (); + } + + if (immed) + { + if (INTVAL (operands[1]) == 0) + sprintf (result, "b%sz%s\t%%0, %%2", op, + (TARGET_DENSITY && (code == EQ || code == NE)) ? ".n" : ""); + else + sprintf (result, "b%si\t%%0, %%d1, %%2", op); + } + else + sprintf (result, "b%s\t%%0, %%1, %%2", op); + + return result; +} + + +char * +xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) +{ + static char result[64]; + const char *op; + + switch (GET_CODE (operands[3])) + { + case EQ: op = inverted ? "bs" : "bc"; break; + case NE: op = inverted ? "bc" : "bs"; break; + default: gcc_unreachable (); + } + + if (immed) + { + unsigned bitnum = INTVAL (operands[1]) & 0x1f; + operands[1] = GEN_INT (bitnum); + sprintf (result, "b%si\t%%0, %%d1, %%2", op); + } + else + sprintf (result, "b%s\t%%0, %%1, %%2", op); + + return result; +} + + +char * +xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) +{ + static char result[64]; + enum rtx_code code; + const char *op; + + code = GET_CODE (operands[4]); + if (isbool) + { + switch (code) + { + case EQ: op = inverted ? "t" : "f"; break; + case NE: op = inverted ? 
"f" : "t"; break; + default: gcc_unreachable (); + } + } + else + { + switch (code) + { + case EQ: op = inverted ? "nez" : "eqz"; break; + case NE: op = inverted ? "eqz" : "nez"; break; + case LT: op = inverted ? "gez" : "ltz"; break; + case GE: op = inverted ? "ltz" : "gez"; break; + default: gcc_unreachable (); + } + } + + sprintf (result, "mov%s%s\t%%0, %%%d, %%1", + op, isfp ? ".s" : "", inverted ? 3 : 2); + return result; +} + + +char * +xtensa_emit_call (int callop, rtx *operands) +{ + static char result[64]; + rtx tgt = operands[callop]; + + if (GET_CODE (tgt) == CONST_INT) + sprintf (result, "call8\t0x%lx", INTVAL (tgt)); + else if (register_operand (tgt, VOIDmode)) + sprintf (result, "callx8\t%%%d", callop); + else + sprintf (result, "call8\t%%%d", callop); + + return result; +} + + +bool +xtensa_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict) +{ + /* Allow constant pool addresses. */ + if (mode != BLKmode && GET_MODE_SIZE (mode) >= UNITS_PER_WORD + && ! TARGET_CONST16 && constantpool_address_p (addr) + && ! xtensa_tls_referenced_p (addr)) + return true; + + while (GET_CODE (addr) == SUBREG) + addr = SUBREG_REG (addr); + + /* Allow base registers. */ + if (GET_CODE (addr) == REG && BASE_REG_P (addr, strict)) + return true; + + /* Check for "register + offset" addressing. */ + if (GET_CODE (addr) == PLUS) + { + rtx xplus0 = XEXP (addr, 0); + rtx xplus1 = XEXP (addr, 1); + enum rtx_code code0; + enum rtx_code code1; + + while (GET_CODE (xplus0) == SUBREG) + xplus0 = SUBREG_REG (xplus0); + code0 = GET_CODE (xplus0); + + while (GET_CODE (xplus1) == SUBREG) + xplus1 = SUBREG_REG (xplus1); + code1 = GET_CODE (xplus1); + + /* Swap operands if necessary so the register is first. */ + if (code0 != REG && code1 == REG) + { + xplus0 = XEXP (addr, 1); + xplus1 = XEXP (addr, 0); + code0 = GET_CODE (xplus0); + code1 = GET_CODE (xplus1); + } + + if (code0 == REG && BASE_REG_P (xplus0, strict) + && code1 == CONST_INT + && xtensa_mem_offset (INTVAL (xplus1), mode)) + return true; + } + + return false; +} + + +/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ + +static GTY(()) rtx xtensa_tls_module_base_symbol; + +static rtx +xtensa_tls_module_base (void) +{ + if (! 
xtensa_tls_module_base_symbol) + { + xtensa_tls_module_base_symbol = + gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_"); + SYMBOL_REF_FLAGS (xtensa_tls_module_base_symbol) + |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; + } + + return xtensa_tls_module_base_symbol; +} + + +static rtx +xtensa_call_tls_desc (rtx sym, rtx *retp) +{ + rtx fn, arg, a10, call_insn, insns; + + start_sequence (); + fn = gen_reg_rtx (Pmode); + arg = gen_reg_rtx (Pmode); + a10 = gen_rtx_REG (Pmode, 10); + + emit_insn (gen_tls_func (fn, sym)); + emit_insn (gen_tls_arg (arg, sym)); + emit_move_insn (a10, arg); + call_insn = emit_call_insn (gen_tls_call (a10, fn, sym, const1_rtx)); + CALL_INSN_FUNCTION_USAGE (call_insn) + = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, a10), + CALL_INSN_FUNCTION_USAGE (call_insn)); + insns = get_insns (); + end_sequence (); + + *retp = a10; + return insns; +} + + +static rtx +xtensa_legitimize_tls_address (rtx x) +{ + unsigned int model = SYMBOL_REF_TLS_MODEL (x); + rtx dest, tp, ret, modbase, base, addend, insns; + + dest = gen_reg_rtx (Pmode); + switch (model) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + insns = xtensa_call_tls_desc (x, &ret); + emit_libcall_block (insns, dest, ret, x); + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + base = gen_reg_rtx (Pmode); + modbase = xtensa_tls_module_base (); + insns = xtensa_call_tls_desc (modbase, &ret); + emit_libcall_block (insns, base, ret, modbase); + addend = force_reg (SImode, gen_sym_DTPOFF (x)); + emit_insn (gen_addsi3 (dest, base, addend)); + break; + + case TLS_MODEL_INITIAL_EXEC: + case TLS_MODEL_LOCAL_EXEC: + tp = gen_reg_rtx (SImode); + emit_insn (gen_load_tp (tp)); + addend = force_reg (SImode, gen_sym_TPOFF (x)); + emit_insn (gen_addsi3 (dest, tp, addend)); + break; + + default: + gcc_unreachable (); + } + + return dest; +} + + +rtx +xtensa_legitimize_address (rtx x, + rtx oldx ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + if (xtensa_tls_symbol_p (x)) + return xtensa_legitimize_tls_address (x); + + if (GET_CODE (x) == PLUS) + { + rtx plus0 = XEXP (x, 0); + rtx plus1 = XEXP (x, 1); + + if (GET_CODE (plus0) != REG && GET_CODE (plus1) == REG) + { + plus0 = XEXP (x, 1); + plus1 = XEXP (x, 0); + } + + /* Try to split up the offset to use an ADDMI instruction. */ + if (GET_CODE (plus0) == REG + && GET_CODE (plus1) == CONST_INT + && !xtensa_mem_offset (INTVAL (plus1), mode) + && !xtensa_simm8 (INTVAL (plus1)) + && xtensa_mem_offset (INTVAL (plus1) & 0xff, mode) + && xtensa_simm8x256 (INTVAL (plus1) & ~0xff)) + { + rtx temp = gen_reg_rtx (Pmode); + rtx addmi_offset = GEN_INT (INTVAL (plus1) & ~0xff); + emit_insn (gen_rtx_SET (Pmode, temp, + gen_rtx_PLUS (Pmode, plus0, addmi_offset))); + return gen_rtx_PLUS (Pmode, temp, GEN_INT (INTVAL (plus1) & 0xff)); + } + } + + return x; +} + +/* Worker function for TARGET_MODE_DEPENDENT_ADDRESS_P. + + Treat constant-pool references as "mode dependent" since they can + only be accessed with SImode loads. This works around a bug in the + combiner where a constant pool reference is temporarily converted + to an HImode load, which is then assumed to zero-extend based on + our definition of LOAD_EXTEND_OP. This is wrong because the high + bits of a 16-bit value in the constant pool are now sign-extended + by default. */ + +static bool +xtensa_mode_dependent_address_p (const_rtx addr) +{ + return constantpool_address_p (addr); +} + +/* Helper for xtensa_tls_referenced_p. 
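+   Called via for_each_rtx: returning 1 reports a TLS SYMBOL_REF,
+   returning -1 skips an already-legitimized UNSPEC subtree, and 0
+   continues the walk.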
*/ + +static int +xtensa_tls_referenced_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*x) == SYMBOL_REF) + return SYMBOL_REF_TLS_MODEL (*x) != 0; + + /* Ignore TLS references that have already been legitimized. */ + if (GET_CODE (*x) == UNSPEC) + { + switch (XINT (*x, 1)) + { + case UNSPEC_TPOFF: + case UNSPEC_DTPOFF: + case UNSPEC_TLS_FUNC: + case UNSPEC_TLS_ARG: + case UNSPEC_TLS_CALL: + return -1; + default: + break; + } + } + + return 0; +} + + +/* Return TRUE if X contains any TLS symbol references. */ + +bool +xtensa_tls_referenced_p (rtx x) +{ + if (! TARGET_HAVE_TLS) + return false; + + return for_each_rtx (&x, xtensa_tls_referenced_p_1, NULL); +} + + +/* Return the debugger register number to use for 'regno'. */ + +int +xtensa_dbx_register_number (int regno) +{ + int first = -1; + + if (GP_REG_P (regno)) + { + regno -= GP_REG_FIRST; + first = 0; + } + else if (BR_REG_P (regno)) + { + regno -= BR_REG_FIRST; + first = 16; + } + else if (FP_REG_P (regno)) + { + regno -= FP_REG_FIRST; + first = 48; + } + else if (ACC_REG_P (regno)) + { + first = 0x200; /* Start of Xtensa special registers. */ + regno = 16; /* ACCLO is special register 16. */ + } + + /* When optimizing, we sometimes get asked about pseudo-registers + that don't represent hard registers. Return 0 for these. */ + if (first == -1) + return 0; + + return first + regno; +} + + +/* Argument support functions. */ + +/* Initialize CUMULATIVE_ARGS for a function. */ + +void +init_cumulative_args (CUMULATIVE_ARGS *cum, int incoming) +{ + cum->arg_words = 0; + cum->incoming = incoming; +} + + +/* Advance the argument to the next argument position. */ + +static void +xtensa_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + int words, max; + int *arg_words; + + arg_words = &cum->arg_words; + max = MAX_ARGS_IN_REGISTERS; + + words = (((mode != BLKmode) + ? (int) GET_MODE_SIZE (mode) + : int_size_in_bytes (type)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + if (*arg_words < max + && (targetm.calls.must_pass_in_stack (mode, type) + || *arg_words + words > max)) + *arg_words = max; + + *arg_words += words; +} + + +/* Return an RTL expression containing the register for the given mode, + or 0 if the argument is to be passed on the stack. INCOMING_P is nonzero + if this is an incoming argument to the current function. */ + +static rtx +xtensa_function_arg_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool incoming_p) +{ + int regbase, words, max; + int *arg_words; + int regno; + + arg_words = &cum->arg_words; + regbase = (incoming_p ? GP_ARG_FIRST : GP_OUTGOING_ARG_FIRST); + max = MAX_ARGS_IN_REGISTERS; + + words = (((mode != BLKmode) + ? (int) GET_MODE_SIZE (mode) + : int_size_in_bytes (type)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + if (type && (TYPE_ALIGN (type) > BITS_PER_WORD)) + { + int align = MIN (TYPE_ALIGN (type), STACK_BOUNDARY) / BITS_PER_WORD; + *arg_words = (*arg_words + align - 1) & -align; + } + + if (*arg_words + words > max) + return (rtx)0; + + regno = regbase + *arg_words; + + if (cum->incoming && regno <= A7_REG && regno + words > A7_REG) + cfun->machine->need_a7_copy = true; + + return gen_rtx_REG (mode, regno); +} + +/* Implement TARGET_FUNCTION_ARG. */ + +static rtx +xtensa_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + return xtensa_function_arg_1 (cum, mode, type, false); +} + +/* Implement TARGET_FUNCTION_INCOMING_ARG. 
*/ + +static rtx +xtensa_function_incoming_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + return xtensa_function_arg_1 (cum, mode, type, true); +} + +static unsigned int +xtensa_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + unsigned int alignment; + + alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode); + if (alignment < PARM_BOUNDARY) + alignment = PARM_BOUNDARY; + if (alignment > STACK_BOUNDARY) + alignment = STACK_BOUNDARY; + return alignment; +} + + +static bool +xtensa_return_in_msb (const_tree valtype) +{ + return (TARGET_BIG_ENDIAN + && AGGREGATE_TYPE_P (valtype) + && int_size_in_bytes (valtype) >= UNITS_PER_WORD); +} + + +static void +xtensa_option_override (void) +{ + int regno; + enum machine_mode mode; + + if (!TARGET_BOOLEANS && TARGET_HARD_FLOAT) + error ("boolean registers required for the floating-point option"); + + /* Set up array giving whether a given register can hold a given mode. */ + for (mode = VOIDmode; + mode != MAX_MACHINE_MODE; + mode = (enum machine_mode) ((int) mode + 1)) + { + int size = GET_MODE_SIZE (mode); + enum mode_class mclass = GET_MODE_CLASS (mode); + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + { + int temp; + + if (ACC_REG_P (regno)) + temp = (TARGET_MAC16 + && (mclass == MODE_INT) && (size <= UNITS_PER_WORD)); + else if (GP_REG_P (regno)) + temp = ((regno & 1) == 0 || (size <= UNITS_PER_WORD)); + else if (FP_REG_P (regno)) + temp = (TARGET_HARD_FLOAT && (mode == SFmode)); + else if (BR_REG_P (regno)) + temp = (TARGET_BOOLEANS && (mode == CCmode)); + else + temp = FALSE; + + xtensa_hard_regno_mode_ok[(int) mode][regno] = temp; + } + } + + init_machine_status = xtensa_init_machine_status; + + /* Check PIC settings. PIC is only supported when using L32R + instructions, and some targets need to always use PIC. */ + if (flag_pic && TARGET_CONST16) + error ("-f%s is not supported with CONST16 instructions", + (flag_pic > 1 ? "PIC" : "pic")); + else if (TARGET_FORCE_NO_PIC) + flag_pic = 0; + else if (XTENSA_ALWAYS_PIC) + { + if (TARGET_CONST16) + error ("PIC is required but not supported with CONST16 instructions"); + flag_pic = 1; + } + /* There's no need for -fPIC (as opposed to -fpic) on Xtensa. */ + if (flag_pic > 1) + flag_pic = 1; + if (flag_pic && !flag_pie) + flag_shlib = 1; + + /* Hot/cold partitioning does not work on this architecture, because of + constant pools (the load instruction cannot necessarily reach that far). + Therefore disable it on this architecture. */ + if (flag_reorder_blocks_and_partition) + { + flag_reorder_blocks_and_partition = 0; + flag_reorder_blocks = 1; + } +} + +/* A C compound statement to output to stdio stream STREAM the + assembler syntax for an instruction operand X. X is an RTL + expression. + + CODE is a value that can be used to specify one of several ways + of printing the operand. It is used when identical operands + must be printed differently depending on the context. CODE + comes from the '%' specification that was used to request + printing of the operand. If the specification was just '%DIGIT' + then CODE is 0; if the specification was '%LTR DIGIT' then CODE + is the ASCII code for LTR. + + If X is a register, this macro should print the register's name. + The names can be found in an array 'reg_names' whose type is + 'char *[]'. 'reg_names' is initialized from 'REGISTER_NAMES'. 
+ + When the machine description has a specification '%PUNCT' (a '%' + followed by a punctuation character), this macro is called with + a null pointer for X and the punctuation character for CODE. + + 'a', 'c', 'l', and 'n' are reserved. + + The Xtensa specific codes are: + + 'd' CONST_INT, print as signed decimal + 'x' CONST_INT, print as signed hexadecimal + 'K' CONST_INT, print number of bits in mask for EXTUI + 'R' CONST_INT, print (X & 0x1f) + 'L' CONST_INT, print ((32 - X) & 0x1f) + 'D' REG, print second register of double-word register operand + 'N' MEM, print address of next word following a memory operand + 'v' MEM, if memory reference is volatile, output a MEMW before it + 't' any constant, add "@h" suffix for top 16 bits + 'b' any constant, add "@l" suffix for bottom 16 bits +*/ + +static void +printx (FILE *file, signed int val) +{ + /* Print a hexadecimal value in a nice way. */ + if ((val > -0xa) && (val < 0xa)) + fprintf (file, "%d", val); + else if (val < 0) + fprintf (file, "-0x%x", -val); + else + fprintf (file, "0x%x", val); +} + + +void +print_operand (FILE *file, rtx x, int letter) +{ + if (!x) + error ("PRINT_OPERAND null pointer"); + + switch (letter) + { + case 'D': + if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) + fprintf (file, "%s", reg_names[xt_true_regnum (x) + 1]); + else + output_operand_lossage ("invalid %%D value"); + break; + + case 'v': + if (GET_CODE (x) == MEM) + { + /* For a volatile memory reference, emit a MEMW before the + load or store. */ + if (MEM_VOLATILE_P (x) && TARGET_SERIALIZE_VOLATILE) + fprintf (file, "memw\n\t"); + } + else + output_operand_lossage ("invalid %%v value"); + break; + + case 'N': + if (GET_CODE (x) == MEM + && (GET_MODE (x) == DFmode || GET_MODE (x) == DImode)) + { + x = adjust_address (x, GET_MODE (x) == DFmode ? SFmode : SImode, 4); + output_address (XEXP (x, 0)); + } + else + output_operand_lossage ("invalid %%N value"); + break; + + case 'K': + if (GET_CODE (x) == CONST_INT) + { + int num_bits = 0; + unsigned val = INTVAL (x); + while (val & 1) + { + num_bits += 1; + val = val >> 1; + } + if ((val != 0) || (num_bits == 0) || (num_bits > 16)) + fatal_insn ("invalid mask", x); + + fprintf (file, "%d", num_bits); + } + else + output_operand_lossage ("invalid %%K value"); + break; + + case 'L': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%ld", (32 - INTVAL (x)) & 0x1f); + else + output_operand_lossage ("invalid %%L value"); + break; + + case 'R': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%ld", INTVAL (x) & 0x1f); + else + output_operand_lossage ("invalid %%R value"); + break; + + case 'x': + if (GET_CODE (x) == CONST_INT) + printx (file, INTVAL (x)); + else + output_operand_lossage ("invalid %%x value"); + break; + + case 'd': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%ld", INTVAL (x)); + else + output_operand_lossage ("invalid %%d value"); + break; + + case 't': + case 'b': + if (GET_CODE (x) == CONST_INT) + { + printx (file, INTVAL (x)); + fputs (letter == 't' ? "@h" : "@l", file); + } + else if (GET_CODE (x) == CONST_DOUBLE) + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + if (GET_MODE (x) == SFmode) + { + long l; + REAL_VALUE_TO_TARGET_SINGLE (r, l); + fprintf (file, "0x%08lx@%c", l, letter == 't' ? 'h' : 'l'); + } + else + output_operand_lossage ("invalid %%t/%%b value"); + } + else if (GET_CODE (x) == CONST) + { + /* X must be a symbolic constant on ELF. Write an expression + suitable for 'const16' that sets the high or low 16 bits. 
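+             For example, with a hypothetical symbol `foo', `%t' applied to
+             (const (plus (symbol_ref "foo") (const_int 4))) should print
+             `foo@h+4'; the explicit `+' for non-negative offsets is added
+             below.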
*/ + if (GET_CODE (XEXP (x, 0)) != PLUS + || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF + && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF) + || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT) + output_operand_lossage ("invalid %%t/%%b value"); + print_operand (file, XEXP (XEXP (x, 0), 0), 0); + fputs (letter == 't' ? "@h" : "@l", file); + /* There must be a non-alphanumeric character between 'h' or 'l' + and the number. The '-' is added by print_operand() already. */ + if (INTVAL (XEXP (XEXP (x, 0), 1)) >= 0) + fputs ("+", file); + print_operand (file, XEXP (XEXP (x, 0), 1), 0); + } + else + { + output_addr_const (file, x); + fputs (letter == 't' ? "@h" : "@l", file); + } + break; + + default: + if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) + fprintf (file, "%s", reg_names[xt_true_regnum (x)]); + else if (GET_CODE (x) == MEM) + output_address (XEXP (x, 0)); + else if (GET_CODE (x) == CONST_INT) + fprintf (file, "%ld", INTVAL (x)); + else + output_addr_const (file, x); + } +} + + +/* A C compound statement to output to stdio stream STREAM the + assembler syntax for an instruction operand that is a memory + reference whose address is ADDR. ADDR is an RTL expression. */ + +void +print_operand_address (FILE *file, rtx addr) +{ + if (!addr) + error ("PRINT_OPERAND_ADDRESS, null pointer"); + + switch (GET_CODE (addr)) + { + default: + fatal_insn ("invalid address", addr); + break; + + case REG: + fprintf (file, "%s, 0", reg_names [REGNO (addr)]); + break; + + case PLUS: + { + rtx reg = (rtx)0; + rtx offset = (rtx)0; + rtx arg0 = XEXP (addr, 0); + rtx arg1 = XEXP (addr, 1); + + if (GET_CODE (arg0) == REG) + { + reg = arg0; + offset = arg1; + } + else if (GET_CODE (arg1) == REG) + { + reg = arg1; + offset = arg0; + } + else + fatal_insn ("no register in address", addr); + + if (CONSTANT_P (offset)) + { + fprintf (file, "%s, ", reg_names [REGNO (reg)]); + output_addr_const (file, offset); + } + else + fatal_insn ("address offset not a constant", addr); + } + break; + + case LABEL_REF: + case SYMBOL_REF: + case CONST_INT: + case CONST: + output_addr_const (file, addr); + break; + } +} + +/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. 
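+   A sketch: a legitimized TLS reference such as (unspec [sym] UNSPEC_TPOFF)
+   is printed as `sym@TPOFF'; anything unrecognized falls through and
+   returns false so the generic code can diagnose it.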
*/ + +static bool +xtensa_output_addr_const_extra (FILE *fp, rtx x) +{ + if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1) + { + switch (XINT (x, 1)) + { + case UNSPEC_TPOFF: + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputs ("@TPOFF", fp); + return true; + case UNSPEC_DTPOFF: + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputs ("@DTPOFF", fp); + return true; + case UNSPEC_PLT: + if (flag_pic) + { + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputs ("@PLT", fp); + return true; + } + break; + default: + break; + } + } + return false; +} + + +void +xtensa_output_literal (FILE *file, rtx x, enum machine_mode mode, int labelno) +{ + long value_long[2]; + REAL_VALUE_TYPE r; + int size; + rtx first, second; + + fprintf (file, "\t.literal .LC%u, ", (unsigned) labelno); + + switch (GET_MODE_CLASS (mode)) + { + case MODE_FLOAT: + gcc_assert (GET_CODE (x) == CONST_DOUBLE); + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + switch (mode) + { + case SFmode: + REAL_VALUE_TO_TARGET_SINGLE (r, value_long[0]); + if (HOST_BITS_PER_LONG > 32) + value_long[0] &= 0xffffffff; + fprintf (file, "0x%08lx\n", value_long[0]); + break; + + case DFmode: + REAL_VALUE_TO_TARGET_DOUBLE (r, value_long); + if (HOST_BITS_PER_LONG > 32) + { + value_long[0] &= 0xffffffff; + value_long[1] &= 0xffffffff; + } + fprintf (file, "0x%08lx, 0x%08lx\n", + value_long[0], value_long[1]); + break; + + default: + gcc_unreachable (); + } + + break; + + case MODE_INT: + case MODE_PARTIAL_INT: + size = GET_MODE_SIZE (mode); + switch (size) + { + case 4: + output_addr_const (file, x); + fputs ("\n", file); + break; + + case 8: + split_double (x, &first, &second); + output_addr_const (file, first); + fputs (", ", file); + output_addr_const (file, second); + fputs ("\n", file); + break; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } +} + + +/* Return the bytes needed to compute the frame pointer from the current + stack pointer. */ + +#define STACK_BYTES (STACK_BOUNDARY / BITS_PER_UNIT) +#define XTENSA_STACK_ALIGN(LOC) (((LOC) + STACK_BYTES-1) & ~(STACK_BYTES-1)) + +long +compute_frame_size (int size) +{ + /* Add space for the incoming static chain value. */ + if (cfun->static_chain_decl != NULL) + size += (1 * UNITS_PER_WORD); + + xtensa_current_frame_size = + XTENSA_STACK_ALIGN (size + + crtl->outgoing_args_size + + (WINDOW_SIZE * UNITS_PER_WORD)); + return xtensa_current_frame_size; +} + + +bool +xtensa_frame_pointer_required (void) +{ + /* The code to expand builtin_frame_addr and builtin_return_addr + currently uses the hard_frame_pointer instead of frame_pointer. + This seems wrong but maybe it's necessary for other architectures. + This function is derived from the i386 code. */ + + if (cfun->machine->accesses_prev_frame) + return true; + + return false; +} + + +/* minimum frame = reg save area (4 words) plus static chain (1 word) + and the total number of words must be a multiple of 128 bits. */ +#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD) + +void +xtensa_expand_prologue (void) +{ + HOST_WIDE_INT total_size; + rtx size_rtx; + rtx insn, note_rtx; + + total_size = compute_frame_size (get_frame_size ()); + size_rtx = GEN_INT (total_size); + + if (total_size < (1 << (12+3))) + insn = emit_insn (gen_entry (size_rtx)); + else + { + /* Use a8 as a temporary since a0-a7 may be live. 
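+         The ENTRY immediate only covers frames up to roughly 32 KB, so a
+         larger frame (say 64 KB) is allocated as roughly `entry sp, 32'
+         followed by lowering the stack pointer by the remaining
+         total_size - MIN_FRAME_SIZE bytes through a8.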
*/ + rtx tmp_reg = gen_rtx_REG (Pmode, A8_REG); + emit_insn (gen_entry (GEN_INT (MIN_FRAME_SIZE))); + emit_move_insn (tmp_reg, GEN_INT (total_size - MIN_FRAME_SIZE)); + emit_insn (gen_subsi3 (tmp_reg, stack_pointer_rtx, tmp_reg)); + insn = emit_insn (gen_movsi (stack_pointer_rtx, tmp_reg)); + } + + if (frame_pointer_needed) + { + if (cfun->machine->set_frame_ptr_insn) + { + rtx first; + + push_topmost_sequence (); + first = get_insns (); + pop_topmost_sequence (); + + /* For all instructions prior to set_frame_ptr_insn, replace + hard_frame_pointer references with stack_pointer. */ + for (insn = first; + insn != cfun->machine->set_frame_ptr_insn; + insn = NEXT_INSN (insn)) + { + if (INSN_P (insn)) + { + PATTERN (insn) = replace_rtx (copy_rtx (PATTERN (insn)), + hard_frame_pointer_rtx, + stack_pointer_rtx); + df_insn_rescan (insn); + } + } + } + else + insn = emit_insn (gen_movsi (hard_frame_pointer_rtx, + stack_pointer_rtx)); + } + + /* Create a note to describe the CFA. Because this is only used to set + DW_AT_frame_base for debug info, don't bother tracking changes through + each instruction in the prologue. It just takes up space. */ + note_rtx = gen_rtx_SET (VOIDmode, (frame_pointer_needed + ? hard_frame_pointer_rtx + : stack_pointer_rtx), + plus_constant (stack_pointer_rtx, -total_size)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); +} + + +/* Clear variables at function end. */ + +void +xtensa_function_epilogue (FILE *file ATTRIBUTE_UNUSED, + HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + xtensa_current_frame_size = 0; +} + + +rtx +xtensa_return_addr (int count, rtx frame) +{ + rtx result, retaddr, curaddr, label; + + if (count == -1) + retaddr = gen_rtx_REG (Pmode, A0_REG); + else + { + rtx addr = plus_constant (frame, -4 * UNITS_PER_WORD); + addr = memory_address (Pmode, addr); + retaddr = gen_reg_rtx (Pmode); + emit_move_insn (retaddr, gen_rtx_MEM (Pmode, addr)); + } + + /* The 2 most-significant bits of the return address on Xtensa hold + the register window size. To get the real return address, these + bits must be replaced with the high bits from some address in the + code. */ + + /* Get the 2 high bits of a local label in the code. */ + curaddr = gen_reg_rtx (Pmode); + label = gen_label_rtx (); + emit_label (label); + LABEL_PRESERVE_P (label) = 1; + emit_move_insn (curaddr, gen_rtx_LABEL_REF (Pmode, label)); + emit_insn (gen_lshrsi3 (curaddr, curaddr, GEN_INT (30))); + emit_insn (gen_ashlsi3 (curaddr, curaddr, GEN_INT (30))); + + /* Clear the 2 high bits of the return address. */ + result = gen_reg_rtx (Pmode); + emit_insn (gen_ashlsi3 (result, retaddr, GEN_INT (2))); + emit_insn (gen_lshrsi3 (result, result, GEN_INT (2))); + + /* Combine them to get the result. */ + emit_insn (gen_iorsi3 (result, result, curaddr)); + return result; +} + + +/* Create the va_list data type. + + This structure is set up by __builtin_saveregs. The __va_reg field + points to a stack-allocated region holding the contents of the + incoming argument registers. The __va_ndx field is an index + initialized to the position of the first unnamed (variable) + argument. This same index is also used to address the arguments + passed in memory. Thus, the __va_stk field is initialized to point + to the position of the first argument in memory offset to account + for the arguments passed in registers and to account for the size + of the argument registers not being 16-byte aligned. 
E.G., there + are 6 argument registers of 4 bytes each, but we want the __va_ndx + for the first stack argument to have the maximal alignment of 16 + bytes, so we offset the __va_stk address by 32 bytes so that + __va_stk[32] references the first argument on the stack. */ + +static tree +xtensa_build_builtin_va_list (void) +{ + tree f_stk, f_reg, f_ndx, record, type_decl; + + record = (*lang_hooks.types.make_type) (RECORD_TYPE); + type_decl = build_decl (BUILTINS_LOCATION, + TYPE_DECL, get_identifier ("__va_list_tag"), record); + + f_stk = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__va_stk"), + ptr_type_node); + f_reg = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__va_reg"), + ptr_type_node); + f_ndx = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__va_ndx"), + integer_type_node); + + DECL_FIELD_CONTEXT (f_stk) = record; + DECL_FIELD_CONTEXT (f_reg) = record; + DECL_FIELD_CONTEXT (f_ndx) = record; + + TYPE_STUB_DECL (record) = type_decl; + TYPE_NAME (record) = type_decl; + TYPE_FIELDS (record) = f_stk; + DECL_CHAIN (f_stk) = f_reg; + DECL_CHAIN (f_reg) = f_ndx; + + layout_type (record); + return record; +} + + +/* Save the incoming argument registers on the stack. Returns the + address of the saved registers. */ + +static rtx +xtensa_builtin_saveregs (void) +{ + rtx gp_regs; + int arg_words = crtl->args.info.arg_words; + int gp_left = MAX_ARGS_IN_REGISTERS - arg_words; + + if (gp_left <= 0) + return const0_rtx; + + /* Allocate the general-purpose register space. */ + gp_regs = assign_stack_local + (BLKmode, MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD, -1); + set_mem_alias_set (gp_regs, get_varargs_alias_set ()); + + /* Now store the incoming registers. */ + cfun->machine->need_a7_copy = true; + cfun->machine->vararg_a7 = true; + move_block_from_reg (GP_ARG_FIRST + arg_words, + adjust_address (gp_regs, BLKmode, + arg_words * UNITS_PER_WORD), + gp_left); + gcc_assert (cfun->machine->vararg_a7_copy != 0); + emit_insn_before (cfun->machine->vararg_a7_copy, get_insns ()); + + return XEXP (gp_regs, 0); +} + + +/* Implement `va_start' for varargs and stdarg. We look at the + current function to fill in an initial va_list. */ + +static void +xtensa_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) +{ + tree f_stk, stk; + tree f_reg, reg; + tree f_ndx, ndx; + tree t, u; + int arg_words; + + arg_words = crtl->args.info.arg_words; + + f_stk = TYPE_FIELDS (va_list_type_node); + f_reg = DECL_CHAIN (f_stk); + f_ndx = DECL_CHAIN (f_reg); + + stk = build3 (COMPONENT_REF, TREE_TYPE (f_stk), valist, f_stk, NULL_TREE); + reg = build3 (COMPONENT_REF, TREE_TYPE (f_reg), unshare_expr (valist), + f_reg, NULL_TREE); + ndx = build3 (COMPONENT_REF, TREE_TYPE (f_ndx), unshare_expr (valist), + f_ndx, NULL_TREE); + + /* Call __builtin_saveregs; save the result in __va_reg */ + u = make_tree (sizetype, expand_builtin_saveregs ()); + u = fold_convert (ptr_type_node, u); + t = build2 (MODIFY_EXPR, ptr_type_node, reg, u); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Set the __va_stk member to ($arg_ptr - 32). */ + u = make_tree (ptr_type_node, virtual_incoming_args_rtx); + u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u, size_int (-32)); + t = build2 (MODIFY_EXPR, ptr_type_node, stk, u); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Set the __va_ndx member. 
If the first variable argument is on + the stack, adjust __va_ndx by 2 words to account for the extra + alignment offset for __va_stk. */ + if (arg_words >= MAX_ARGS_IN_REGISTERS) + arg_words += 2; + t = build2 (MODIFY_EXPR, integer_type_node, ndx, + build_int_cst (integer_type_node, arg_words * UNITS_PER_WORD)); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); +} + + +/* Implement `va_arg'. */ + +static tree +xtensa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p ATTRIBUTE_UNUSED) +{ + tree f_stk, stk; + tree f_reg, reg; + tree f_ndx, ndx; + tree type_size, array, orig_ndx, addr, size, va_size, t; + tree lab_false, lab_over, lab_false2; + bool indirect; + + indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); + if (indirect) + type = build_pointer_type (type); + + /* Handle complex values as separate real and imaginary parts. */ + if (TREE_CODE (type) == COMPLEX_TYPE) + { + tree real_part, imag_part; + + real_part = xtensa_gimplify_va_arg_expr (valist, TREE_TYPE (type), + pre_p, NULL); + real_part = get_initialized_tmp_var (real_part, pre_p, NULL); + + imag_part = xtensa_gimplify_va_arg_expr (unshare_expr (valist), + TREE_TYPE (type), + pre_p, NULL); + imag_part = get_initialized_tmp_var (imag_part, pre_p, NULL); + + return build2 (COMPLEX_EXPR, type, real_part, imag_part); + } + + f_stk = TYPE_FIELDS (va_list_type_node); + f_reg = DECL_CHAIN (f_stk); + f_ndx = DECL_CHAIN (f_reg); + + stk = build3 (COMPONENT_REF, TREE_TYPE (f_stk), valist, + f_stk, NULL_TREE); + reg = build3 (COMPONENT_REF, TREE_TYPE (f_reg), unshare_expr (valist), + f_reg, NULL_TREE); + ndx = build3 (COMPONENT_REF, TREE_TYPE (f_ndx), unshare_expr (valist), + f_ndx, NULL_TREE); + + type_size = size_in_bytes (type); + va_size = round_up (type_size, UNITS_PER_WORD); + gimplify_expr (&va_size, pre_p, NULL, is_gimple_val, fb_rvalue); + + + /* First align __va_ndx if necessary for this arg: + + orig_ndx = (AP).__va_ndx; + if (__alignof__ (TYPE) > 4 ) + orig_ndx = ((orig_ndx + __alignof__ (TYPE) - 1) + & -__alignof__ (TYPE)); */ + + orig_ndx = get_initialized_tmp_var (ndx, pre_p, NULL); + + if (TYPE_ALIGN (type) > BITS_PER_WORD) + { + int align = MIN (TYPE_ALIGN (type), STACK_BOUNDARY) / BITS_PER_UNIT; + + t = build2 (PLUS_EXPR, integer_type_node, unshare_expr (orig_ndx), + build_int_cst (integer_type_node, align - 1)); + t = build2 (BIT_AND_EXPR, integer_type_node, t, + build_int_cst (integer_type_node, -align)); + gimplify_assign (unshare_expr (orig_ndx), t, pre_p); + } + + + /* Increment __va_ndx to point past the argument: + + (AP).__va_ndx = orig_ndx + __va_size (TYPE); */ + + t = fold_convert (integer_type_node, va_size); + t = build2 (PLUS_EXPR, integer_type_node, orig_ndx, t); + gimplify_assign (unshare_expr (ndx), t, pre_p); + + + /* Check if the argument is in registers: + + if ((AP).__va_ndx <= __MAX_ARGS_IN_REGISTERS * 4 + && !must_pass_in_stack (type)) + __array = (AP).__va_reg; */ + + array = create_tmp_var (ptr_type_node, NULL); + + lab_over = NULL; + if (!targetm.calls.must_pass_in_stack (TYPE_MODE (type), type)) + { + lab_false = create_artificial_label (UNKNOWN_LOCATION); + lab_over = create_artificial_label (UNKNOWN_LOCATION); + + t = build2 (GT_EXPR, boolean_type_node, unshare_expr (ndx), + build_int_cst (integer_type_node, + MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD)); + t = build3 (COND_EXPR, void_type_node, t, + build1 (GOTO_EXPR, void_type_node, lab_false), + NULL_TREE); + gimplify_and_add (t, pre_p); + + 
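+      /* Fall-through case: the argument was passed in registers, so address it through the register save area pointed to by __va_reg.  */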
gimplify_assign (unshare_expr (array), reg, pre_p); + + t = build1 (GOTO_EXPR, void_type_node, lab_over); + gimplify_and_add (t, pre_p); + + t = build1 (LABEL_EXPR, void_type_node, lab_false); + gimplify_and_add (t, pre_p); + } + + + /* ...otherwise, the argument is on the stack (never split between + registers and the stack -- change __va_ndx if necessary): + + else + { + if (orig_ndx <= __MAX_ARGS_IN_REGISTERS * 4) + (AP).__va_ndx = 32 + __va_size (TYPE); + __array = (AP).__va_stk; + } */ + + lab_false2 = create_artificial_label (UNKNOWN_LOCATION); + + t = build2 (GT_EXPR, boolean_type_node, unshare_expr (orig_ndx), + build_int_cst (integer_type_node, + MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD)); + t = build3 (COND_EXPR, void_type_node, t, + build1 (GOTO_EXPR, void_type_node, lab_false2), + NULL_TREE); + gimplify_and_add (t, pre_p); + + t = size_binop (PLUS_EXPR, unshare_expr (va_size), size_int (32)); + t = fold_convert (integer_type_node, t); + gimplify_assign (unshare_expr (ndx), t, pre_p); + + t = build1 (LABEL_EXPR, void_type_node, lab_false2); + gimplify_and_add (t, pre_p); + + gimplify_assign (array, stk, pre_p); + + if (lab_over) + { + t = build1 (LABEL_EXPR, void_type_node, lab_over); + gimplify_and_add (t, pre_p); + } + + + /* Given the base array pointer (__array) and index to the subsequent + argument (__va_ndx), find the address: + + __array + (AP).__va_ndx - (BYTES_BIG_ENDIAN && sizeof (TYPE) < 4 + ? sizeof (TYPE) + : __va_size (TYPE)) + + The results are endian-dependent because values smaller than one word + are aligned differently. */ + + + if (BYTES_BIG_ENDIAN && TREE_CODE (type_size) == INTEGER_CST) + { + t = fold_build2 (GE_EXPR, boolean_type_node, unshare_expr (type_size), + size_int (PARM_BOUNDARY / BITS_PER_UNIT)); + t = fold_build3 (COND_EXPR, sizetype, t, unshare_expr (va_size), + unshare_expr (type_size)); + size = t; + } + else + size = unshare_expr (va_size); + + t = fold_convert (sizetype, unshare_expr (ndx)); + t = build2 (MINUS_EXPR, sizetype, t, size); + addr = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (array), t); + + addr = fold_convert (build_pointer_type (type), addr); + if (indirect) + addr = build_va_arg_indirect_ref (addr); + return build_va_arg_indirect_ref (addr); +} + + +/* Builtins. 
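+   Xtensa provides a builtin for __umulsidi3 so that widening multiplies can be expanded inline when the configuration supports them, plus builtins to read and set the thread pointer when the THREADPTR option is configured.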
*/ + +enum xtensa_builtin +{ + XTENSA_BUILTIN_UMULSIDI3, + XTENSA_BUILTIN_THREAD_POINTER, + XTENSA_BUILTIN_SET_THREAD_POINTER, + XTENSA_BUILTIN_max +}; + + +static void +xtensa_init_builtins (void) +{ + tree ftype, decl; + + ftype = build_function_type_list (unsigned_intDI_type_node, + unsigned_intSI_type_node, + unsigned_intSI_type_node, NULL_TREE); + + decl = add_builtin_function ("__builtin_umulsidi3", ftype, + XTENSA_BUILTIN_UMULSIDI3, BUILT_IN_MD, + "__umulsidi3", NULL_TREE); + TREE_NOTHROW (decl) = 1; + TREE_READONLY (decl) = 1; + + if (TARGET_THREADPTR) + { + ftype = build_function_type (ptr_type_node, void_list_node); + decl = add_builtin_function ("__builtin_thread_pointer", ftype, + XTENSA_BUILTIN_THREAD_POINTER, BUILT_IN_MD, + NULL, NULL_TREE); + TREE_READONLY (decl) = 1; + TREE_NOTHROW (decl) = 1; + + ftype = build_function_type_list (void_type_node, ptr_type_node, + NULL_TREE); + decl = add_builtin_function ("__builtin_set_thread_pointer", ftype, + XTENSA_BUILTIN_SET_THREAD_POINTER, + BUILT_IN_MD, NULL, NULL_TREE); + TREE_NOTHROW (decl) = 1; + } +} + + +static tree +xtensa_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, + bool ignore ATTRIBUTE_UNUSED) +{ + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arg0, arg1; + + switch (fcode) + { + case XTENSA_BUILTIN_UMULSIDI3: + arg0 = args[0]; + arg1 = args[1]; + if ((TREE_CODE (arg0) == INTEGER_CST && TREE_CODE (arg1) == INTEGER_CST) + || TARGET_MUL32_HIGH) + return fold_build2 (MULT_EXPR, unsigned_intDI_type_node, + fold_convert (unsigned_intDI_type_node, arg0), + fold_convert (unsigned_intDI_type_node, arg1)); + break; + + case XTENSA_BUILTIN_THREAD_POINTER: + case XTENSA_BUILTIN_SET_THREAD_POINTER: + break; + + default: + internal_error ("bad builtin code"); + break; + } + + return NULL; +} + + +static rtx +xtensa_expand_builtin (tree exp, rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + rtx arg; + + switch (fcode) + { + case XTENSA_BUILTIN_UMULSIDI3: + /* The umulsidi3 builtin is just a mechanism to avoid calling the real + __umulsidi3 function when the Xtensa configuration can directly + implement it. If not, just call the function. */ + return expand_call (exp, target, ignore); + + case XTENSA_BUILTIN_THREAD_POINTER: + if (!target || !register_operand (target, Pmode)) + target = gen_reg_rtx (Pmode); + emit_insn (gen_load_tp (target)); + return target; + + case XTENSA_BUILTIN_SET_THREAD_POINTER: + arg = expand_normal (CALL_EXPR_ARG (exp, 0)); + if (!register_operand (arg, Pmode)) + arg = copy_to_mode_reg (Pmode, arg); + emit_insn (gen_set_tp (arg)); + return const0_rtx; + + default: + internal_error ("bad builtin code"); + } + return NULL_RTX; +} + +/* Worker function for TARGET_PREFERRED_RELOAD_CLASS. */ + +static reg_class_t +xtensa_preferred_reload_class (rtx x, reg_class_t rclass) +{ + if (CONSTANT_P (x) && CONST_DOUBLE_P (x)) + return NO_REGS; + + /* Don't use the stack pointer or hard frame pointer for reloads! + The hard frame pointer would normally be OK except that it may + briefly hold an incoming argument in the prologue, and reload + won't know that it is live because the hard frame pointer is + treated specially. */ + + if (rclass == AR_REGS || rclass == GR_REGS) + return RL_REGS; + + return rclass; +} + +/* Worker function for TARGET_PREFERRED_OUTPUT_RELOAD_CLASS. 
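+   This mirrors the input-reload hook above: prefer RL_REGS so that reloads avoid the stack pointer and hard frame pointer.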
*/ + +static reg_class_t +xtensa_preferred_output_reload_class (rtx x ATTRIBUTE_UNUSED, + reg_class_t rclass) +{ + /* Don't use the stack pointer or hard frame pointer for reloads! + The hard frame pointer would normally be OK except that it may + briefly hold an incoming argument in the prologue, and reload + won't know that it is live because the hard frame pointer is + treated specially. */ + + if (rclass == AR_REGS || rclass == GR_REGS) + return RL_REGS; + + return rclass; +} + +/* Worker function for TARGET_SECONDARY_RELOAD. */ + +static reg_class_t +xtensa_secondary_reload (bool in_p, rtx x, reg_class_t rclass, + enum machine_mode mode, secondary_reload_info *sri) +{ + int regno; + + if (in_p && constantpool_mem_p (x)) + { + if (rclass == FP_REGS) + return RL_REGS; + + if (mode == QImode) + sri->icode = CODE_FOR_reloadqi_literal; + else if (mode == HImode) + sri->icode = CODE_FOR_reloadhi_literal; + } + + regno = xt_true_regnum (x); + if (ACC_REG_P (regno)) + return ((rclass == GR_REGS || rclass == RL_REGS) ? NO_REGS : RL_REGS); + if (rclass == ACC_REG) + return (GP_REG_P (regno) ? NO_REGS : RL_REGS); + + return NO_REGS; +} + + +void +order_regs_for_local_alloc (void) +{ + if (!leaf_function_p ()) + { + memcpy (reg_alloc_order, reg_nonleaf_alloc_order, + FIRST_PSEUDO_REGISTER * sizeof (int)); + } + else + { + int i, num_arg_regs; + int nxt = 0; + + /* Use the AR registers in increasing order (skipping a0 and a1) + but save the incoming argument registers for a last resort. */ + num_arg_regs = crtl->args.info.arg_words; + if (num_arg_regs > MAX_ARGS_IN_REGISTERS) + num_arg_regs = MAX_ARGS_IN_REGISTERS; + for (i = GP_ARG_FIRST; i < 16 - num_arg_regs; i++) + reg_alloc_order[nxt++] = i + num_arg_regs; + for (i = 0; i < num_arg_regs; i++) + reg_alloc_order[nxt++] = GP_ARG_FIRST + i; + + /* List the coprocessor registers in order. */ + for (i = 0; i < BR_REG_NUM; i++) + reg_alloc_order[nxt++] = BR_REG_FIRST + i; + + /* List the FP registers in order for now. */ + for (i = 0; i < 16; i++) + reg_alloc_order[nxt++] = FP_REG_FIRST + i; + + /* GCC requires that we list *all* the registers.... */ + reg_alloc_order[nxt++] = 0; /* a0 = return address */ + reg_alloc_order[nxt++] = 1; /* a1 = stack pointer */ + reg_alloc_order[nxt++] = 16; /* pseudo frame pointer */ + reg_alloc_order[nxt++] = 17; /* pseudo arg pointer */ + + reg_alloc_order[nxt++] = ACC_REG_FIRST; /* MAC16 accumulator */ + } +} + + +/* Some Xtensa targets support multiple bss sections. If the section + name ends with ".bss", add SECTION_BSS to the flags. */ + +static unsigned int +xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = default_section_type_flags (decl, name, reloc); + const char *suffix; + + suffix = strrchr (name, '.'); + if (suffix && strcmp (suffix, ".bss") == 0) + { + if (!decl || (TREE_CODE (decl) == VAR_DECL + && DECL_INITIAL (decl) == NULL_TREE)) + flags |= SECTION_BSS; /* @nobits */ + else + warning (0, "only uninitialized variables can be placed in a " + ".bss section"); + } + + return flags; +} + + +/* The literal pool stays with the function. */ + +static section * +xtensa_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED, + unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) +{ + return function_section (current_function_decl); +} + +/* Worker function for TARGET_REGISTER_MOVE_COST. 
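+   Moves within the AR file, or within any other single class except BR_REGS, cost 2; moves between the AR file and the MAC16 accumulator cost 3; all other cross-file moves are charged 10.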
*/ + +static int +xtensa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + if (from == to && from != BR_REGS && to != BR_REGS) + return 2; + else if (reg_class_subset_p (from, AR_REGS) + && reg_class_subset_p (to, AR_REGS)) + return 2; + else if (reg_class_subset_p (from, AR_REGS) && to == ACC_REG) + return 3; + else if (from == ACC_REG && reg_class_subset_p (to, AR_REGS)) + return 3; + else + return 10; +} + +/* Worker function for TARGET_MEMORY_MOVE_COST. */ + +static int +xtensa_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + return 4; +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +xtensa_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) +{ + switch (code) + { + case CONST_INT: + switch (outer_code) + { + case SET: + if (xtensa_simm12b (INTVAL (x))) + { + *total = 4; + return true; + } + break; + case PLUS: + if (xtensa_simm8 (INTVAL (x)) + || xtensa_simm8x256 (INTVAL (x))) + { + *total = 0; + return true; + } + break; + case AND: + if (xtensa_mask_immediate (INTVAL (x))) + { + *total = 0; + return true; + } + break; + case COMPARE: + if ((INTVAL (x) == 0) || xtensa_b4const (INTVAL (x))) + { + *total = 0; + return true; + } + break; + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATE: + case ROTATERT: + /* No way to tell if X is the 2nd operand so be conservative. */ + default: break; + } + if (xtensa_simm12b (INTVAL (x))) + *total = 5; + else if (TARGET_CONST16) + *total = COSTS_N_INSNS (2); + else + *total = 6; + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + if (TARGET_CONST16) + *total = COSTS_N_INSNS (2); + else + *total = 5; + return true; + + case CONST_DOUBLE: + if (TARGET_CONST16) + *total = COSTS_N_INSNS (4); + else + *total = 7; + return true; + + case MEM: + { + int num_words = + (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) ? 2 : 1; + + if (memory_address_p (GET_MODE (x), XEXP ((x), 0))) + *total = COSTS_N_INSNS (num_words); + else + *total = COSTS_N_INSNS (2*num_words); + return true; + } + + case FFS: + case CTZ: + *total = COSTS_N_INSNS (TARGET_NSA ? 5 : 50); + return true; + + case CLZ: + *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); + return true; + + case NOT: + *total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 3 : 2); + return true; + + case AND: + case IOR: + case XOR: + if (GET_MODE (x) == DImode) + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (1); + return true; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (GET_MODE (x) == DImode) + *total = COSTS_N_INSNS (50); + else + *total = COSTS_N_INSNS (1); + return true; + + case ABS: + { + enum machine_mode xmode = GET_MODE (x); + if (xmode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); + else if (xmode == DFmode) + *total = COSTS_N_INSNS (50); + else + *total = COSTS_N_INSNS (4); + return true; + } + + case PLUS: + case MINUS: + { + enum machine_mode xmode = GET_MODE (x); + if (xmode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); + else if (xmode == DFmode || xmode == DImode) + *total = COSTS_N_INSNS (50); + else + *total = COSTS_N_INSNS (1); + return true; + } + + case NEG: + *total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 
4 : 2); + return true; + + case MULT: + { + enum machine_mode xmode = GET_MODE (x); + if (xmode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 4 : 50); + else if (xmode == DFmode) + *total = COSTS_N_INSNS (50); + else if (xmode == DImode) + *total = COSTS_N_INSNS (TARGET_MUL32_HIGH ? 10 : 50); + else if (TARGET_MUL32) + *total = COSTS_N_INSNS (4); + else if (TARGET_MAC16) + *total = COSTS_N_INSNS (16); + else if (TARGET_MUL16) + *total = COSTS_N_INSNS (12); + else + *total = COSTS_N_INSNS (50); + return true; + } + + case DIV: + case MOD: + { + enum machine_mode xmode = GET_MODE (x); + if (xmode == SFmode) + { + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT_DIV ? 8 : 50); + return true; + } + else if (xmode == DFmode) + { + *total = COSTS_N_INSNS (50); + return true; + } + } + /* Fall through. */ + + case UDIV: + case UMOD: + { + enum machine_mode xmode = GET_MODE (x); + if (xmode == DImode) + *total = COSTS_N_INSNS (50); + else if (TARGET_DIV32) + *total = COSTS_N_INSNS (32); + else + *total = COSTS_N_INSNS (50); + return true; + } + + case SQRT: + if (GET_MODE (x) == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT_SQRT ? 8 : 50); + else + *total = COSTS_N_INSNS (50); + return true; + + case SMIN: + case UMIN: + case SMAX: + case UMAX: + *total = COSTS_N_INSNS (TARGET_MINMAX ? 1 : 50); + return true; + + case SIGN_EXTRACT: + case SIGN_EXTEND: + *total = COSTS_N_INSNS (TARGET_SEXT ? 1 : 2); + return true; + + case ZERO_EXTRACT: + case ZERO_EXTEND: + *total = COSTS_N_INSNS (1); + return true; + + default: + return false; + } +} + +/* Worker function for TARGET_RETURN_IN_MEMORY. */ + +static bool +xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type) + > 4 * UNITS_PER_WORD); +} + +/* Worker function for TARGET_FUNCTION_VALUE. */ + +rtx +xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, + bool outgoing) +{ + return gen_rtx_REG ((INTEGRAL_TYPE_P (valtype) + && TYPE_PRECISION (valtype) < BITS_PER_WORD) + ? SImode : TYPE_MODE (valtype), + outgoing ? GP_OUTGOING_RETURN : GP_RETURN); +} + +/* Worker function for TARGET_LIBCALL_VALUE. */ + +static rtx +xtensa_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG ((GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < UNITS_PER_WORD) + ? SImode : mode, GP_RETURN); +} + +/* Worker function TARGET_FUNCTION_VALUE_REGNO_P. */ + +static bool +xtensa_function_value_regno_p (const unsigned int regno) +{ + return (regno == GP_RETURN); +} + +/* The static chain is passed in memory. Provide rtx giving 'mem' + expressions that denote where they are stored. */ + +static rtx +xtensa_static_chain (const_tree ARG_UNUSED (fndecl), bool incoming_p) +{ + rtx base = incoming_p ? arg_pointer_rtx : stack_pointer_rtx; + return gen_frame_mem (Pmode, plus_constant (base, -5 * UNITS_PER_WORD)); +} + + +/* TRAMPOLINE_TEMPLATE: For Xtensa, the trampoline must perform an ENTRY + instruction with a minimal stack frame in order to get some free + registers. Once the actual call target is known, the proper stack frame + size is extracted from the ENTRY instruction at the target and the + current frame is adjusted to match. The trampoline then transfers + control to the instruction following the ENTRY at the target. Note: + this assumes that the target begins with an ENTRY instruction. 
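+   Two flavors are generated: when CONST16 or absolute literals are in use, a CALL0 captures the PC so that the chain value and target address can be read PC-relative; otherwise they are loaded with L32R.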
*/ + +static void +xtensa_asm_trampoline_template (FILE *stream) +{ + bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS); + + fprintf (stream, "\t.begin no-transform\n"); + fprintf (stream, "\tentry\tsp, %d\n", MIN_FRAME_SIZE); + + if (use_call0) + { + /* Save the return address. */ + fprintf (stream, "\tmov\ta10, a0\n"); + + /* Use a CALL0 instruction to skip past the constants and in the + process get the PC into A0. This allows PC-relative access to + the constants without relying on L32R. */ + fprintf (stream, "\tcall0\t.Lskipconsts\n"); + } + else + fprintf (stream, "\tj\t.Lskipconsts\n"); + + fprintf (stream, "\t.align\t4\n"); + fprintf (stream, ".Lchainval:%s0\n", integer_asm_op (4, TRUE)); + fprintf (stream, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE)); + fprintf (stream, ".Lskipconsts:\n"); + + /* Load the static chain and function address from the trampoline. */ + if (use_call0) + { + fprintf (stream, "\taddi\ta0, a0, 3\n"); + fprintf (stream, "\tl32i\ta9, a0, 0\n"); + fprintf (stream, "\tl32i\ta8, a0, 4\n"); + } + else + { + fprintf (stream, "\tl32r\ta9, .Lchainval\n"); + fprintf (stream, "\tl32r\ta8, .Lfnaddr\n"); + } + + /* Store the static chain. */ + fprintf (stream, "\ts32i\ta9, sp, %d\n", MIN_FRAME_SIZE - 20); + + /* Set the proper stack pointer value. */ + fprintf (stream, "\tl32i\ta9, a8, 0\n"); + fprintf (stream, "\textui\ta9, a9, %d, 12\n", + TARGET_BIG_ENDIAN ? 8 : 12); + fprintf (stream, "\tslli\ta9, a9, 3\n"); + fprintf (stream, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE); + fprintf (stream, "\tsub\ta9, sp, a9\n"); + fprintf (stream, "\tmovsp\tsp, a9\n"); + + if (use_call0) + /* Restore the return address. */ + fprintf (stream, "\tmov\ta0, a10\n"); + + /* Jump to the instruction following the ENTRY. */ + fprintf (stream, "\taddi\ta8, a8, 3\n"); + fprintf (stream, "\tjx\ta8\n"); + + /* Pad size to a multiple of TRAMPOLINE_ALIGNMENT. */ + if (use_call0) + fprintf (stream, "\t.byte\t0\n"); + else + fprintf (stream, "\tnop\n"); + + fprintf (stream, "\t.end no-transform\n"); +} + +static void +xtensa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain) +{ + rtx func = XEXP (DECL_RTL (fndecl), 0); + bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS); + int chain_off = use_call0 ? 12 : 8; + int func_off = use_call0 ? 16 : 12; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + emit_move_insn (adjust_address (m_tramp, SImode, chain_off), chain); + emit_move_insn (adjust_address (m_tramp, SImode, func_off), func); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"), + LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); +} + + +#include "gt-xtensa.h" diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h new file mode 100644 index 000000000..0a096cdb5 --- /dev/null +++ b/gcc/config/xtensa/xtensa.h @@ -0,0 +1,847 @@ +/* Definitions of Tensilica's Xtensa target machine for GNU compiler. + Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Get Xtensa configuration settings */ +#include "xtensa-config.h" + +/* External variables defined in xtensa.c. */ + +extern unsigned xtensa_current_frame_size; + +/* Macros used in the machine description to select various Xtensa + configuration options. */ +#ifndef XCHAL_HAVE_MUL32_HIGH +#define XCHAL_HAVE_MUL32_HIGH 0 +#endif +#ifndef XCHAL_HAVE_RELEASE_SYNC +#define XCHAL_HAVE_RELEASE_SYNC 0 +#endif +#ifndef XCHAL_HAVE_S32C1I +#define XCHAL_HAVE_S32C1I 0 +#endif +#ifndef XCHAL_HAVE_THREADPTR +#define XCHAL_HAVE_THREADPTR 0 +#endif +#define TARGET_BIG_ENDIAN XCHAL_HAVE_BE +#define TARGET_DENSITY XCHAL_HAVE_DENSITY +#define TARGET_MAC16 XCHAL_HAVE_MAC16 +#define TARGET_MUL16 XCHAL_HAVE_MUL16 +#define TARGET_MUL32 XCHAL_HAVE_MUL32 +#define TARGET_MUL32_HIGH XCHAL_HAVE_MUL32_HIGH +#define TARGET_DIV32 XCHAL_HAVE_DIV32 +#define TARGET_NSA XCHAL_HAVE_NSA +#define TARGET_MINMAX XCHAL_HAVE_MINMAX +#define TARGET_SEXT XCHAL_HAVE_SEXT +#define TARGET_BOOLEANS XCHAL_HAVE_BOOLEANS +#define TARGET_HARD_FLOAT XCHAL_HAVE_FP +#define TARGET_HARD_FLOAT_DIV XCHAL_HAVE_FP_DIV +#define TARGET_HARD_FLOAT_RECIP XCHAL_HAVE_FP_RECIP +#define TARGET_HARD_FLOAT_SQRT XCHAL_HAVE_FP_SQRT +#define TARGET_HARD_FLOAT_RSQRT XCHAL_HAVE_FP_RSQRT +#define TARGET_ABS XCHAL_HAVE_ABS +#define TARGET_ADDX XCHAL_HAVE_ADDX +#define TARGET_RELEASE_SYNC XCHAL_HAVE_RELEASE_SYNC +#define TARGET_S32C1I XCHAL_HAVE_S32C1I +#define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS +#define TARGET_THREADPTR XCHAL_HAVE_THREADPTR + +#define TARGET_DEFAULT \ + ((XCHAL_HAVE_L32R ? 0 : MASK_CONST16) | \ + MASK_SERIALIZE_VOLATILE) + +#ifndef HAVE_AS_TLS +#define HAVE_AS_TLS 0 +#endif + + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do { \ + builtin_assert ("cpu=xtensa"); \ + builtin_assert ("machine=xtensa"); \ + builtin_define ("__xtensa__"); \ + builtin_define ("__XTENSA__"); \ + builtin_define ("__XTENSA_WINDOWED_ABI__"); \ + builtin_define (TARGET_BIG_ENDIAN ? "__XTENSA_EB__" : "__XTENSA_EL__"); \ + if (!TARGET_HARD_FLOAT) \ + builtin_define ("__XTENSA_SOFT_FLOAT__"); \ + } while (0) + +#define CPP_SPEC " %(subtarget_cpp_spec) " + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "" +#endif + +#define EXTRA_SPECS \ + { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, + +/* Target machine storage layout */ + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +/* Define this if most significant byte of a word is the lowest numbered. */ +#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +/* Define this if most significant word of a multiword number is the lowest. */ +#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +#define MAX_BITS_PER_WORD 32 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 4 +#define MIN_UNITS_PER_WORD 4 + +/* Width of a floating point register. */ +#define UNITS_PER_FPREG 4 + +/* Size in bits of various types on the target machine. 
*/ +#define INT_TYPE_SIZE 32 +#define SHORT_TYPE_SIZE 16 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* Allocation boundary (in *bits*) for storing pointers in memory. */ +#define POINTER_BOUNDARY 32 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 32 + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after 'int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 32 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* There is no point aligning anything to a rounder boundary than this. */ +#define BIGGEST_ALIGNMENT 128 + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* Promote integer modes smaller than a word to SImode. Set UNSIGNEDP + for QImode, because there is no 8-bit load from memory with sign + extension. Otherwise, leave UNSIGNEDP alone, since Xtensa has 16-bit + loads both with and without sign extension. */ +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + do { \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + { \ + if ((MODE) == QImode) \ + (UNSIGNEDP) = 1; \ + (MODE) = SImode; \ + } \ + } while (0) + +/* Imitate the way many other C compilers handle alignment of + bitfields and the structures that contain them. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* Disable the use of word-sized or smaller complex modes for structures, + and for function arguments in particular, where they cause problems with + register a7. The xtensa_copy_incoming_a7 function assumes that there is + a single reference to an argument in a7, but with small complex modes the + real and imaginary components may be extracted separately, leading to two + uses of the register, only one of which would be replaced. */ +#define MEMBER_TYPE_FORCES_BLK(FIELD, MODE) \ + ((MODE) == CQImode || (MODE) == CHImode) + +/* Align string constants and constructors to at least a word boundary. + The typical use of this macro is to increase alignment for string + constants to be word aligned so that 'strcpy' calls that copy + constants can be done inline. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST || TREE_CODE (EXP) == CONSTRUCTOR) \ + && (ALIGN) < BITS_PER_WORD \ + ? BITS_PER_WORD \ + : (ALIGN)) + +/* Align arrays, unions and records to at least a word boundary. + One use of this macro is to increase alignment of medium-size + data to make it all fit in fewer cache lines. Another is to + cause character arrays to be word-aligned so that 'strcpy' calls + that copy constants to character arrays can be done inline. */ +#undef DATA_ALIGNMENT +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + ((((ALIGN) < BITS_PER_WORD) \ + && (TREE_CODE (TYPE) == ARRAY_TYPE \ + || TREE_CODE (TYPE) == UNION_TYPE \ + || TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN)) + +/* Operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Xtensa loads are zero-extended by default. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. 
+ All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + The fake frame pointer and argument pointer will never appear in + the generated code, since they will always be eliminated and replaced + by either the stack pointer or the hard frame pointer. + + 0 - 15 AR[0] - AR[15] + 16 FRAME_POINTER (fake = initial sp) + 17 ARG_POINTER (fake = initial sp + framesize) + 18 BR[0] for floating-point CC + 19 - 34 FR[0] - FR[15] + 35 MAC16 accumulator */ + +#define FIRST_PSEUDO_REGISTER 36 + +/* Return the stabs register number to use for REGNO. */ +#define DBX_REGISTER_NUMBER(REGNO) xtensa_dbx_register_number (REGNO) + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. */ +#define FIXED_REGISTERS \ +{ \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ +#define CALL_USED_REGISTERS \ +{ \ + 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, \ +} + +/* For non-leaf procedures on Xtensa processors, the allocation order + is as specified below by REG_ALLOC_ORDER. For leaf procedures, we + want to use the lowest numbered registers first to minimize + register window overflows. However, local-alloc is not smart + enough to consider conflicts with incoming arguments. If an + incoming argument in a2 is live throughout the function and + local-alloc decides to use a2, then the incoming argument must + either be spilled or copied to another register. To get around + this, we define ADJUST_REG_ALLOC_ORDER to redefine + reg_alloc_order for leaf functions such that lowest numbered + registers are used first with the exception that the incoming + argument registers are not used until after other register choices + have been exhausted. */ + +#define REG_ALLOC_ORDER \ +{ 8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, \ + 18, \ + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \ + 0, 1, 16, 17, \ + 35, \ +} + +#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc () + +/* For Xtensa, the only point of this is to prevent GCC from otherwise + giving preference to call-used registers. To minimize window + overflows for the AR registers, we want to give preference to the + lower-numbered AR registers. For other register files, which are + not windowed, we still prefer call-used registers, if there are any. */ +extern const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER]; +#define LEAF_REGISTERS xtensa_leaf_regs + +/* For Xtensa, no remapping is necessary, but this macro must be + defined if LEAF_REGISTERS is defined. */ +#define LEAF_REG_REMAP(REGNO) (REGNO) + +/* This must be declared if LEAF_REGISTERS is set. */ +extern int leaf_function; + +/* Internal macros to classify a register number. 
*/ + +/* 16 address registers + fake registers */ +#define GP_REG_FIRST 0 +#define GP_REG_LAST 17 +#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1) + +/* Coprocessor registers */ +#define BR_REG_FIRST 18 +#define BR_REG_LAST 18 +#define BR_REG_NUM (BR_REG_LAST - BR_REG_FIRST + 1) + +/* 16 floating-point registers */ +#define FP_REG_FIRST 19 +#define FP_REG_LAST 34 +#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1) + +/* MAC16 accumulator */ +#define ACC_REG_FIRST 35 +#define ACC_REG_LAST 35 +#define ACC_REG_NUM (ACC_REG_LAST - ACC_REG_FIRST + 1) + +#define GP_REG_P(REGNO) ((unsigned) ((REGNO) - GP_REG_FIRST) < GP_REG_NUM) +#define BR_REG_P(REGNO) ((unsigned) ((REGNO) - BR_REG_FIRST) < BR_REG_NUM) +#define FP_REG_P(REGNO) ((unsigned) ((REGNO) - FP_REG_FIRST) < FP_REG_NUM) +#define ACC_REG_P(REGNO) ((unsigned) ((REGNO) - ACC_REG_FIRST) < ACC_REG_NUM) + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ + (FP_REG_P (REGNO) ? \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG) : \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode + MODE. */ +extern char xtensa_hard_regno_mode_ok[][FIRST_PSEUDO_REGISTER]; + +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + xtensa_hard_regno_mode_ok[(int) (MODE)][(REGNO)] + +/* Value is 1 if it is a good idea to tie two pseudo registers + when one has mode MODE1 and one has mode MODE2. + If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, + for any hard reg, then this must be 0 for correct output. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ + ((GET_MODE_CLASS (MODE1) == MODE_FLOAT || \ + GET_MODE_CLASS (MODE1) == MODE_COMPLEX_FLOAT) \ + == (GET_MODE_CLASS (MODE2) == MODE_FLOAT || \ + GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT)) + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM (GP_REG_FIRST + 1) + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 7) + +/* The register number of the frame pointer register, which is used to + access automatic variables in the stack frame. For Xtensa, this + register never appears in the output. It is always eliminated to + either the stack pointer or the hard frame pointer. */ +#define FRAME_POINTER_REGNUM (GP_REG_FIRST + 16) + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM (GP_REG_FIRST + 17) + +/* For now we don't try to use the full set of boolean registers. Without + software pipelining of FP operations, there's not much to gain and it's + a real pain to get them reloaded. */ +#define FPCC_REGNUM (BR_REG_FIRST + 0) + +/* It is as good or better to call a constant function address than to + call an address kept in a register. */ +#define NO_FUNCTION_CSE 1 + +/* Xtensa processors have "register windows". GCC does not currently + take advantage of the possibility for variable-sized windows; instead, + we use a fixed window size of 8. */ + +#define INCOMING_REGNO(OUT) \ + ((GP_REG_P (OUT) && \ + ((unsigned) ((OUT) - GP_REG_FIRST) >= WINDOW_SIZE)) ? \ + (OUT) - WINDOW_SIZE : (OUT)) + +#define OUTGOING_REGNO(IN) \ + ((GP_REG_P (IN) && \ + ((unsigned) ((IN) - GP_REG_FIRST) < WINDOW_SIZE)) ? \ + (IN) + WINDOW_SIZE : (IN)) + + +/* Define the classes of registers for register constraints in the + machine description. 
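+   RL_REGS exists so that reloads prefer general registers other than the stack pointer and hard frame pointer.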
*/ +enum reg_class +{ + NO_REGS, /* no registers in set */ + BR_REGS, /* coprocessor boolean registers */ + FP_REGS, /* floating point registers */ + ACC_REG, /* MAC16 accumulator */ + SP_REG, /* sp register (aka a1) */ + RL_REGS, /* preferred reload regs (not sp or fp) */ + GR_REGS, /* integer registers except sp */ + AR_REGS, /* all integer registers */ + ALL_REGS, /* all registers */ + LIM_REG_CLASSES /* max value + 1 */ +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define GENERAL_REGS AR_REGS + +/* An initializer containing the names of the register classes as C + string constants. These names are used in writing some of the + debugging dumps. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "BR_REGS", \ + "FP_REGS", \ + "ACC_REG", \ + "SP_REG", \ + "RL_REGS", \ + "GR_REGS", \ + "AR_REGS", \ + "ALL_REGS" \ +} + +/* Contents of the register classes. The Nth integer specifies the + contents of class N. The way the integer MASK is interpreted is + that register R is in the class if 'MASK & (1 << R)' is 1. */ +#define REG_CLASS_CONTENTS \ +{ \ + { 0x00000000, 0x00000000 }, /* no registers */ \ + { 0x00040000, 0x00000000 }, /* coprocessor boolean registers */ \ + { 0xfff80000, 0x00000007 }, /* floating-point registers */ \ + { 0x00000000, 0x00000008 }, /* MAC16 accumulator */ \ + { 0x00000002, 0x00000000 }, /* stack pointer register */ \ + { 0x0000ff7d, 0x00000000 }, /* preferred reload registers */ \ + { 0x0000fffd, 0x00000000 }, /* general-purpose registers */ \ + { 0x0003ffff, 0x00000000 }, /* integer registers */ \ + { 0xffffffff, 0x0000000f } /* all registers */ \ +} + +#define IRA_COVER_CLASSES \ +{ \ + BR_REGS, FP_REGS, ACC_REG, AR_REGS, LIM_REG_CLASSES \ +} + +/* A C expression whose value is a register class containing hard + register REGNO. In general there is more than one such class; + choose a class which is "minimal", meaning that no smaller class + also contains the register. */ +extern const enum reg_class xtensa_regno_to_class[FIRST_PSEUDO_REGISTER]; + +#define REGNO_REG_CLASS(REGNO) xtensa_regno_to_class[ (REGNO) ] + +/* Use the Xtensa AR register file for base registers. + No index registers. */ +#define BASE_REG_CLASS AR_REGS +#define INDEX_REG_CLASS NO_REGS + +/* The small_register_classes_for_mode_p hook must always return true for + Xtensa, because all of the 16 AR registers may be explicitly used in + the RTL, as either incoming or outgoing arguments. */ +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ +#define CLASS_UNITS(mode, size) \ + ((GET_MODE_SIZE (mode) + (size) - 1) / (size)) + +#define CLASS_MAX_NREGS(CLASS, MODE) \ + (CLASS_UNITS (MODE, UNITS_PER_WORD)) + + +/* Stack layout; function entry, exit and calling. */ + +#define STACK_GROWS_DOWNWARD + +/* Offset within stack frame to start allocating local variables at. */ +#define STARTING_FRAME_OFFSET \ + crtl->outgoing_args_size + +/* The ARG_POINTER and FRAME_POINTER are not real Xtensa registers, so + they are eliminated to either the stack pointer or hard frame pointer. */ +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} + +/* Specify the initial difference between the specified pair of registers.
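+   The offset is zero when eliminating the soft frame pointer and the full frame size when eliminating the argument pointer.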
*/ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + do { \ + compute_frame_size (get_frame_size ()); \ + switch (FROM) \ + { \ + case FRAME_POINTER_REGNUM: \ + (OFFSET) = 0; \ + break; \ + case ARG_POINTER_REGNUM: \ + (OFFSET) = xtensa_current_frame_size; \ + break; \ + default: \ + gcc_unreachable (); \ + } \ + } while (0) + +/* If defined, the maximum amount of space required for outgoing + arguments will be computed and placed into the variable + 'crtl->outgoing_args_size'. No space will be pushed + onto the stack for each call; instead, the function prologue + should increase the stack frame size by this amount. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Offset from the argument pointer register to the first argument's + address. On some machines it may depend on the data type of the + function. If 'ARGS_GROW_DOWNWARD', this is the offset to the + location above the first argument's address. */ +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Align stack frames on 128 bits for Xtensa. This is necessary for + 128-bit datatypes defined in TIE (e.g., for Vectra). */ +#define STACK_BOUNDARY 128 + +/* Use a fixed register window size of 8. */ +#define WINDOW_SIZE 8 + +/* Symbolic macros for the registers used to return integer, floating + point, and values of coprocessor and user-defined modes. */ +#define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE) +#define GP_OUTGOING_RETURN (GP_REG_FIRST + 2) + +/* Symbolic macros for the first/last argument registers. */ +#define GP_ARG_FIRST (GP_REG_FIRST + 2) +#define GP_ARG_LAST (GP_REG_FIRST + 7) +#define GP_OUTGOING_ARG_FIRST (GP_REG_FIRST + 2 + WINDOW_SIZE) +#define GP_OUTGOING_ARG_LAST (GP_REG_FIRST + 7 + WINDOW_SIZE) + +#define MAX_ARGS_IN_REGISTERS 6 + +/* Don't worry about compatibility with PCC. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* A C expression that is nonzero if REGNO is the number of a hard + register in which function arguments are sometimes passed. This + does *not* include implicit arguments such as the static chain and + the structure-value address. On many machines, no registers can be + used for this purpose since all function arguments are pushed on + the stack. */ +#define FUNCTION_ARG_REGNO_P(N) \ + ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) + +/* Record the number of argument words seen so far, along with a flag to + indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG + is used for both incoming and outgoing args, so a separate flag is + needed. */ +typedef struct xtensa_args +{ + int arg_words; + int incoming; +} CUMULATIVE_ARGS; + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + init_cumulative_args (&CUM, 0) + +#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME) \ + init_cumulative_args (&CUM, 1) + +/* Profiling Xtensa code is typically done with the built-in profiling + feature of Tensilica's instruction set simulator, which does not + require any compiler support. Profiling code on a real (i.e., + non-simulated) Xtensa processor is currently only supported by + GNU/Linux with glibc. The glibc version of _mcount doesn't require + counter variables. The _mcount function needs the current PC and + the current return address to identify an arc in the call graph. + Pass the current return address as the first argument; the current + PC is available as a0 in _mcount's register window. Both of these + values contain window size information in the two most significant + bits; we assume that _mcount will mask off those bits. 
The call to + _mcount uses a window size of 8 to make sure that it doesn't clobber + any incoming argument values. */ + +#define NO_PROFILE_COUNTERS 1 + +#define FUNCTION_PROFILER(FILE, LABELNO) \ + do { \ + fprintf (FILE, "\t%s\ta10, a0\n", TARGET_DENSITY ? "mov.n" : "mov"); \ + if (flag_pic) \ + { \ + fprintf (FILE, "\tmovi\ta8, _mcount@PLT\n"); \ + fprintf (FILE, "\tcallx8\ta8\n"); \ + } \ + else \ + fprintf (FILE, "\tcall8\t_mcount\n"); \ + } while (0) + +/* Stack pointer value doesn't matter at exit. */ +#define EXIT_IGNORE_STACK 1 + +/* Size in bytes of the trampoline, as an integer. Make sure this is + a multiple of TRAMPOLINE_ALIGNMENT to avoid -Wpadded warnings. */ +#define TRAMPOLINE_SIZE (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS ? 60 : 52) + +/* Alignment required for trampolines, in bits. */ +#define TRAMPOLINE_ALIGNMENT 32 + +/* If defined, a C expression that produces the machine-specific code + to setup the stack so that arbitrary frames can be accessed. + + On Xtensa, a stack back-trace must always begin from the stack pointer, + so that the register overflow save area can be located. However, the + stack-walking code in GCC always begins from the hard_frame_pointer + register, not the stack pointer. The frame pointer is usually equal + to the stack pointer, but the __builtin_return_address and + __builtin_frame_address functions will not work if count > 0 and + they are called from a routine that uses alloca. These functions + are not guaranteed to work at all if count > 0 so maybe that is OK. + + A nicer solution would be to allow the architecture-specific files to + specify whether to start from the stack pointer or frame pointer. That + would also allow us to skip the machine->accesses_prev_frame stuff that + we currently need to ensure that there is a frame pointer when these + builtin functions are used. */ + +#define SETUP_FRAME_ADDRESSES xtensa_setup_frame_addresses + +/* A C expression whose value is RTL representing the address in a + stack frame where the pointer to the caller's frame is stored. + Assume that FRAMEADDR is an RTL expression for the address of the + stack frame itself. + + For Xtensa, there is no easy way to get the frame pointer if it is + not equivalent to the stack pointer. Moreover, the result of this + macro is used for continuing to walk back up the stack, so it must + return the stack pointer address. Thus, there is some inconsistency + here in that __builtin_frame_address will return the frame pointer + when count == 0 and the stack pointer when count > 0. */ + +#define DYNAMIC_CHAIN_ADDRESS(frame) \ + gen_rtx_PLUS (Pmode, frame, GEN_INT (-3 * UNITS_PER_WORD)) + +/* Define this if the return address of a particular stack frame is + accessed from the frame pointer of the previous stack frame. */ +#define RETURN_ADDR_IN_PREVIOUS_FRAME + +/* A C expression whose value is RTL representing the value of the + return address for the frame COUNT steps up from the current + frame, after the prologue. */ +#define RETURN_ADDR_RTX xtensa_return_addr + +/* Addressing modes, and classification of registers for them. */ + +/* C expressions which are nonzero if register number NUM is suitable + for use as a base or index register in operand addresses. */ + +#define REGNO_OK_FOR_INDEX_P(NUM) 0 +#define REGNO_OK_FOR_BASE_P(NUM) \ + (GP_REG_P (NUM) || GP_REG_P ((unsigned) reg_renumber[NUM])) + +/* C expressions that are nonzero if X (assumed to be a `reg' RTX) is + valid for use as a base or index register. 
*/ + +#ifdef REG_OK_STRICT +#define REG_OK_STRICT_FLAG 1 +#else +#define REG_OK_STRICT_FLAG 0 +#endif + +#define BASE_REG_P(X, STRICT) \ + ((!(STRICT) && REGNO (X) >= FIRST_PSEUDO_REGISTER) \ + || REGNO_OK_FOR_BASE_P (REGNO (X))) + +#define REG_OK_FOR_INDEX_P(X) 0 +#define REG_OK_FOR_BASE_P(X) BASE_REG_P (X, REG_OK_STRICT_FLAG) + +/* Maximum number of registers that can appear in a valid memory address. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* A C expression that is 1 if the RTX X is a constant which is a + valid address. This is defined to be the same as 'CONSTANT_P (X)', + but rejecting CONST_DOUBLE. */ +#define CONSTANT_ADDRESS_P(X) \ + ((GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST_INT || GET_CODE (X) == HIGH \ + || (GET_CODE (X) == CONST))) + +/* Nonzero if the constant value X is a legitimate general operand. + It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ +#define LEGITIMATE_CONSTANT_P(X) (! xtensa_tls_referenced_p (X)) + +/* A C expression that is nonzero if X is a legitimate immediate + operand on the target machine when generating position independent + code. */ +#define LEGITIMATE_PIC_OPERAND_P(X) \ + ((GET_CODE (X) != SYMBOL_REF \ + || (SYMBOL_REF_LOCAL_P (X) && !SYMBOL_REF_EXTERNAL_P (X))) \ + && GET_CODE (X) != LABEL_REF \ + && GET_CODE (X) != CONST) + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE (SImode) + +/* Define this as 1 if 'char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 0 + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 4 +#define MAX_MOVE_MAX 4 + +/* Prefer word-sized loads. */ +#define SLOW_BYTE_ACCESS 1 + +/* Shift instructions ignore all but the low-order few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 1) + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +#define Pmode SImode + +/* A function address in a call instruction is a word address (for + indexing purposes) so give the MEM rtx a word's mode. */ +#define FUNCTION_MODE SImode + +#define BRANCH_COST(speed_p, predictable_p) 3 + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ +#define REGISTER_NAMES \ +{ \ + "a0", "sp", "a2", "a3", "a4", "a5", "a6", "a7", \ + "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", \ + "fp", "argp", "b0", \ + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ + "acc" \ +} + +/* If defined, a C initializer for an array of structures containing a + name and a register number. This macro defines additional names + for hard registers, thus allowing the 'asm' option in declarations + to refer to registers using alternate names.
*/ +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + { "a1", 1 + GP_REG_FIRST } \ +} + +#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE) +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* Declare an uninitialized external linkage data object. */ +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) + +/* This is how to output an element of a case-vector that is absolute. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ + fprintf (STREAM, "%s%sL%u\n", integer_asm_op (4, TRUE), \ + LOCAL_LABEL_PREFIX, VALUE) + +/* This is how to output an element of a case-vector that is relative. + This is used for pc-relative code. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + do { \ + fprintf (STREAM, "%s%sL%u-%sL%u\n", integer_asm_op (4, TRUE), \ + LOCAL_LABEL_PREFIX, (VALUE), \ + LOCAL_LABEL_PREFIX, (REL)); \ + } while (0) + +/* This is how to output an assembler line that says to advance the + location counter to a multiple of 2**LOG bytes. */ +#define ASM_OUTPUT_ALIGN(STREAM, LOG) \ + do { \ + if ((LOG) != 0) \ + fprintf (STREAM, "\t.align\t%d\n", 1 << (LOG)); \ + } while (0) + +/* Indicate that jump tables go in the text section. This is + necessary when compiling PIC code. */ +#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic) + + +/* Define the strings to put out for each section in the object file. */ +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define BSS_SECTION_ASM_OP "\t.section\t.bss" + + +/* Define output to appear before the constant pool. */ +#define ASM_OUTPUT_POOL_PROLOGUE(FILE, FUNNAME, FUNDECL, SIZE) \ + do { \ + if ((SIZE) > 0) \ + { \ + resolve_unique_section ((FUNDECL), 0, flag_function_sections); \ + switch_to_section (function_section (FUNDECL)); \ + fprintf (FILE, "\t.literal_position\n"); \ + } \ + } while (0) + + +/* A C statement (with or without semicolon) to output a constant in + the constant pool, if it needs special treatment. */ +#define ASM_OUTPUT_SPECIAL_POOL_ENTRY(FILE, X, MODE, ALIGN, LABELNO, JUMPTO) \ + do { \ + xtensa_output_literal (FILE, X, MODE, LABELNO); \ + goto JUMPTO; \ + } while (0) + +/* How to start an assembler comment. */ +#define ASM_COMMENT_START "#" + +/* Exception handling. Xtensa uses much of the standard DWARF2 unwinding + machinery, but the variable size register window save areas are too + complicated to efficiently describe with CFI entries. The CFA must + still be specified in DWARF so that DW_AT_frame_base is set correctly + for debugging. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 0) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (0) +#define DWARF_FRAME_REGISTERS 16 +#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) + 2 : INVALID_REGNUM) +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (flag_pic \ + ? (((GLOBAL) ? DW_EH_PE_indirect : 0) \ + | DW_EH_PE_pcrel | DW_EH_PE_sdata4) \ + : DW_EH_PE_absptr) + +/* Emit a PC-relative relocation. */ +#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \ + do { \ + fputs (integer_asm_op (SIZE, FALSE), FILE); \ + assemble_name (FILE, LABEL); \ + fputs ("@pcrel", FILE); \ + } while (0) + +/* Xtensa constant pool breaks the devices in crtstuff.c to control + section in where code resides. We have to write it as asm code. 
Use + a MOVI and let the assembler relax it -- for the .init and .fini + sections, the assembler knows to put the literal in the right + place. */ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\ + movi\ta8, " USER_LABEL_PREFIX #FUNC "\n\ + callx8\ta8\n" \ + TEXT_SECTION_ASM_OP); diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md new file mode 100644 index 000000000..d6eb54891 --- /dev/null +++ b/gcc/config/xtensa/xtensa.md @@ -0,0 +1,1914 @@ +;; GCC machine description for Tensilica's Xtensa architecture. +;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 +;; Free Software Foundation, Inc. +;; Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +(define_constants [ + (A0_REG 0) + (A1_REG 1) + (A7_REG 7) + (A8_REG 8) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) + (UNSPEC_RET_ADDR 4) + (UNSPEC_TPOFF 5) + (UNSPEC_DTPOFF 6) + (UNSPEC_TLS_FUNC 7) + (UNSPEC_TLS_ARG 8) + (UNSPEC_TLS_CALL 9) + (UNSPEC_TP 10) + (UNSPEC_MEMW 11) + + (UNSPECV_SET_FP 1) + (UNSPECV_ENTRY 2) + (UNSPECV_S32RI 4) + (UNSPECV_S32C1I 5) + (UNSPECV_EH_RETURN 6) + (UNSPECV_SET_TP 7) +]) + +;; This code iterator allows signed and unsigned widening multiplications +;; to use the same template. +(define_code_iterator any_extend [sign_extend zero_extend]) + +;; <u> expands to an empty string when doing a signed operation and +;; "u" when doing an unsigned operation. +(define_code_attr u [(sign_extend "") (zero_extend "u")]) + +;; <su> is like <u>, but the signed form expands to "s" rather than "". +(define_code_attr su [(sign_extend "s") (zero_extend "u")]) + +;; This code iterator allows four integer min/max operations to be +;; generated from one template. +(define_code_iterator any_minmax [smin umin smax umax]) + +;; <minmax> expands to the opcode name for any_minmax operations. +(define_code_attr minmax [(smin "min") (umin "minu") + (smax "max") (umax "maxu")]) + +;; This code iterator is for floating-point comparisons. +(define_code_iterator any_scc_sf [eq lt le uneq unlt unle unordered]) +(define_code_attr scc_sf [(eq "oeq") (lt "olt") (le "ole") + (uneq "ueq") (unlt "ult") (unle "ule") + (unordered "un")]) + +;; This iterator and attribute allow to combine most atomic operations. +(define_code_iterator ATOMIC [and ior xor plus minus mult]) +(define_code_attr atomic [(and "and") (ior "ior") (xor "xor") + (plus "add") (minus "sub") (mult "nand")]) + +;; This mode iterator allows the HI and QI patterns to be defined from +;; the same template. +(define_mode_iterator HQI [HI QI]) + + +;; Attributes. 
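A note on the code iterators defined just above: each iterated template is stamped out once per code, so the "<u>mulsidi3" expander further down yields both mulsidi3 and umulsidi3. A minimal C sketch of the source-level constructs those two expanders serve (function names are illustrative; per the expander, TARGET_MUL32_HIGH is required, and the result is built from MULL for the low word plus MULSH or MULUH for the high word):

    /* Illustrative only.  Signed and unsigned 32x32->64 widening
       multiplies, matching the mulsidi3 / umulsidi3 expanders.  */

    long long
    widen_smul (int a, int b)
    {
      return (long long) a * b;                  /* mulsidi3 */
    }

    unsigned long long
    widen_umul (unsigned a, unsigned b)
    {
      return (unsigned long long) a * b;         /* umulsidi3 */
    }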
+ +(define_attr "type" + "unknown,jump,call,load,store,move,arith,multi,nop,farith,fmadd,fdiv,fsqrt,fconv,fload,fstore,mul16,mul32,div32,mac16,rsr,wsr,entry" + (const_string "unknown")) + +(define_attr "mode" + "unknown,none,QI,HI,SI,DI,SF,DF,BL" + (const_string "unknown")) + +(define_attr "length" "" (const_int 1)) + +;; Describe a user's asm statement. +(define_asm_attributes + [(set_attr "type" "multi")]) + + +;; Pipeline model. + +;; The Xtensa basically has simple 5-stage RISC pipeline. +;; Most instructions complete in 1 cycle, and it is OK to assume that +;; everything is fully pipelined. The exceptions have special insn +;; reservations in the pipeline description below. The Xtensa can +;; issue one instruction per cycle, so defining CPU units is unnecessary. + +(define_insn_reservation "xtensa_any_insn" 1 + (eq_attr "type" "!load,fload,rsr,mul16,mul32,fmadd,fconv") + "nothing") + +(define_insn_reservation "xtensa_memory" 2 + (eq_attr "type" "load,fload") + "nothing") + +(define_insn_reservation "xtensa_sreg" 2 + (eq_attr "type" "rsr") + "nothing") + +(define_insn_reservation "xtensa_mul16" 2 + (eq_attr "type" "mul16") + "nothing") + +(define_insn_reservation "xtensa_mul32" 2 + (eq_attr "type" "mul32") + "nothing") + +(define_insn_reservation "xtensa_fmadd" 4 + (eq_attr "type" "fmadd") + "nothing") + +(define_insn_reservation "xtensa_fconv" 2 + (eq_attr "type" "fconv") + "nothing") + +;; Include predicates and constraints. + +(include "predicates.md") +(include "constraints.md") + + +;; Addition. + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=D,D,a,a,a") + (plus:SI (match_operand:SI 1 "register_operand" "%d,d,r,r,r") + (match_operand:SI 2 "add_operand" "d,O,r,J,N")))] + "" + "@ + add.n\t%0, %1, %2 + addi.n\t%0, %1, %d2 + add\t%0, %1, %2 + addi\t%0, %1, %d2 + addmi\t%0, %1, %x2" + [(set_attr "type" "arith,arith,arith,arith,arith") + (set_attr "mode" "SI") + (set_attr "length" "2,2,3,3,3")]) + +(define_insn "*addx" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 3 "addsubx_operand" "i")) + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_ADDX" + "addx%3\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (plus:SF (match_operand:SF 1 "register_operand" "%f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "add.s\t%0, %1, %2" + [(set_attr "type" "fmadd") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Subtraction. 
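The "*addx" pattern above and the "*subx" pattern below fold a multiplication by a small power of two into the add or subtract itself. A hedged C sketch of the shapes involved (function names are illustrative; a single ADDX/SUBX instruction is only possible when the Xtensa ADDX option, TARGET_ADDX, is configured and combine matches the form):

    /* Illustrative only.  With TARGET_ADDX each body can become one
       instruction (addx4 / subx4); otherwise a separate shift plus an
       add or subtract is emitted.  */

    int
    scaled_add (int a, int b)
    {
      return a * 4 + b;       /* (plus (mult a 4) b)  -> addx4 */
    }

    int
    scaled_sub (int a, int b)
    {
      return a * 4 - b;       /* (minus (mult a 4) b) -> subx4 */
    }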
+ +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "sub\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "*subx" + [(set (match_operand:SI 0 "register_operand" "=a") + (minus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 3 "addsubx_operand" "i")) + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_ADDX" + "subx%3\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (minus:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "sub.s\t%0, %1, %2" + [(set_attr "type" "fmadd") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Multiplication. + +(define_expand "<u>mulsidi3" + [(set (match_operand:DI 0 "register_operand") + (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) + (any_extend:DI (match_operand:SI 2 "register_operand"))))] + "TARGET_MUL32_HIGH" +{ + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); + emit_insn (gen_<u>mulsi3_highpart (gen_highpart (SImode, operands[0]), + operands[1], operands[2])); + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); + DONE; +}) + +(define_insn "<u>mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=a") + (truncate:SI + (lshiftrt:DI + (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "%r")) + (any_extend:DI (match_operand:SI 2 "register_operand" "r"))) + (const_int 32))))] + "TARGET_MUL32_HIGH" + "mul<su>h\t%0, %1, %2" + [(set_attr "type" "mul32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "mulsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (mult:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_MUL32" + "mull\t%0, %1, %2" + [(set_attr "type" "mul32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=C,A") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "%r,r")) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "r,r"))))] + "TARGET_MUL16 || TARGET_MAC16" + "@ + mul16s\t%0, %1, %2 + mul.aa.ll\t%1, %2" + [(set_attr "type" "mul16,mac16") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "umulhisi3" + [(set (match_operand:SI 0 "register_operand" "=C,A") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "%r,r")) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "r,r"))))] + "TARGET_MUL16 || TARGET_MAC16" + "@ + mul16u\t%0, %1, %2 + umul.aa.ll\t%1, %2" + [(set_attr "type" "mul16,mac16") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "muladdhisi" + [(set (match_operand:SI 0 "register_operand" "=A") + (plus:SI (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "%r")) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "r"))) + (match_operand:SI 3 "register_operand" "0")))] + "TARGET_MAC16" + "mula.aa.ll\t%1, %2" + [(set_attr "type" "mac16") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "mulsubhisi" + [(set (match_operand:SI 0 "register_operand" "=A") + (minus:SI (match_operand:SI 1 "register_operand" "0") + (mult:SI 
(sign_extend:SI + (match_operand:HI 2 "register_operand" "%r")) + (sign_extend:SI + (match_operand:HI 3 "register_operand" "r")))))] + "TARGET_MAC16" + "muls.aa.ll\t%2, %3" + [(set_attr "type" "mac16") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (mult:SF (match_operand:SF 1 "register_operand" "%f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "mul.s\t%0, %1, %2" + [(set_attr "type" "fmadd") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "fmasf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "0")))] + "TARGET_HARD_FLOAT" + "madd.s\t%0, %1, %2" + [(set_attr "type" "fmadd") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +;; Note that (C - A*B) = (-A*B + C) +(define_insn "fnmasf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "0")))] + "TARGET_HARD_FLOAT" + "msub.s\t%0, %1, %2" + [(set_attr "type" "fmadd") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Division. + +(define_insn "divsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_DIV32" + "quos\t%0, %1, %2" + [(set_attr "type" "div32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "udivsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_DIV32" + "quou\t%0, %1, %2" + [(set_attr "type" "div32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "divsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (div:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT_DIV" + "div.s\t%0, %1, %2" + [(set_attr "type" "fdiv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "*recipsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (div:SF (match_operand:SF 1 "const_float_1_operand" "") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT_RECIP && flag_unsafe_math_optimizations" + "recip.s\t%0, %2" + [(set_attr "type" "fdiv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Remainders. + +(define_insn "modsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (mod:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_DIV32" + "rems\t%0, %1, %2" + [(set_attr "type" "div32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "umodsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (umod:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_DIV32" + "remu\t%0, %1, %2" + [(set_attr "type" "div32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + + +;; Square roots. 
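For the square-root patterns that follow, a short illustrative C sketch (function names are examples only): sqrtf can expand inline through "sqrtsf2" when the hardware square root is configured, and the reciprocal form additionally needs unsafe math optimizations, as stated in "*rsqrtsf2".

    /* Illustrative only.  Inline expansion of sqrtf generally also
       needs -fno-math-errno (or -ffast-math), since the library call
       may set errno; 1.0f/sqrtf additionally needs
       flag_unsafe_math_optimizations per the "*rsqrtsf2" condition.  */
    #include <math.h>

    float
    root (float x)
    {
      return sqrtf (x);          /* sqrt.s, given TARGET_HARD_FLOAT_SQRT  */
    }

    float
    inv_root (float x)
    {
      return 1.0f / sqrtf (x);   /* rsqrt.s, given TARGET_HARD_FLOAT_RSQRT */
    }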
+ +(define_insn "sqrtsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (sqrt:SF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT_SQRT" + "sqrt.s\t%0, %1" + [(set_attr "type" "fsqrt") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "*rsqrtsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (div:SF (match_operand:SF 1 "const_float_1_operand" "") + (sqrt:SF (match_operand:SF 2 "register_operand" "f"))))] + "TARGET_HARD_FLOAT_RSQRT && flag_unsafe_math_optimizations" + "rsqrt.s\t%0, %2" + [(set_attr "type" "fsqrt") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Absolute value. + +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=a") + (abs:SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_ABS" + "abs\t%0, %1" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (abs:SF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "abs.s\t%0, %1" + [(set_attr "type" "farith") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Min and max. + +(define_insn "<code>si3" + [(set (match_operand:SI 0 "register_operand" "=a") + (any_minmax:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_MINMAX" + "<minmax>\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + + +;; Count leading/trailing zeros and find first bit. + +(define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=a") + (clz:SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_NSA" + "nsau\t%0, %1" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_expand "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "") + (ctz:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_NSA" +{ + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_negsi2 (temp, operands[1])); + emit_insn (gen_andsi3 (temp, temp, operands[1])); + emit_insn (gen_clzsi2 (temp, temp)); + emit_insn (gen_negsi2 (temp, temp)); + emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (31))); + DONE; +}) + +(define_expand "ffssi2" + [(set (match_operand:SI 0 "register_operand" "") + (ffs:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_NSA" +{ + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_negsi2 (temp, operands[1])); + emit_insn (gen_andsi3 (temp, temp, operands[1])); + emit_insn (gen_clzsi2 (temp, temp)); + emit_insn (gen_negsi2 (temp, temp)); + emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (32))); + DONE; +}) + + +;; Negation and one's complement. + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=a") + (neg:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "neg\t%0, %1" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "") + (not:SI (match_operand:SI 1 "register_operand" "")))] + "" +{ + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_movsi (temp, constm1_rtx)); + emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); + DONE; +}) + +(define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "neg.s\t%0, %1" + [(set_attr "type" "farith") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Logical instructions. 
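The first alternative of "andsi3" below turns an AND with a contiguous low-order mask into a single EXTUI field extract; any other mask uses the plain AND form. A short illustrative C sketch (function names are examples only):

    /* Illustrative only.  A low-order mask such as 0xff satisfies
       mask_operand (constraint "P") and becomes one EXTUI; a mask held
       in a register uses the register AND alternative.  */

    unsigned
    low_byte (unsigned x)
    {
      return x & 0xff;           /* extui  %0, %1, 0, 8 */
    }

    unsigned
    masked (unsigned x, unsigned m)
    {
      return x & m;              /* and    %0, %1, %2   */
    }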
+ +(define_insn "andsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (and:SI (match_operand:SI 1 "register_operand" "%r,r") + (match_operand:SI 2 "mask_operand" "P,r")))] + "" + "@ + extui\t%0, %1, 0, %K2 + and\t%0, %1, %2" + [(set_attr "type" "arith,arith") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "or\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (xor:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "xor\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + + +;; Zero-extend instructions. + +(define_insn "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (zero_extend:SI (match_operand:HI 1 "nonimmed_operand" "r,U")))] + "" + "@ + extui\t%0, %1, 0, 16 + l16ui\t%0, %1" + [(set_attr "type" "arith,load") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (zero_extend:SI (match_operand:QI 1 "nonimmed_operand" "r,U")))] + "" + "@ + extui\t%0, %1, 0, 8 + l8ui\t%0, %1" + [(set_attr "type" "arith,load") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + + +;; Sign-extend instructions. + +(define_expand "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "register_operand" "")))] + "" +{ + if (sext_operand (operands[1], HImode)) + emit_insn (gen_extendhisi2_internal (operands[0], operands[1])); + else + xtensa_extend_reg (operands[0], operands[1]); + DONE; +}) + +(define_insn "extendhisi2_internal" + [(set (match_operand:SI 0 "register_operand" "=B,a") + (sign_extend:SI (match_operand:HI 1 "sext_operand" "r,U")))] + "" + "@ + sext\t%0, %1, 15 + l16si\t%0, %1" + [(set_attr "type" "arith,load") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_expand "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "register_operand" "")))] + "" +{ + if (TARGET_SEXT) + emit_insn (gen_extendqisi2_internal (operands[0], operands[1])); + else + xtensa_extend_reg (operands[0], operands[1]); + DONE; +}) + +(define_insn "extendqisi2_internal" + [(set (match_operand:SI 0 "register_operand" "=B") + (sign_extend:SI (match_operand:QI 1 "register_operand" "r")))] + "TARGET_SEXT" + "sext\t%0, %1, 7" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + + +;; Field extract instructions. + +(define_expand "extv" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")))] + "TARGET_SEXT" +{ + if (!sext_fldsz_operand (operands[2], SImode)) + FAIL; + + /* We could expand to a right shift followed by SEXT but that's + no better than the standard left and right shift sequence. 
*/ + if (!lsbitnum_operand (operands[3], SImode)) + FAIL; + + emit_insn (gen_extv_internal (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_insn "extv_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "sext_fldsz_operand" "i") + (match_operand:SI 3 "lsbitnum_operand" "i")))] + "TARGET_SEXT" +{ + int fldsz = INTVAL (operands[2]); + operands[2] = GEN_INT (fldsz - 1); + return "sext\t%0, %1, %2"; +} + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_expand "extzv" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")))] + "" +{ + if (!extui_fldsz_operand (operands[2], SImode)) + FAIL; + emit_insn (gen_extzv_internal (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_insn "extzv_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "extui_fldsz_operand" "i") + (match_operand:SI 3 "const_int_operand" "i")))] + "" +{ + int shift; + if (BITS_BIG_ENDIAN) + shift = (32 - (INTVAL (operands[2]) + INTVAL (operands[3]))) & 0x1f; + else + shift = INTVAL (operands[3]) & 0x1f; + operands[3] = GEN_INT (shift); + return "extui\t%0, %1, %3, %2"; +} + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + + +;; Conversions. + +(define_insn "fix_truncsfsi2" + [(set (match_operand:SI 0 "register_operand" "=a") + (fix:SI (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "trunc.s\t%0, %1, 0" + [(set_attr "type" "fconv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "fixuns_truncsfsi2" + [(set (match_operand:SI 0 "register_operand" "=a") + (unsigned_fix:SI (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "utrunc.s\t%0, %1, 0" + [(set_attr "type" "fconv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:SI 1 "register_operand" "a")))] + "TARGET_HARD_FLOAT" + "float.s\t%0, %1, 0" + [(set_attr "type" "fconv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "floatunssisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (unsigned_float:SF (match_operand:SI 1 "register_operand" "a")))] + "TARGET_HARD_FLOAT" + "ufloat.s\t%0, %1, 0" + [(set_attr "type" "fconv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Data movement instructions. 
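Before the move patterns, an illustrative C sketch of how 32-bit constants are materialized by the "movsi" machinery below: small values fit the signed 12-bit MOVI immediate (constraint "I"), larger ones are loaded from the literal pool with L32R (constraint "T"), or built with a CONST16 pair when -mconst16 is in use. Function names are examples only.

    /* Illustrative only; the exact choice depends on the configured
       options (-mconst16, -mtext-section-literals) and on whether the
       density (narrow) encodings are available.  */

    int
    small_constant (void)
    {
      return 100;                /* movi: fits the signed 12-bit range   */
    }

    int
    large_constant (void)
    {
      return 0x12345678;         /* l32r from the pool, or a const16 pair */
    }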
+ +;; 64-bit Integer moves + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmed_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" +{ + if (CONSTANT_P (operands[1]) && !TARGET_CONST16) + operands[1] = force_const_mem (DImode, operands[1]); + + if (!register_operand (operands[0], DImode) + && !register_operand (operands[1], DImode)) + operands[1] = force_reg (DImode, operands[1]); + + operands[1] = xtensa_copy_incoming_a7 (operands[1]); +}) + +(define_insn_and_split "movdi_internal" + [(set (match_operand:DI 0 "nonimmed_operand" "=a,W,a,a,U") + (match_operand:DI 1 "move_operand" "r,i,T,U,r"))] + "register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] +{ + xtensa_split_operand_pair (operands, SImode); + if (reg_overlap_mentioned_p (operands[0], operands[3])) + { + rtx tmp; + tmp = operands[0], operands[0] = operands[1], operands[1] = tmp; + tmp = operands[2], operands[2] = operands[3], operands[3] = tmp; + } +}) + +;; 32-bit Integer moves + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmed_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" +{ + if (xtensa_emit_move_sequence (operands, SImode)) + DONE; +}) + +(define_insn "movsi_internal" + [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,D,R,R,a,q,a,W,a,a,U,*a,*A") + (match_operand:SI 1 "move_operand" "M,D,d,R,D,d,r,r,I,i,T,U,r,*A,*r"))] + "xtensa_valid_move (SImode, operands)" + "@ + movi.n\t%0, %x1 + mov.n\t%0, %1 + mov.n\t%0, %1 + %v1l32i.n\t%0, %1 + %v0s32i.n\t%1, %0 + %v0s32i.n\t%1, %0 + mov\t%0, %1 + movsp\t%0, %1 + movi\t%0, %x1 + const16\t%0, %t1\;const16\t%0, %b1 + %v1l32r\t%0, %1 + %v1l32i\t%0, %1 + %v0s32i\t%1, %0 + rsr\t%0, ACCLO + wsr\t%1, ACCLO" + [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,load,load,store,rsr,wsr") + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,6,3,3,3,3,3")]) + +;; 16-bit Integer moves + +(define_expand "movhi" + [(set (match_operand:HI 0 "nonimmed_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" +{ + if (xtensa_emit_move_sequence (operands, HImode)) + DONE; +}) + +(define_insn "movhi_internal" + [(set (match_operand:HI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A") + (match_operand:HI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))] + "xtensa_valid_move (HImode, operands)" + "@ + movi.n\t%0, %x1 + mov.n\t%0, %1 + mov\t%0, %1 + movi\t%0, %x1 + %v1l16ui\t%0, %1 + %v0s16i\t%1, %0 + rsr\t%0, ACCLO + wsr\t%1, ACCLO" + [(set_attr "type" "move,move,move,move,load,store,rsr,wsr") + (set_attr "mode" "HI") + (set_attr "length" "2,2,3,3,3,3,3,3")]) + +;; 8-bit Integer moves + +(define_expand "movqi" + [(set (match_operand:QI 0 "nonimmed_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" +{ + if (xtensa_emit_move_sequence (operands, QImode)) + DONE; +}) + +(define_insn "movqi_internal" + [(set (match_operand:QI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A") + (match_operand:QI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))] + "xtensa_valid_move (QImode, operands)" + "@ + movi.n\t%0, %x1 + mov.n\t%0, %1 + mov\t%0, %1 + movi\t%0, %x1 + %v1l8ui\t%0, %1 + %v0s8i\t%1, %0 + rsr\t%0, ACCLO + wsr\t%1, ACCLO" + [(set_attr "type" "move,move,move,move,load,store,rsr,wsr") + (set_attr "mode" "QI") + (set_attr "length" "2,2,3,3,3,3,3,3")]) + +;; Sub-word reloads from the constant pool. 
+ +(define_expand "reload<mode>_literal" + [(parallel [(match_operand:HQI 0 "register_operand" "=r") + (match_operand:HQI 1 "constantpool_operand" "") + (match_operand:SI 2 "register_operand" "=&r")])] + "" +{ + rtx lit, scratch; + unsigned word_off, byte_off; + + if (MEM_P (operands[1])) + { + lit = operands[1]; + word_off = 0; + byte_off = 0; + } + else + { + gcc_assert (GET_CODE (operands[1]) == SUBREG); + lit = SUBREG_REG (operands[1]); + word_off = SUBREG_BYTE (operands[1]) & ~(UNITS_PER_WORD - 1); + byte_off = SUBREG_BYTE (operands[1]) - word_off; + } + + lit = adjust_address (lit, SImode, word_off); + scratch = operands[2]; + emit_insn (gen_movsi (scratch, lit)); + emit_insn (gen_mov<mode> (operands[0], + gen_rtx_SUBREG (<MODE>mode, scratch, byte_off))); + + DONE; +}) + +;; 32-bit floating point moves + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmed_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" +{ + if (!TARGET_CONST16 && CONSTANT_P (operands[1])) + operands[1] = force_const_mem (SFmode, operands[1]); + + if ((!register_operand (operands[0], SFmode) + && !register_operand (operands[1], SFmode)) + || (FP_REG_P (xt_true_regnum (operands[0])) + && !(reload_in_progress | reload_completed) + && (constantpool_mem_p (operands[1]) + || CONSTANT_P (operands[1])))) + operands[1] = force_reg (SFmode, operands[1]); + + operands[1] = xtensa_copy_incoming_a7 (operands[1]); +}) + +(define_insn "movsf_internal" + [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,U,D,D,R,a,f,a,W,a,a,U") + (match_operand:SF 1 "move_operand" "f,U,f,d,R,d,r,r,f,iF,T,U,r"))] + "((register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode)) + && !(FP_REG_P (xt_true_regnum (operands[0])) + && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))" + "@ + mov.s\t%0, %1 + %v1lsi\t%0, %1 + %v0ssi\t%1, %0 + mov.n\t%0, %1 + %v1l32i.n\t%0, %1 + %v0s32i.n\t%1, %0 + mov\t%0, %1 + wfr\t%0, %1 + rfr\t%0, %1 + const16\t%0, %t1\;const16\t%0, %b1 + %v1l32r\t%0, %1 + %v1l32i\t%0, %1 + %v0s32i\t%1, %0" + [(set_attr "type" "farith,fload,fstore,move,load,store,move,farith,farith,move,load,load,store") + (set_attr "mode" "SF") + (set_attr "length" "3,3,3,2,2,2,3,3,3,6,3,3,3")]) + +(define_insn "*lsiu" + [(set (match_operand:SF 0 "register_operand" "=f") + (mem:SF (plus:SI (match_operand:SI 1 "register_operand" "+a") + (match_operand:SI 2 "fpmem_offset_operand" "i")))) + (set (match_dup 1) + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_HARD_FLOAT" +{ + if (TARGET_SERIALIZE_VOLATILE && volatile_refs_p (PATTERN (insn))) + output_asm_insn ("memw", operands); + return "lsiu\t%0, %1, %2"; +} + [(set_attr "type" "fload") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "*ssiu" + [(set (mem:SF (plus:SI (match_operand:SI 0 "register_operand" "+a") + (match_operand:SI 1 "fpmem_offset_operand" "i"))) + (match_operand:SF 2 "register_operand" "f")) + (set (match_dup 0) + (plus:SI (match_dup 0) (match_dup 1)))] + "TARGET_HARD_FLOAT" +{ + if (TARGET_SERIALIZE_VOLATILE && volatile_refs_p (PATTERN (insn))) + output_asm_insn ("memw", operands); + return "ssiu\t%2, %0, %1"; +} + [(set_attr "type" "fstore") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +;; 64-bit floating point moves + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmed_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" +{ + if (CONSTANT_P (operands[1]) && !TARGET_CONST16) + operands[1] = force_const_mem (DFmode, operands[1]); + + if (!register_operand (operands[0], 
DFmode) + && !register_operand (operands[1], DFmode)) + operands[1] = force_reg (DFmode, operands[1]); + + operands[1] = xtensa_copy_incoming_a7 (operands[1]); +}) + +(define_insn_and_split "movdf_internal" + [(set (match_operand:DF 0 "nonimmed_operand" "=a,W,a,a,U") + (match_operand:DF 1 "move_operand" "r,iF,T,U,r"))] + "register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] +{ + xtensa_split_operand_pair (operands, SFmode); + if (reg_overlap_mentioned_p (operands[0], operands[3])) + { + rtx tmp; + tmp = operands[0], operands[0] = operands[1], operands[1] = tmp; + tmp = operands[2], operands[2] = operands[3], operands[3] = tmp; + } +}) + +;; Block moves + +(define_expand "movmemsi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "")) + (use (match_operand:SI 2 "arith_operand" "")) + (use (match_operand:SI 3 "const_int_operand" ""))])] + "" +{ + if (!xtensa_expand_block_move (operands)) + FAIL; + DONE; +}) + + +;; Shift instructions. + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arith_operand" "")))] + "" +{ + operands[1] = xtensa_copy_incoming_a7 (operands[1]); +}) + +(define_insn "ashlsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] + "" + "@ + slli\t%0, %1, %R2 + ssl\t%2\;sll\t%0, %1" + [(set_attr "type" "arith,arith") + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +(define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] + "" + "@ + srai\t%0, %1, %R2 + ssr\t%2\;sra\t%0, %1" + [(set_attr "type" "arith,arith") + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] + "" +{ + if (which_alternative == 0) + { + if ((INTVAL (operands[2]) & 0x1f) < 16) + return "srli\t%0, %1, %R2"; + else + return "extui\t%0, %1, %R2, %L2"; + } + return "ssr\t%2\;srl\t%0, %1"; +} + [(set_attr "type" "arith,arith") + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +(define_insn "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (rotate:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] + "" + "@ + ssai\t%L2\;src\t%0, %1, %1 + ssl\t%2\;src\t%0, %1, %1" + [(set_attr "type" "multi,multi") + (set_attr "mode" "SI") + (set_attr "length" "6,6")]) + +(define_insn "rotrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (rotatert:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] + "" + "@ + ssai\t%R2\;src\t%0, %1, %1 + ssr\t%2\;src\t%0, %1, %1" + [(set_attr "type" "multi,multi") + (set_attr "mode" "SI") + (set_attr "length" "6,6")]) + + +;; Comparisons. + +;; Conditional branches. 
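The integer branch patterns below fuse the comparison and the branch into a single instruction; there is no separate condition-code register for integer comparisons. The "K" and "L" alternatives accept the small sets of encodable branch immediates (b4const/b4constu, or zero). An illustrative C sketch (function names are examples only):

    /* Illustrative only.  Whether a one-instruction compare-and-branch
       such as beqi or bnez is used depends on the comparison and on
       whether the constant is in the encodable immediate set.  */

    void
    maybe_call (int x, void (*hook) (void))
    {
      if (x == 4)       /* 4 is in the encodable branch-immediate set */
        hook ();
      if (x != 0)       /* comparisons against zero always encode     */
        hook ();
    }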
+ +(define_expand "cbranchsi4" + [(match_operator 0 "comparison_operator" + [(match_operand:SI 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand")]) + (match_operand 3 "")] + "" +{ + xtensa_expand_conditional_branch (operands, SImode); + DONE; +}) + +(define_expand "cbranchsf4" + [(match_operator 0 "comparison_operator" + [(match_operand:SF 1 "register_operand") + (match_operand:SF 2 "register_operand")]) + (match_operand 3 "")] + "TARGET_HARD_FLOAT" +{ + xtensa_expand_conditional_branch (operands, SFmode); + DONE; +}) + +;; Branch patterns for standard integer comparisons + +(define_insn "*btrue" + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" + [(match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "branch_operand" "K,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +{ + return xtensa_emit_branch (false, which_alternative == 0, operands); +} + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") + (set_attr "length" "3,3")]) + +(define_insn "*bfalse" + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" + [(match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "branch_operand" "K,r")]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" +{ + return xtensa_emit_branch (true, which_alternative == 0, operands); +} + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") + (set_attr "length" "3,3")]) + +(define_insn "*ubtrue" + [(set (pc) + (if_then_else (match_operator 3 "ubranch_operator" + [(match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "ubranch_operand" "L,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +{ + return xtensa_emit_branch (false, which_alternative == 0, operands); +} + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") + (set_attr "length" "3,3")]) + +(define_insn "*ubfalse" + [(set (pc) + (if_then_else (match_operator 3 "ubranch_operator" + [(match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "ubranch_operand" "L,r")]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" +{ + return xtensa_emit_branch (true, which_alternative == 0, operands); +} + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") + (set_attr "length" "3,3")]) + +;; Branch patterns for bit testing + +(define_insn "*bittrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" + [(zero_extract:SI + (match_operand:SI 0 "register_operand" "r,r") + (const_int 1) + (match_operand:SI 1 "arith_operand" "J,r")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +{ + return xtensa_emit_bit_branch (false, which_alternative == 0, operands); +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "*bitfalse" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" + [(zero_extract:SI + (match_operand:SI 0 "register_operand" "r,r") + (const_int 1) + (match_operand:SI 1 "arith_operand" "J,r")) + (const_int 0)]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" +{ + return xtensa_emit_bit_branch (true, which_alternative == 0, operands); +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "*masktrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +{ + switch (GET_CODE (operands[3])) + { + case EQ: return "bnone\t%0, %1, %2"; + case NE: return 
"bany\t%0, %1, %2"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "*maskfalse" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r")) + (const_int 0)]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" +{ + switch (GET_CODE (operands[3])) + { + case EQ: return "bany\t%0, %1, %2"; + case NE: return "bnone\t%0, %1, %2"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + + +;; Define the loop insns used by bct optimization to represent the +;; start and end of a zero-overhead loop (in loop.c). This start +;; template generates the loop insn; the end template doesn't generate +;; any instructions since loop end is handled in hardware. + +(define_insn "zero_cost_loop_start" + [(set (pc) + (if_then_else (eq (match_operand:SI 0 "register_operand" "a") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (reg:SI 19) + (plus:SI (match_dup 0) (const_int -1)))] + "" + "loopnez\t%0, %l1" + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "zero_cost_loop_end" + [(set (pc) + (if_then_else (ne (reg:SI 19) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc))) + (set (reg:SI 19) + (plus:SI (reg:SI 19) (const_int -1)))] + "" +{ + xtensa_emit_loop_end (insn, operands); + return ""; +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "0")]) + + +;; Setting a register from a comparison. + +(define_expand "cstoresi4" + [(match_operand:SI 0 "register_operand") + (match_operator 1 "xtensa_cstoresi_operator" + [(match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonmemory_operand")])] + "" +{ + if (!xtensa_expand_scc (operands, SImode)) + FAIL; + DONE; +}) + +(define_expand "cstoresf4" + [(match_operand:SI 0 "register_operand") + (match_operator:SI 1 "comparison_operator" + [(match_operand:SF 2 "register_operand") + (match_operand:SF 3 "register_operand")])] + "TARGET_HARD_FLOAT" +{ + if (!xtensa_expand_scc (operands, SFmode)) + FAIL; + DONE; +}) + + + +;; Conditional moves. 
+ +(define_expand "movsicc" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "")))] + "" +{ + if (!xtensa_expand_conditional_move (operands, 0)) + FAIL; + DONE; +}) + +(define_expand "movsfcc" + [(set (match_operand:SF 0 "register_operand" "") + (if_then_else:SF (match_operand 1 "comparison_operator" "") + (match_operand:SF 2 "register_operand" "") + (match_operand:SF 3 "register_operand" "")))] + "" +{ + if (!xtensa_expand_conditional_move (operands, 1)) + FAIL; + DONE; +}) + +(define_insn "movsicc_internal0" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (if_then_else:SI (match_operator 4 "branch_operator" + [(match_operand:SI 1 "register_operand" "r,r") + (const_int 0)]) + (match_operand:SI 2 "register_operand" "r,0") + (match_operand:SI 3 "register_operand" "0,r")))] + "" +{ + return xtensa_emit_movcc (which_alternative == 1, false, false, operands); +} + [(set_attr "type" "move,move") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "movsicc_internal1" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (if_then_else:SI (match_operator 4 "boolean_operator" + [(match_operand:CC 1 "register_operand" "b,b") + (const_int 0)]) + (match_operand:SI 2 "register_operand" "r,0") + (match_operand:SI 3 "register_operand" "0,r")))] + "TARGET_BOOLEANS" +{ + return xtensa_emit_movcc (which_alternative == 1, false, true, operands); +} + [(set_attr "type" "move,move") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "movsfcc_internal0" + [(set (match_operand:SF 0 "register_operand" "=a,a,f,f") + (if_then_else:SF (match_operator 4 "branch_operator" + [(match_operand:SI 1 "register_operand" "r,r,r,r") + (const_int 0)]) + (match_operand:SF 2 "register_operand" "r,0,f,0") + (match_operand:SF 3 "register_operand" "0,r,0,f")))] + "" +{ + return xtensa_emit_movcc ((which_alternative & 1) == 1, + which_alternative >= 2, false, operands); +} + [(set_attr "type" "move,move,move,move") + (set_attr "mode" "SF") + (set_attr "length" "3,3,3,3")]) + +(define_insn "movsfcc_internal1" + [(set (match_operand:SF 0 "register_operand" "=a,a,f,f") + (if_then_else:SF (match_operator 4 "boolean_operator" + [(match_operand:CC 1 "register_operand" "b,b,b,b") + (const_int 0)]) + (match_operand:SF 2 "register_operand" "r,0,f,0") + (match_operand:SF 3 "register_operand" "0,r,0,f")))] + "TARGET_BOOLEANS" +{ + return xtensa_emit_movcc ((which_alternative & 1) == 1, + which_alternative >= 2, true, operands); +} + [(set_attr "type" "move,move,move,move") + (set_attr "mode" "SF") + (set_attr "length" "3,3,3,3")]) + + +;; Floating-point comparisons. + +(define_insn "s<code>_sf" + [(set (match_operand:CC 0 "register_operand" "=b") + (any_scc_sf:CC (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "<scc_sf>.s\t%0, %1, %2" + [(set_attr "type" "farith") + (set_attr "mode" "BL") + (set_attr "length" "3")]) + + +;; Unconditional branches. 
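Direct jumps below use J; any jump through a register, including GNU C computed goto, goes through "indirect_jump" and ends up as JX. For switch tables, the "tablejump" expander adds the table base when compiling PIC, because the entries are then label differences (see ASM_OUTPUT_ADDR_DIFF_ELT in xtensa.h). An illustrative sketch of a computed goto (names are examples only):

    /* Illustrative only (GNU C labels-as-values).  The goto through
       the array element is an indirect jump, i.e. a JX.  */

    int
    dispatch (int which)
    {
      static void *const targets[] = { &&zero, &&one };
      if ((unsigned) which < 2)
        goto *targets[which];
      return -1;
     zero:
      return 0;
     one:
      return 1;
    }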
+ +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "j\t%l0" + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_expand "indirect_jump" + [(set (pc) + (match_operand 0 "register_operand" ""))] + "" +{ + rtx dest = operands[0]; + if (GET_CODE (dest) != REG || GET_MODE (dest) != Pmode) + operands[0] = copy_to_mode_reg (Pmode, dest); + + emit_jump_insn (gen_indirect_jump_internal (dest)); + DONE; +}) + +(define_insn "indirect_jump_internal" + [(set (pc) (match_operand:SI 0 "register_operand" "r"))] + "" + "jx\t%0" + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + + +(define_expand "tablejump" + [(use (match_operand:SI 0 "register_operand" "")) + (use (label_ref (match_operand 1 "" "")))] + "" +{ + rtx target = operands[0]; + if (flag_pic) + { + /* For PIC, the table entry is relative to the start of the table. */ + rtx label = gen_reg_rtx (SImode); + target = gen_reg_rtx (SImode); + emit_move_insn (label, gen_rtx_LABEL_REF (SImode, operands[1])); + emit_insn (gen_addsi3 (target, operands[0], label)); + } + emit_jump_insn (gen_tablejump_internal (target, operands[1])); + DONE; +}) + +(define_insn "tablejump_internal" + [(set (pc) + (match_operand:SI 0 "register_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "" + "jx\t%0" + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + + +;; Function calls. + +(define_expand "sym_PLT" + [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_PLT))] + "" + "") + +(define_expand "call" + [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "" ""))] + "" +{ + rtx addr = XEXP (operands[0], 0); + if (flag_pic && GET_CODE (addr) == SYMBOL_REF + && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) + addr = gen_sym_PLT (addr); + if (!call_insn_operand (addr, VOIDmode)) + XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); +}) + +(define_insn "call_internal" + [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) + (match_operand 1 "" "i"))] + "" +{ + return xtensa_emit_call (0, operands); +} + [(set_attr "type" "call") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_expand "call_value" + [(set (match_operand 0 "register_operand" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "" "")))] + "" +{ + rtx addr = XEXP (operands[1], 0); + if (flag_pic && GET_CODE (addr) == SYMBOL_REF + && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) + addr = gen_sym_PLT (addr); + if (!call_insn_operand (addr, VOIDmode)) + XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr); +}) + +(define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=a") + (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) + (match_operand 2 "" "i")))] + "" +{ + return xtensa_emit_call (1, operands); +} + [(set_attr "type" "call") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] + UNSPECV_ENTRY))] + "" + "entry\tsp, %0" + [(set_attr "type" "entry") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "return" + [(return) + (use (reg:SI A0_REG))] + "reload_completed" +{ + return (TARGET_DENSITY ? "retw.n" : "retw"); +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "2")]) + + +;; Miscellaneous instructions. 
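The "nonlocal_goto" expander in this section backs the GNU C nested-function extension when a nested function jumps to a label of its containing function. A hedged sketch of source code that exercises it (names are illustrative):

    /* Illustrative only (GNU C nested functions and local labels).
       The goto inside bail() targets a label of the enclosing
       function, which is a nonlocal goto.  */

    int
    outer (int x)
    {
      __label__ done;
      void bail (void) { goto done; }

      if (x < 0)
        bail ();
      return 0;
     done:
      return 1;
    }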
+ +(define_expand "prologue" + [(const_int 0)] + "" +{ + xtensa_expand_prologue (); + DONE; +}) + +(define_expand "epilogue" + [(return)] + "" +{ + emit_jump_insn (gen_return ()); + DONE; +}) + +(define_insn "nop" + [(const_int 0)] + "" +{ + return (TARGET_DENSITY ? "nop.n" : "nop"); +} + [(set_attr "type" "nop") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_expand "nonlocal_goto" + [(match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "") + (match_operand:SI 3 "" "")] + "" +{ + xtensa_expand_nonlocal_goto (operands); + DONE; +}) + +;; Stuff an address into the return address register along with the window +;; size in the high bits. Because we don't have the window size of the +;; previous frame, assume the function called out with a CALL8 since that +;; is what compilers always use. Note: __builtin_frob_return_addr has +;; already been applied to the handler, but the generic version doesn't +;; allow us to frob it quite enough, so we just frob here. + +(define_insn_and_split "eh_return" + [(set (reg:SI A0_REG) + (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + UNSPECV_EH_RETURN)) + (clobber (match_scratch:SI 1 "=r"))] + "" + "#" + "reload_completed" + [(set (match_dup 1) (ashift:SI (match_dup 0) (const_int 2))) + (set (match_dup 1) (plus:SI (match_dup 1) (const_int 2))) + (set (reg:SI A0_REG) (rotatert:SI (match_dup 1) (const_int 2)))] + "") + +;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't +;; know if a frame pointer is required until the reload pass, and +;; because there may be an incoming argument value in the hard frame +;; pointer register (a7). If there is an incoming argument in that +;; register, the "set_frame_ptr" insn gets inserted immediately after +;; the insn that copies the incoming argument to a pseudo or to the +;; stack. This serves several purposes here: (1) it keeps the +;; optimizer from copy-propagating or scheduling the use of a7 as an +;; incoming argument away from the beginning of the function; (2) we +;; can use a post-reload splitter to expand away the insn if a frame +;; pointer is not required, so that the post-reload scheduler can do +;; the right thing; and (3) it makes it easy for the prologue expander +;; to search for this insn to determine whether it should add a new insn +;; to set up the frame pointer. + +(define_insn "set_frame_ptr" + [(set (reg:SI A7_REG) (unspec_volatile:SI [(const_int 0)] UNSPECV_SET_FP))] + "" +{ + if (frame_pointer_needed) + return "mov\ta7, sp"; + return ""; +} + [(set_attr "type" "move") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +;; Post-reload splitter to remove fp assignment when it's not needed. +(define_split + [(set (reg:SI A7_REG) (unspec_volatile:SI [(const_int 0)] UNSPECV_SET_FP))] + "reload_completed && !frame_pointer_needed" + [(unspec [(const_int 0)] UNSPEC_NOP)] + "") + +;; The preceding splitter needs something to split the insn into; +;; things start breaking if the result is just a "use" so instead we +;; generate the following insn. 
+(define_insn "*unspec_nop" + [(unspec [(const_int 0)] UNSPEC_NOP)] + "" + "" + [(set_attr "type" "nop") + (set_attr "mode" "none") + (set_attr "length" "0")]) + + +;; TLS support + +(define_expand "sym_TPOFF" + [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_TPOFF))] + "" + "") + +(define_expand "sym_DTPOFF" + [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_DTPOFF))] + "" + "") + +(define_insn "load_tp" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(const_int 0)] UNSPEC_TP))] + "TARGET_THREADPTR" + "rur\t%0, THREADPTR" + [(set_attr "type" "rsr") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "set_tp" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] + UNSPECV_SET_TP)] + "TARGET_THREADPTR" + "wur\t%0, THREADPTR" + [(set_attr "type" "wsr") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "tls_func" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "tls_symbol_operand" "")] + UNSPEC_TLS_FUNC))] + "TARGET_THREADPTR && HAVE_AS_TLS" + "movi\t%0, %1@TLSFUNC" + [(set_attr "type" "load") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "tls_arg" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "tls_symbol_operand" "")] + UNSPEC_TLS_ARG))] + "TARGET_THREADPTR && HAVE_AS_TLS" + "movi\t%0, %1@TLSARG" + [(set_attr "type" "load") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "tls_call" + [(set (match_operand:SI 0 "register_operand" "=a") + (call (mem:SI (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "tls_symbol_operand" "")] + UNSPEC_TLS_CALL)) + (match_operand 3 "" "i")))] + "TARGET_THREADPTR && HAVE_AS_TLS" + "callx8.tls %1, %2@TLSCALL" + [(set_attr "type" "call") + (set_attr "mode" "none") + (set_attr "length" "3")]) + + +;; Instructions for the Xtensa "boolean" option. + +(define_insn "*booltrue" + [(set (pc) + (if_then_else (match_operator 2 "boolean_operator" + [(match_operand:CC 0 "register_operand" "b") + (const_int 0)]) + (label_ref (match_operand 1 "" "")) + (pc)))] + "TARGET_BOOLEANS" +{ + if (GET_CODE (operands[2]) == EQ) + return "bf\t%0, %1"; + else + return "bt\t%0, %1"; +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "*boolfalse" + [(set (pc) + (if_then_else (match_operator 2 "boolean_operator" + [(match_operand:CC 0 "register_operand" "b") + (const_int 0)]) + (pc) + (label_ref (match_operand 1 "" ""))))] + "TARGET_BOOLEANS" +{ + if (GET_CODE (operands[2]) == EQ) + return "bt\t%0, %1"; + else + return "bf\t%0, %1"; +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + + +;; Atomic operations + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMW))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMW))] + "" + "memw" + [(set_attr "type" "unknown") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +;; sync_lock_release is only implemented for SImode. +;; For other modes, just use the default of a store with a memory_barrier. 
+(define_insn "sync_lock_releasesi" + [(set (match_operand:SI 0 "mem_operand" "=U") + (unspec_volatile:SI + [(match_operand:SI 1 "register_operand" "r")] + UNSPECV_S32RI))] + "TARGET_RELEASE_SYNC" + "s32ri\t%1, %0" + [(set_attr "type" "store") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "sync_compare_and_swapsi" + [(parallel + [(set (match_operand:SI 0 "register_operand" "=a") + (match_operand:SI 1 "mem_operand" "+U")) + (set (match_dup 1) + (unspec_volatile:SI + [(match_dup 1) + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "0")] + UNSPECV_S32C1I))])] + "TARGET_S32C1I" + "wsr\t%2, SCOMPARE1\;s32c1i\t%3, %1" + [(set_attr "type" "multi") + (set_attr "mode" "SI") + (set_attr "length" "6")]) + +(define_expand "sync_compare_and_swap<mode>" + [(parallel + [(set (match_operand:HQI 0 "register_operand" "") + (match_operand:HQI 1 "mem_operand" "")) + (set (match_dup 1) + (unspec_volatile:HQI + [(match_dup 1) + (match_operand:HQI 2 "register_operand" "") + (match_operand:HQI 3 "register_operand" "")] + UNSPECV_S32C1I))])] + "TARGET_S32C1I" +{ + xtensa_expand_compare_and_swap (operands[0], operands[1], + operands[2], operands[3]); + DONE; +}) + +(define_expand "sync_lock_test_and_set<mode>" + [(match_operand:HQI 0 "register_operand") + (match_operand:HQI 1 "memory_operand") + (match_operand:HQI 2 "register_operand")] + "TARGET_S32C1I" +{ + xtensa_expand_atomic (SET, operands[0], operands[1], operands[2], false); + DONE; +}) + +(define_expand "sync_<atomic><mode>" + [(set (match_operand:HQI 0 "memory_operand") + (ATOMIC:HQI (match_dup 0) + (match_operand:HQI 1 "register_operand")))] + "TARGET_S32C1I" +{ + xtensa_expand_atomic (<CODE>, NULL_RTX, operands[0], operands[1], false); + DONE; +}) + +(define_expand "sync_old_<atomic><mode>" + [(set (match_operand:HQI 0 "register_operand") + (match_operand:HQI 1 "memory_operand")) + (set (match_dup 1) + (ATOMIC:HQI (match_dup 1) + (match_operand:HQI 2 "register_operand")))] + "TARGET_S32C1I" +{ + xtensa_expand_atomic (<CODE>, operands[0], operands[1], operands[2], false); + DONE; +}) + +(define_expand "sync_new_<atomic><mode>" + [(set (match_operand:HQI 0 "register_operand") + (ATOMIC:HQI (match_operand:HQI 1 "memory_operand") + (match_operand:HQI 2 "register_operand"))) + (set (match_dup 1) (ATOMIC:HQI (match_dup 1) (match_dup 2)))] + "TARGET_S32C1I" +{ + xtensa_expand_atomic (<CODE>, operands[0], operands[1], operands[2], true); + DONE; +}) diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt new file mode 100644 index 000000000..e78104f1c --- /dev/null +++ b/gcc/config/xtensa/xtensa.opt @@ -0,0 +1,43 @@ +; Options for the Tensilica Xtensa port of the compiler. + +; Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. 
+
+mconst16
+Target Report Mask(CONST16)
+Use CONST16 instruction to load constants
+
+mforce-no-pic
+Target Report Mask(FORCE_NO_PIC)
+Disable position-independent code (PIC) for use in OS kernel code
+
+mlongcalls
+Target
+Use indirect CALLXn instructions for large programs
+
+mtarget-align
+Target
+Automatically align branch targets to reduce branch penalties
+
+mtext-section-literals
+Target
+Intersperse literal pools with code in the text section
+
+mserialize-volatile
+Target Report Mask(SERIALIZE_VOLATILE)
+-mno-serialize-volatile Do not serialize volatile memory references with MEMW instructions