path: root/gcc/config/xtensa
author     upstream source tree <ports@midipix.org>  2015-03-15 20:14:05 -0400
committer  upstream source tree <ports@midipix.org>  2015-03-15 20:14:05 -0400
commit     554fd8c5195424bdbcabf5de30fdc183aba391bd (patch)
tree       976dc5ab7fddf506dadce60ae936f43f58787092 /gcc/config/xtensa
download   cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.bz2
           cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.xz
obtained gcc-4.6.4.tar.bz2 from upstream website; verified
gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified
upstream tarball.

downloading a git-generated archive based on the 'upstream' tag should
provide you with a source tree that is binary identical to the one
extracted from the above tarball. if you have obtained the source via
the command 'git clone', however, do note that line-endings of files in
your working directory might differ from line-endings of the respective
files in the upstream repository.
Diffstat (limited to 'gcc/config/xtensa')
-rw-r--r--  gcc/config/xtensa/constraints.md        139
-rw-r--r--  gcc/config/xtensa/crti.asm               51
-rw-r--r--  gcc/config/xtensa/crtn.asm               46
-rw-r--r--  gcc/config/xtensa/elf.h                 104
-rw-r--r--  gcc/config/xtensa/elf.opt                30
-rw-r--r--  gcc/config/xtensa/ieee754-df.S         2388
-rw-r--r--  gcc/config/xtensa/ieee754-sf.S         1757
-rw-r--r--  gcc/config/xtensa/lib1funcs.asm         845
-rw-r--r--  gcc/config/xtensa/lib2funcs.S           186
-rw-r--r--  gcc/config/xtensa/libgcc-xtensa.ver       3
-rw-r--r--  gcc/config/xtensa/linux-unwind.h         97
-rw-r--r--  gcc/config/xtensa/linux.h                71
-rw-r--r--  gcc/config/xtensa/predicates.md         175
-rw-r--r--  gcc/config/xtensa/t-elf                   6
-rw-r--r--  gcc/config/xtensa/t-linux                 3
-rw-r--r--  gcc/config/xtensa/t-xtensa               42
-rw-r--r--  gcc/config/xtensa/unwind-dw2-xtensa.c   546
-rw-r--r--  gcc/config/xtensa/unwind-dw2-xtensa.h    50
-rw-r--r--  gcc/config/xtensa/xtensa-protos.h        74
-rw-r--r--  gcc/config/xtensa/xtensa.c             3715
-rw-r--r--  gcc/config/xtensa/xtensa.h              847
-rw-r--r--  gcc/config/xtensa/xtensa.md            1914
-rw-r--r--  gcc/config/xtensa/xtensa.opt             43
23 files changed, 13132 insertions, 0 deletions
diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
new file mode 100644
index 000000000..bde1ba31a
--- /dev/null
+++ b/gcc/config/xtensa/constraints.md
@@ -0,0 +1,139 @@
+;; Constraint definitions for Xtensa.
+;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints.
+
+(define_register_constraint "a" "GR_REGS"
+ "General-purpose AR registers @code{a0}-@code{a15},
+ except @code{a1} (@code{sp}).")
+
+(define_register_constraint "b" "TARGET_BOOLEANS ? BR_REGS : NO_REGS"
+ "Boolean registers @code{b0}-@code{b15}; only available if the Xtensa
+ Boolean Option is configured.")
+
+(define_register_constraint "d" "TARGET_DENSITY ? AR_REGS: NO_REGS"
+ "@internal
+ All AR registers, including sp, but only if the Xtensa Code Density
+ Option is configured.")
+
+(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS"
+ "Floating-point registers @code{f0}-@code{f15}; only available if the
+ Xtensa Floating-Point Coprocessor is configured.")
+
+(define_register_constraint "q" "SP_REG"
+ "@internal
+ The stack pointer (register @code{a1}).")
+
+(define_register_constraint "A" "TARGET_MAC16 ? ACC_REG : NO_REGS"
+ "The low 32 bits of the accumulator from the Xtensa MAC16 Option.")
+
+(define_register_constraint "B" "TARGET_SEXT ? GR_REGS : NO_REGS"
+ "@internal
+ General-purpose AR registers, but only if the Xtensa Sign Extend
+ Option is configured.")
+
+(define_register_constraint "C" "TARGET_MUL16 ? GR_REGS: NO_REGS"
+ "@internal
+ General-purpose AR registers, but only if the Xtensa 16-Bit Integer
+ Multiply Option is configured.")
+
+(define_register_constraint "D" "TARGET_DENSITY ? GR_REGS: NO_REGS"
+ "@internal
+ General-purpose AR registers, but only if the Xtensa Code Density
+ Option is configured.")
+
+(define_register_constraint "W" "TARGET_CONST16 ? GR_REGS: NO_REGS"
+ "@internal
+ General-purpose AR registers, but only if the Xtensa Const16
+ Option is configured.")
+
+;; Integer constant constraints.
+
+(define_constraint "I"
+ "A signed 12-bit integer constant for use with MOVI instructions."
+ (and (match_code "const_int")
+ (match_test "xtensa_simm12b (ival)")))
+
+(define_constraint "J"
+ "A signed 8-bit integer constant for use with ADDI instructions."
+ (and (match_code "const_int")
+ (match_test "xtensa_simm8 (ival)")))
+
+(define_constraint "K"
+ "A constant integer that can be an immediate operand of an Xtensa
+ conditional branch instruction that performs a signed comparison or
+ a comparison against zero."
+ (and (match_code "const_int")
+ (match_test "xtensa_b4const_or_zero (ival)")))
+
+(define_constraint "L"
+ "A constant integer that can be an immediate operand of an Xtensa
+ conditional branch instruction that performs an unsigned comparison."
+ (and (match_code "const_int")
+ (match_test "xtensa_b4constu (ival)")))
+
+(define_constraint "M"
+ "An integer constant in the range @minus{}32-95 for use with MOVI.N
+ instructions."
+ (and (match_code "const_int")
+ (match_test "ival >= -32 && ival <= 95")))
+
+(define_constraint "N"
+ "An unsigned 8-bit integer constant shifted left by 8 bits for use
+ with ADDMI instructions."
+ (and (match_code "const_int")
+ (match_test "xtensa_simm8x256 (ival)")))
+
+(define_constraint "O"
+ "An integer constant that can be used in ADDI.N instructions."
+ (and (match_code "const_int")
+ (match_test "ival == -1 || (ival >= 1 && ival <= 15)")))
+
+(define_constraint "P"
+ "An integer constant that can be used as a mask value in an EXTUI
+ instruction."
+ (and (match_code "const_int")
+ (match_test "xtensa_mask_immediate (ival)")))
+
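
(Editorial aside: the xtensa_* predicates referenced above live in
gcc/config/xtensa/xtensa.c. As a rough C sketch of the ranges they accept,
assuming the standard Xtensa immediate encodings -- signed 12-bit MOVI,
signed 8-bit ADDI, MOVI.N's -32..95, ADDMI's signed 8-bit value scaled by
256, and ADDI.N's -1 or 1..15. The helper names are illustrative, not the
real implementations, and the small lookup tables behind "K" and "L" are
omitted.)

    #include <stdbool.h>

    /* Illustrative range checks only; the real predicates are in
       gcc/config/xtensa/xtensa.c.  */
    static bool sketch_simm12b (long v)    /* "I": MOVI */
    { return v >= -2048 && v <= 2047; }
    static bool sketch_simm8 (long v)      /* "J": ADDI */
    { return v >= -128 && v <= 127; }
    static bool sketch_movi_n (long v)     /* "M": MOVI.N */
    { return v >= -32 && v <= 95; }
    static bool sketch_simm8x256 (long v)  /* "N": ADDMI */
    { return (v & 255) == 0 && sketch_simm8 (v / 256); }
    static bool sketch_addi_n (long v)     /* "O": ADDI.N */
    { return v == -1 || (v >= 1 && v <= 15); }
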
+;; Memory constraints. Do not use define_memory_constraint here. Doing so
+;; causes reload to force some constants into the constant pool, but since
+;; the Xtensa constant pool can only be accessed with L32R instructions, it
+;; is always better to just copy a constant into a register. Instead, use
+;; regular constraints but add a check to allow pseudos during reload.
+
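
(A similarly hedged sketch of the "R" constraint's notion of a small
offset, defined just below: the real test is smalloffset_mem_p; assuming
the 4-bit unsigned field of the narrow 32-bit load/store encodings is
scaled by the 4-byte access size, giving byte offsets 0, 4, ..., 60 --
both the bound and the helper name are illustrative.)

    #include <stdbool.h>

    static bool sketch_smalloffset (long off)
    {
      /* Four-bit unsigned field scaled by the 32-bit access size.  */
      return off >= 0 && off <= 60 && (off & 3) == 0;
    }
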
+(define_constraint "R"
+ "Memory that can be accessed with a 4-bit unsigned offset from a register."
+ (ior (and (match_code "mem")
+ (match_test "smalloffset_mem_p (op)"))
+ (and (match_code "reg")
+ (match_test "reload_in_progress
+ && REGNO (op) >= FIRST_PSEUDO_REGISTER"))))
+
+(define_constraint "T"
+ "Memory in a literal pool (addressable with an L32R instruction)."
+ (and (match_code "mem")
+ (match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
+
+(define_constraint "U"
+ "Memory that is not in a literal pool."
+ (ior (and (match_code "mem")
+ (match_test "! constantpool_mem_p (op)"))
+ (and (match_code "reg")
+ (match_test "reload_in_progress
+ && REGNO (op) >= FIRST_PSEUDO_REGISTER"))))
diff --git a/gcc/config/xtensa/crti.asm b/gcc/config/xtensa/crti.asm
new file mode 100644
index 000000000..cbe91b0e7
--- /dev/null
+++ b/gcc/config/xtensa/crti.asm
@@ -0,0 +1,51 @@
+# Start .init and .fini sections.
+# Copyright (C) 2003, 2009 Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just makes a stack frame for the contents of the .fini and
+# .init sections. Users may put any desired instructions in those
+# sections.
+
+#include "xtensa-config.h"
+
+ .section .init
+ .globl _init
+ .type _init,@function
+ .align 4
+_init:
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ entry sp, 64
+#else
+ addi sp, sp, -32
+ s32i a0, sp, 0
+#endif
+
+ .section .fini
+ .globl _fini
+ .type _fini,@function
+ .align 4
+_fini:
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ entry sp, 64
+#else
+ addi sp, sp, -32
+ s32i a0, sp, 0
+#endif
diff --git a/gcc/config/xtensa/crtn.asm b/gcc/config/xtensa/crtn.asm
new file mode 100644
index 000000000..413cfa0ac
--- /dev/null
+++ b/gcc/config/xtensa/crtn.asm
@@ -0,0 +1,46 @@
+# End of .init and .fini sections.
+# Copyright (C) 2003, 2009 Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+
+# This file just makes sure that the .fini and .init sections do in
+# fact return. Users may put any desired instructions in those sections.
+# This file is the last thing linked into any executable.
+
+#include "xtensa-config.h"
+
+ .section .init
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ retw
+#else
+ l32i a0, sp, 0
+ addi sp, sp, 32
+ ret
+#endif
+
+ .section .fini
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ retw
+#else
+ l32i a0, sp, 0
+ addi sp, sp, 32
+ ret
+#endif
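
(Editorial aside: crti.asm opens the frame that crtn.asm closes, and the
linker splices everything placed in .init/.fini between the two. A
conceptual pseudo-C view of the CALL0-ABI path -- illustration only, since
_init is assembled by the linker from crti.o, the contributed fragments,
and crtn.o, and is never written as C:)

    void _init (void)
    {
      /* crti.asm:  addi sp, sp, -32 ; s32i a0, sp, 0   (open frame, save a0) */
      /* ... .init fragments from crtbegin.o and user objects run here ...    */
      /* crtn.asm:  l32i a0, sp, 0 ; addi sp, sp, 32 ; ret  (close and return) */
    }
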
diff --git a/gcc/config/xtensa/elf.h b/gcc/config/xtensa/elf.h
new file mode 100644
index 000000000..54a9c8f19
--- /dev/null
+++ b/gcc/config/xtensa/elf.h
@@ -0,0 +1,104 @@
+/* Xtensa/Elf configuration.
+ Derived from the configuration for GCC for Intel i386 running Linux.
+ Copyright (C) 2001, 2003, 2006, 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_SECTION_TYPE_FLAGS xtensa_multibss_section_type_flags
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (Xtensa/ELF)", stderr);
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "short unsigned int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 16
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "%{mtext-section-literals:--text-section-literals} \
+ %{mno-text-section-literals:--no-text-section-literals} \
+ %{mtarget-align:--target-align} \
+ %{mno-target-align:--no-target-align} \
+ %{mlongcalls:--longcalls} \
+ %{mno-longcalls:--no-longcalls}"
+
+#undef LIB_SPEC
+#define LIB_SPEC "-lc -lsim -lc -lhandlers-sim -lhal"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "crt1-sim%O%s crt0%O%s crti%O%s crtbegin%O%s _vectors%O%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend%O%s crtn%O%s"
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ %{static:-static}}}"
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* Avoid dots for compatibility with VxWorks. */
+#undef NO_DOLLAR_IN_LABEL
+#define NO_DOT_IN_LABEL
+
+/* Do not force "-fpic" for this target. */
+#define XTENSA_ALWAYS_PIC 0
+
+#undef DBX_REGISTER_NUMBER
+
+/* Search for headers in $tooldir/arch/include and for libraries and
+ startfiles in $tooldir/arch/lib. */
+#define GCC_DRIVER_HOST_INITIALIZATION \
+do \
+{ \
+ char *tooldir, *archdir; \
+ tooldir = concat (tooldir_base_prefix, spec_machine, \
+ dir_separator_str, NULL); \
+ if (!IS_ABSOLUTE_PATH (tooldir)) \
+ tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \
+ spec_version, dir_separator_str, tooldir, NULL); \
+ archdir = concat (tooldir, "arch", dir_separator_str, NULL); \
+ add_prefix (&startfile_prefixes, \
+ concat (archdir, "lib", dir_separator_str, NULL), \
+ "GCC", PREFIX_PRIORITY_LAST, 0, 1); \
+ add_prefix (&include_prefixes, archdir, \
+ "GCC", PREFIX_PRIORITY_LAST, 0, 0); \
+ } \
+while (0)
diff --git a/gcc/config/xtensa/elf.opt b/gcc/config/xtensa/elf.opt
new file mode 100644
index 000000000..bdeac15b2
--- /dev/null
+++ b/gcc/config/xtensa/elf.opt
@@ -0,0 +1,30 @@
+; Xtensa ELF (bare metal) options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+rdynamic
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/xtensa/ieee754-df.S b/gcc/config/xtensa/ieee754-df.S
new file mode 100644
index 000000000..9b46889bd
--- /dev/null
+++ b/gcc/config/xtensa/ieee754-df.S
@@ -0,0 +1,2388 @@
+/* IEEE-754 double-precision functions for Xtensa
+ Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __XTENSA_EB__
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif
+
+/* Warning! The branch displacements for some Xtensa branch instructions
+ are quite small, and this code has been carefully laid out to keep
+ branch targets in range. If you change anything, be sure to check that
+ the assembler is not relaxing anything to branch over a jump. */
+
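
(Editorial aside: throughout this file, xh/xl are the raw high and low
32-bit halves of an IEEE-754 double. A hedged C sketch of the
classification tests that the BALL/BNALL sequences below perform against
the 0x7ff00000 mask; helper names are illustrative:)

    #include <stdint.h>
    #include <stdbool.h>

    #define EXP_MASK  0x7ff00000u  /* all-ones exponent: NaN or Infinity */
    #define QUIET_BIT 0x00080000u  /* ORed in to make a quiet NaN */

    static bool sketch_nan_or_inf_p (uint32_t xh)
    {
      return (xh & EXP_MASK) == EXP_MASK;   /* asm: ball xh, a6, ... */
    }

    static bool sketch_inf_p (uint32_t xh, uint32_t xl)
    {
      /* Infinity has a zero mantissa; asm: slli a7, xh, 12 ; or a7, a7, xl */
      return sketch_nan_or_inf_p (xh) && ((xh << 12) | xl) == 0;
    }
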
+#ifdef L_negdf2
+
+ .align 4
+ .global __negdf2
+ .type __negdf2, @function
+__negdf2:
+ leaf_entry sp, 16
+ movi a4, 0x80000000
+ xor xh, xh, a4
+ leaf_return
+
+#endif /* L_negdf2 */
+
+#ifdef L_addsubdf3
+
+ /* Addition */
+__adddf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Ladd_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall yh, a6, 1f
+ /* If x is a NaN, return it. Otherwise, return y. */
+ slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, .Ladd_ynan_or_inf
+1: leaf_return
+
+.Ladd_ynan_or_inf:
+ /* Return y. */
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ladd_opposite_signs:
+ /* Operand signs differ. Do a subtraction. */
+ slli a7, a6, 11
+ xor yh, yh, a7
+ j .Lsub_same_sign
+
+ .align 4
+ .global __adddf3
+ .type __adddf3, @function
+__adddf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, xh, yh
+ bltz a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:
+ /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
+ ball xh, a6, .Ladd_xnan_or_inf
+ ball yh, a6, .Ladd_ynan_or_inf
+
+ /* Compare the exponents. The smaller operand will be shifted
+ right by the exponent difference and added to the larger
+ one. */
+ extui a7, xh, 20, 12
+ extui a8, yh, 20, 12
+ bltu a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone yh, a6, .Ladd_yexpzero
+
+ /* Replace yh sign/exponent with 0x001. */
+ or yh, yh, a6
+ slli yh, yh, 11
+ srli yh, yh, 11
+
+.Ladd_yexpdiff:
+ /* Compute the exponent difference. Optimize for difference < 32. */
+ sub a10, a7, a8
+ bgeui a10, 32, .Ladd_bigshifty
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out of yl are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, yl, a9
+ src yl, yh, yl
+ srl yh, yh
+
+.Ladd_addy:
+ /* Do the 64-bit addition. */
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1:
+ /* Check if the add overflowed into the exponent. */
+ extui a10, xh, 20, 12
+ beq a10, a7, .Ladd_round
+ mov a8, a7
+ j .Ladd_carry
+
+.Ladd_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0", and increment the apparent exponent
+ because subnormals behave as if they had the minimum (nonzero)
+ exponent. Test for the case when both exponents are zero. */
+ slli yh, yh, 12
+ srli yh, yh, 12
+ bnone xh, a6, .Ladd_bothexpzero
+ addi a8, a8, 1
+ j .Ladd_yexpdiff
+
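
(For reference, the subnormal convention the comment above relies on, as a
decode-only C sketch -- compile with -lm; the helper is illustrative and
plays no part in the assembly:)

    #include <math.h>
    #include <stdint.h>

    /* Sign s, 11-bit exponent field e, 52-bit mantissa m:
       normal (0 < e < 0x7ff): (-1)^s * (1 + m * 2^-52) * 2^(e - 1023)
       subnormal (e == 0):     (-1)^s * (0 + m * 2^-52) * 2^(1 - 1023)
       i.e. no implicit "1.0" and an effective exponent of 1, which is
       why the code bumps the apparent exponent when e == 0.  */
    static double sketch_decode (int s, int e, uint64_t m)
    {
      double frac = (e ? 1.0 : 0.0) + ldexp ((double) m, -52);
      return (s ? -1.0 : 1.0) * ldexp (frac, (e ? e : 1) - 1023);
    }
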
+.Ladd_bothexpzero:
+ /* Both exponents are zero. Handle this as a special case. There
+ is no need to shift or round, and the normal code for handling
+ a carry into the exponent field will not work because it
+ assumes there is an implicit "1.0" that needs to be added. */
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1: leaf_return
+
+.Ladd_bigshifty:
+ /* Exponent difference >= 64 -- just return the bigger value. */
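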
+ bgeui a10, 64, 1b
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out are saved in a9 for rounding the result. */
+ ssr a10
+ sll a11, yl /* lost bits shifted out of yl */
+ src a9, yh, yl
+ srl yl, yh
+ movi yh, 0
+ beqz a11, .Ladd_addy
+ or a9, a9, a10 /* any positive, nonzero value will work */
+ j .Ladd_addy
+
+.Ladd_xexpzero:
+ /* Same as "yexpzero" except skip handling the case when both
+ exponents are zero. */
+ slli xh, xh, 12
+ srli xh, xh, 12
+ addi a7, a7, 1
+ j .Ladd_xexpdiff
+
+.Ladd_shiftx:
+ /* Same thing as the "shifty" code, but with x and y swapped. Also,
+ because the exponent difference is always nonzero in this version,
+ the shift sequence can use SLL and skip loading a constant zero. */
+ bnone xh, a6, .Ladd_xexpzero
+
+ or xh, xh, a6
+ slli xh, xh, 11
+ srli xh, xh, 11
+
+.Ladd_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Ladd_bigshiftx
+
+ ssr a10
+ sll a9, xl
+ src xl, xh, xl
+ srl xh, xh
+
+.Ladd_addx:
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1:
+ /* Check if the add overflowed into the exponent. */
+ extui a10, xh, 20, 12
+ bne a10, a8, .Ladd_carry
+
+.Ladd_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi xl, xl, 1
+ beqz xl, .Ladd_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_bigshiftx:
+ /* Mostly the same thing as "bigshifty".... */
+ bgeui a10, 64, .Ladd_returny
+
+ ssr a10
+ sll a11, xl
+ src a9, xh, xl
+ srl xl, xh
+ movi xh, 0
+ beqz a11, .Ladd_addx
+ or a9, a9, a10
+ j .Ladd_addx
+
+.Ladd_returny:
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ladd_carry:
+ /* The addition has overflowed into the exponent field, so the
+ value needs to be renormalized. The mantissa of the result
+ can be recovered by subtracting the original exponent and
+ adding 0x100000 (which is the explicit "1.0" for the
+ mantissa of the non-shifted operand -- the "1.0" for the
+ shifted operand was already added). The mantissa can then
+ be shifted right by one bit. The explicit "1.0" of the
+ shifted mantissa then needs to be replaced by the exponent,
+ incremented by one to account for the normalizing shift.
+ It is faster to combine these operations: do the shift first
+ and combine the additions and subtractions. If x is the
+ original exponent, the result is:
+ shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
+ or:
+ shifted mantissa + ((x + 1) << 19)
+ Note that the exponent is incremented here by leaving the
+ explicit "1.0" of the mantissa in the exponent field. */
+
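
(The algebra in the comment above is easy to spot-check; a throwaway C
assertion of the identity, illustration only:)

    #include <assert.h>

    int main (void)
    {
      /* -(x << 19) + (1 << 19) + (x << 20) == (x + 1) << 19,
         because x << 20 is just 2 * (x << 19).  */
      for (long x = 0; x <= 0x7ff; x++)
        assert (- (x << 19) + (1L << 19) + (x << 20) == (x + 1) << 19);
      return 0;
    }
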
+ /* Shift xh/xl right by one bit. Save the lsb of xl. */
+ mov a10, xl
+ ssai 1
+ src xl, xh, xl
+ srl xh, xh
+
+ /* See explanation above. The original exponent is in a8. */
+ addi a8, a8, 1
+ slli a8, a8, 19
+ add xh, xh, a8
+
+ /* Return an Infinity if the exponent overflowed. */
+ ball xh, a6, .Ladd_infinity
+
+ /* Same thing as the "round" code except the msb of the leftover
+ fraction is bit 0 of a10, with the rest of the fraction in a9. */
+ bbci.l a10, 0, 1f
+ addi xl, xl, 1
+ beqz xl, .Ladd_roundcarry
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_infinity:
+ /* Clear the mantissa. */
+ movi xl, 0
+ srli xh, xh, 20
+ slli xh, xh, 20
+
+ /* The sign bit may have been lost in a carry-out. Put it back. */
+ slli a8, a8, 1
+ or xh, xh, a8
+ leaf_return
+
+.Ladd_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Ladd_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
+
+
+ /* Subtraction */
+__subdf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Lsub_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall yh, a6, 1f
+ /* Both x and y are either NaN or Inf, so the result is NaN. */
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: leaf_return
+
+.Lsub_ynan_or_inf:
+ /* Negate y and return it. */
+ slli a7, a6, 11
+ xor xh, yh, a7
+ mov xl, yl
+ leaf_return
+
+.Lsub_opposite_signs:
+ /* Operand signs differ. Do an addition. */
+ slli a7, a6, 11
+ xor yh, yh, a7
+ j .Ladd_same_sign
+
+ .align 4
+ .global __subdf3
+ .type __subdf3, @function
+__subdf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, xh, yh
+ bltz a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:
+ /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
+ ball xh, a6, .Lsub_xnan_or_inf
+ ball yh, a6, .Lsub_ynan_or_inf
+
+ /* Compare the operands. In contrast to addition, the entire
+ value matters here. */
+ extui a7, xh, 20, 11
+ extui a8, yh, 20, 11
+ bltu xh, yh, .Lsub_xsmaller
+ beq xh, yh, .Lsub_compare_low
+
+.Lsub_ysmaller:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone yh, a6, .Lsub_yexpzero
+
+ /* Replace yh sign/exponent with 0x001. */
+ or yh, yh, a6
+ slli yh, yh, 11
+ srli yh, yh, 11
+
+.Lsub_yexpdiff:
+ /* Compute the exponent difference. Optimize for difference < 32. */
+ sub a10, a7, a8
+ bgeui a10, 32, .Lsub_bigshifty
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out of yl are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, yl, a9
+ src yl, yh, yl
+ srl yh, yh
+
+.Lsub_suby:
+ /* Do the 64-bit subtraction. */
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from xh/xl. */
+ neg a9, a9
+ beqz a9, 1f
+ addi a5, xh, -1
+ moveqz xh, a5, xl
+ addi xl, xl, -1
+1:
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, xh, 20, 11
+ beq a10, a7, .Lsub_round
+ j .Lsub_borrow
+
+.Lsub_compare_low:
+ /* The high words are equal. Compare the low words. */
+ bltu xl, yl, .Lsub_xsmaller
+ bltu yl, xl, .Lsub_ysmaller
+ /* The operands are equal. Return 0.0. */
+ movi xh, 0
+ movi xl, 0
+1: leaf_return
+
+.Lsub_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0". Unless x is also a subnormal, increment
+ y's apparent exponent because subnormals behave as if they had
+ the minimum (nonzero) exponent. */
+ slli yh, yh, 12
+ srli yh, yh, 12
+ bnone xh, a6, .Lsub_yexpdiff
+ addi a8, a8, 1
+ j .Lsub_yexpdiff
+
+.Lsub_bigshifty:
+ /* Exponent difference >= 64 -- just return the bigger value. */
+ bgeui a10, 64, 1b
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out are saved in a9 for rounding the result. */
+ ssr a10
+ sll a11, yl /* lost bits shifted out of yl */
+ src a9, yh, yl
+ srl yl, yh
+ movi yh, 0
+ beqz a11, .Lsub_suby
+ or a9, a9, a10 /* any positive, nonzero value will work */
+ j .Lsub_suby
+
+.Lsub_xsmaller:
+ /* Same thing as the "ysmaller" code, but with x and y swapped and
+ with y negated. */
+ bnone xh, a6, .Lsub_xexpzero
+
+ or xh, xh, a6
+ slli xh, xh, 11
+ srli xh, xh, 11
+
+.Lsub_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Lsub_bigshiftx
+
+ ssr a10
+ movi a9, 0
+ src a9, xl, a9
+ src xl, xh, xl
+ srl xh, xh
+
+ /* Negate y. */
+ slli a11, a6, 11
+ xor yh, yh, a11
+
+.Lsub_subx:
+ sub xl, yl, xl
+ sub xh, yh, xh
+ bgeu yl, xl, 1f
+ addi xh, xh, -1
+1:
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from xh/xl. */
+ neg a9, a9
+ beqz a9, 1f
+ addi a5, xh, -1
+ moveqz xh, a5, xl
+ addi xl, xl, -1
+1:
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, xh, 20, 11
+ bne a10, a8, .Lsub_borrow
+
+.Lsub_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi xl, xl, 1
+ beqz xl, .Lsub_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Lsub_exactlyhalf
+1: leaf_return
+
+.Lsub_xexpzero:
+ /* Same as "yexpzero". */
+ slli xh, xh, 12
+ srli xh, xh, 12
+ bnone yh, a6, .Lsub_xexpdiff
+ addi a7, a7, 1
+ j .Lsub_xexpdiff
+
+.Lsub_bigshiftx:
+ /* Mostly the same thing as "bigshifty", but with the sign bit of the
+ shifted value set so that the subsequent subtraction flips the
+ sign of y. */
+ bgeui a10, 64, .Lsub_returny
+
+ ssr a10
+ sll a11, xl
+ src a9, xh, xl
+ srl xl, xh
+ slli xh, a6, 11 /* set sign bit of xh */
+ beqz a11, .Lsub_subx
+ or a9, a9, a10
+ j .Lsub_subx
+
+.Lsub_returny:
+ /* Negate and return y. */
+ slli a7, a6, 11
+ xor xh, yh, a7
+ mov xl, yl
+ leaf_return
+
+.Lsub_borrow:
+ /* The subtraction has underflowed into the exponent field, so the
+ value needs to be renormalized. Shift the mantissa left as
+ needed to remove any leading zeros and adjust the exponent
+ accordingly. If the exponent is not large enough to remove
+ all the leading zeros, the result will be a subnormal value. */
+
+ slli a8, xh, 12
+ beqz a8, .Lsub_xhzero
+ do_nsau a6, a8, a7, a11
+ srli a8, a8, 12
+ bge a6, a10, .Lsub_subnormal
+ addi a6, a6, 1
+
+.Lsub_shift_lt32:
+ /* Shift the mantissa (a8/xl/a9) left by a6. */
+ ssl a6
+ src a8, a8, xl
+ src xl, xl, a9
+ sll a9, a9
+
+ /* Combine the shifted mantissa with the sign and exponent,
+ decrementing the exponent by a6. (The exponent has already
+ been decremented by one due to the borrow from the subtraction,
+ but adding the mantissa will increment the exponent by one.) */
+ srli xh, xh, 20
+ sub xh, xh, a6
+ slli xh, xh, 20
+ add xh, xh, a8
+ j .Lsub_round
+
+.Lsub_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Lsub_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
+
+.Lsub_xhzero:
+ /* When normalizing the result, all the mantissa bits in the high
+ word are zero. Shift by "20 + (leading zero count of xl) + 1". */
+ do_nsau a6, xl, a7, a11
+ addi a6, a6, 21
+ blt a10, a6, .Lsub_subnormal
+
+.Lsub_normalize_shift:
+ bltui a6, 32, .Lsub_shift_lt32
+
+ ssl a6
+ src a8, xl, a9
+ sll xl, a9
+ movi a9, 0
+
+ srli xh, xh, 20
+ sub xh, xh, a6
+ slli xh, xh, 20
+ add xh, xh, a8
+ j .Lsub_round
+
+.Lsub_subnormal:
+ /* The exponent is too small to shift away all the leading zeros.
+ Set a6 to the current exponent (which has already been
+ decremented by the borrow) so that the exponent of the result
+ will be zero. Do not add 1 to a6 in this case, because: (1)
+ adding the mantissa will not increment the exponent, so there is
+ no need to subtract anything extra from the exponent to
+ compensate, and (2) the effective exponent of a subnormal is 1
+ not 0 so the shift amount must be 1 smaller than normal. */
+ mov a6, a10
+ j .Lsub_normalize_shift
+
+#endif /* L_addsubdf3 */
+
+#ifdef L_muldf3
+
+ /* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__muldf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Lmul_xexpzero:
+ /* Clear the sign bit of x. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+
+ /* If x is zero, return zero. */
+ or a10, xh, xl
+ beqz a10, .Lmul_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ beqz xh, .Lmul_xh_zero
+ do_nsau a10, xh, a11, a12
+ addi a10, a10, -11
+ ssl a10
+ src xh, xh, xl
+ sll xl, xl
+ movi a8, 1
+ sub a8, a8, a10
+ j .Lmul_xnormalized
+.Lmul_xh_zero:
+ do_nsau a10, xl, a11, a12
+ addi a10, a10, -11
+ movi a8, -31
+ sub a8, a8, a10
+ ssl a10
+ bltz a10, .Lmul_xl_srl
+ sll xh, xl
+ movi xl, 0
+ j .Lmul_xnormalized
+.Lmul_xl_srl:
+ srl xh, xl
+ sll xl, xl
+ j .Lmul_xnormalized
+
+.Lmul_yexpzero:
+ /* Clear the sign bit of y. */
+ slli yh, yh, 1
+ srli yh, yh, 1
+
+ /* If y is zero, return zero. */
+ or a10, yh, yl
+ beqz a10, .Lmul_return_zero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ beqz yh, .Lmul_yh_zero
+ do_nsau a10, yh, a11, a12
+ addi a10, a10, -11
+ ssl a10
+ src yh, yh, yl
+ sll yl, yl
+ movi a9, 1
+ sub a9, a9, a10
+ j .Lmul_ynormalized
+.Lmul_yh_zero:
+ do_nsau a10, yl, a11, a12
+ addi a10, a10, -11
+ movi a9, -31
+ sub a9, a9, a10
+ ssl a10
+ bltz a10, .Lmul_yl_srl
+ sll yh, yl
+ movi yl, 0
+ j .Lmul_ynormalized
+.Lmul_yl_srl:
+ srl yh, yl
+ sll yl, yl
+ j .Lmul_ynormalized
+
+.Lmul_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ j .Lmul_done
+
+.Lmul_xnan_or_inf:
+ /* If y is zero, return NaN. */
+ bnez yl, 1f
+ slli a8, yh, 1
+ bnez a8, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+ j .Lmul_done
+1:
+ /* If y is NaN, return y. */
+ bnall yh, a6, .Lmul_returnx
+ slli a8, yh, 12
+ or a8, a8, yl
+ beqz a8, .Lmul_returnx
+
+.Lmul_returny:
+ mov xh, yh
+ mov xl, yl
+
+.Lmul_returnx:
+ /* Set the sign bit and return. */
+ extui a7, a7, 31, 1
+ slli xh, xh, 1
+ ssai 1
+ src xh, a7, xh
+ j .Lmul_done
+
+.Lmul_ynan_or_inf:
+ /* If x is zero, return NaN. */
+ bnez xl, .Lmul_returny
+ slli a8, xh, 1
+ bnez a8, .Lmul_returny
+ movi a7, 0x80000 /* make it a quiet NaN */
+ or xh, yh, a7
+ j .Lmul_done
+
+ .align 4
+ .global __muldf3
+ .type __muldf3, @function
+__muldf3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 64
+#else
+ leaf_entry sp, 32
+#endif
+ movi a6, 0x7ff00000
+
+ /* Get the sign of the result. */
+ xor a7, xh, yh
+
+ /* Check for NaN and infinity. */
+ ball xh, a6, .Lmul_xnan_or_inf
+ ball yh, a6, .Lmul_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, xh, 20, 11
+ extui a9, yh, 20, 11
+
+ beqz a8, .Lmul_xexpzero
+.Lmul_xnormalized:
+ beqz a9, .Lmul_yexpzero
+.Lmul_ynormalized:
+
+ /* Add the exponents. */
+ add a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0x1fffff
+ or xh, xh, a6
+ and xh, xh, a10
+ or yh, yh, a6
+ and yh, yh, a10
+
+ /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
+ The least-significant word of the result is thrown away except
+ that if it is nonzero, the lsb of a6 is set to 1. */
+#if XCHAL_HAVE_MUL32_HIGH
+
+ /* Compute a6 with any carry-outs in a10. */
+ movi a10, 0
+ mull a6, xl, yh
+ mull a11, xh, yl
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ muluh a11, xl, yl
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ /* If the low word of the result is nonzero, set the lsb of a6. */
+ mull a11, xl, yl
+ beqz a11, 1f
+ movi a9, 1
+ or a6, a6, a9
+1:
+ /* Compute xl with any carry-outs in a9. */
+ movi a9, 0
+ mull a11, xh, yh
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ muluh a11, xh, yl
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ muluh xl, xl, yh
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ /* Compute xh. */
+ muluh xh, xh, yh
+ add xh, xh, a9
+
+#else /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
+ products. These partial products are:
+
+ 0 xll * yll
+
+ 1 xll * ylh
+ 2 xlh * yll
+
+ 3 xll * yhl
+ 4 xlh * ylh
+ 5 xhl * yll
+
+ 6 xll * yhh
+ 7 xlh * yhl
+ 8 xhl * ylh
+ 9 xhh * yll
+
+ 10 xlh * yhh
+ 11 xhl * yhl
+ 12 xhh * ylh
+
+ 13 xhl * yhh
+ 14 xhh * yhl
+
+ 15 xhh * yhh
+
+ where the input chunks are (hh, hl, lh, ll). If using the Mul16
+ or Mul32 multiplier options, these input chunks must be stored in
+ separate registers. For Mac16, the UMUL.AA.* opcodes can specify
+ that the inputs come from either half of the registers, so there
+ is no need to shift them out ahead of time. If there is no
+ multiply hardware, the 16-bit chunks can be extracted when setting
+ up the arguments to the separate multiply function. */
+
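
(A hedged C model of the scheme just described: 16 partial products,
grouped by column weight, each fitting in 32 bits. The real code
additionally ORs any nonzero bit of the discarded low word into the lsb
of a6 as a sticky bit for rounding, and its register assignments
obviously differ.)

    #include <stdint.h>

    static void sketch_mul64x64 (uint64_t x, uint64_t y,
                                 uint64_t *hi, uint64_t *lo)
    {
      uint64_t col[8] = { 0 };
      for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++)
          {
            /* Each 16x16 partial product fits in 32 bits.  */
            uint64_t pp = (uint64_t) (uint16_t) (x >> 16 * i)
                        * (uint16_t) (y >> 16 * j);
            col[i + j]     += pp & 0xffff;  /* low half */
            col[i + j + 1] += pp >> 16;     /* high half, next column */
          }
      uint64_t r[8], carry = 0;
      for (int k = 0; k < 8; k++)           /* propagate carries */
        {
          carry += col[k];
          r[k] = carry & 0xffff;
          carry >>= 16;
        }
      *lo = r[0] | r[1] << 16 | r[2] << 32 | r[3] << 48;
      *hi = r[4] | r[5] << 16 | r[6] << 32 | r[7] << 48;
    }
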
+ /* Save a7 since it is needed to hold a temporary value. */
+ s32i a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Calling a separate multiply function will clobber a0 and requires
+ use of a8 as a temporary, so save those values now. (The function
+ uses a custom ABI so nothing else needs to be saved.) */
+ s32i a0, sp, 0
+ s32i a8, sp, 8
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define xlh a12
+#define ylh a13
+#define xhh a14
+#define yhh a15
+
+ /* Get the high halves of the inputs into registers. */
+ srli xlh, xl, 16
+ srli ylh, yl, 16
+ srli xhh, xh, 16
+ srli yhh, yh, 16
+
+#define xll xl
+#define yll yl
+#define xhl xh
+#define yhl yh
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui xl, xl, 0, 16
+ extui xh, xh, 0, 16
+ extui yl, yl, 0, 16
+ extui yh, yh, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a10 with carry-out in a9. */
+ do_mul(a10, xl, l, yl, h) /* pp 1 */
+ do_mul(a11, xl, h, yl, l) /* pp 2 */
+ movi a9, 0
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Initialize a6 with a9/a10 shifted into position. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a6, a9, a10
+
+ /* Compute the low word into a10. */
+ do_mul(a11, xl, l, yl, l) /* pp 0 */
+ sll a10, a10
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a6, a6, 1
+1:
+ /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
+ This is good enough to determine the low half of a6, so that any
+ nonzero bits from the low word of the result can be collapsed
+ into a6, freeing up a register. */
+ movi a9, 0
+ do_mul(a11, xl, l, yh, l) /* pp 3 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a11, xl, h, yl, h) /* pp 4 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a11, xh, l, yl, l) /* pp 5 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Collapse any nonzero bits from the low word into a6. */
+ beqz a10, 1f
+ movi a11, 1
+ or a6, a6, a11
+1:
+ /* Add pp6-9 into a11 with carry-outs in a10. */
+ do_mul(a7, xl, l, yh, h) /* pp 6 */
+ do_mul(a11, xh, h, yl, l) /* pp 9 */
+ movi a10, 0
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ do_mul(a7, xl, h, yh, l) /* pp 7 */
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ do_mul(a7, xh, l, yl, h) /* pp 8 */
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ /* Shift a10/a11 into position, and add low half of a11 to a6. */
+ src a10, a10, a11
+ add a10, a10, a9
+ sll a11, a11
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ /* Add pp10-12 into xl with carry-outs in a9. */
+ movi a9, 0
+ do_mul(xl, xl, h, yh, h) /* pp 10 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a10, xh, l, yh, l) /* pp 11 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a10, xh, h, yl, h) /* pp 12 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ /* Add pp13-14 into a11 with carry-outs in a10. */
+ do_mul(a11, xh, l, yh, h) /* pp 13 */
+ do_mul(a7, xh, h, yh, l) /* pp 14 */
+ movi a10, 0
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ /* Shift a10/a11 into position, and add low half of a11 to a6. */
+ src a10, a10, a11
+ add a10, a10, a9
+ sll a11, a11
+ add xl, xl, a11
+ bgeu xl, a11, 1f
+ addi a10, a10, 1
+1:
+ /* Compute xh. */
+ do_mul(xh, xh, h, yh, h) /* pp 15 */
+ add xh, xh, a10
+
+ /* Restore values saved on the stack during the multiplication. */
+ l32i a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ l32i a0, sp, 0
+ l32i a8, sp, 8
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Shift left by 12 bits, unless there was a carry-out from the
+ multiply, in which case, shift by 11 bits and increment the
+ exponent. Note: It is convenient to use the constant 0x3ff
+ instead of 0x400 when removing the extra exponent bias (so that
+ it is easy to construct 0x7fe for the overflow check). Reverse
+ the logic here to decrement the exponent sum by one unless there
+ was a carry-out. */
+ movi a4, 11
+ srli a5, xh, 21 - 12
+ bnez a5, 1f
+ addi a4, a4, 1
+ addi a8, a8, -1
+1: ssl a4
+ src xh, xh, xl
+ src xl, xl, a6
+ sll a6, a6
+
+ /* Subtract the extra bias from the exponent sum (plus one to account
+ for the explicit "1.0" of the mantissa that will be added to the
+ exponent in the final result). */
+ movi a4, 0x3ff
+ sub a8, a8, a4
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..7fd are OK here. */
+ slli a4, a4, 1 /* 0x7fe */
+ bgeu a8, a4, .Lmul_overflow
+
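
(The single unsigned BGEU above folds both range checks into one compare,
a standard trick; in C terms, with a8 modeled as the final exponent minus
one per the comment -- sketch only:)

    #include <stdint.h>
    #include <stdbool.h>

    static bool sketch_exp_out_of_range (int32_t e_minus_1)
    {
      /* A negative value wraps to a huge unsigned one, so this catches
         underflow (e_minus_1 < 0) and overflow (> 0x7fd) at once.  */
      return (uint32_t) e_minus_1 >= 0x7fe;
    }
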
+.Lmul_round:
+ /* Round. */
+ bgez a6, .Lmul_rounded
+ addi xl, xl, 1
+ beqz xl, .Lmul_roundcarry
+ slli a6, a6, 1
+ beqz a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 20
+ add xh, xh, a8
+
+.Lmul_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or xh, xh, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+.Lmul_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ j .Lmul_rounded
+
+.Lmul_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow is OK -- it will be added to the exponent. */
+ j .Lmul_rounded
+
+.Lmul_overflow:
+ bltz a8, .Lmul_underflow
+ /* Return +/- Infinity. */
+ addi a8, a4, 1 /* 0x7ff */
+ slli xh, a8, 20
+ movi xl, 0
+ j .Lmul_addsign
+
+.Lmul_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ mov a9, a6
+ ssr a8
+ bgeui a8, 32, .Lmul_bigshift
+
+ /* Shift xh/xl right. Any bits that are shifted out of xl are saved
+ in a6 (combined with the shifted-out bits currently in a6) for
+ rounding the result. */
+ sll a6, xl
+ src xl, xh, xl
+ srl xh, xh
+ j 1f
+
+.Lmul_bigshift:
+ bgeui a8, 64, .Lmul_flush_to_zero
+ sll a10, xl /* lost bits shifted out of xl */
+ src a6, xh, xl
+ srl xl, xh
+ movi xh, 0
+ or a9, a9, a10
+
+ /* Set the exponent to zero. */
+1: movi a8, 0
+
+ /* Pack any nonzero bits shifted out into a6. */
+ beqz a9, .Lmul_round
+ movi a9, 1
+ or a6, a6, a9
+ j .Lmul_round
+
+.Lmul_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ j .Lmul_done
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
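
(A behavioral C rendering of mul_mulsi3_body's shift-add loop above: four
multiplier bits per iteration, matching the four MOVNEZ-guarded adds,
with the radix-16 step at the bottom of the loop -- sketch only:)

    #include <stdint.h>

    static uint32_t sketch_mulsi3 (uint32_t src1, uint32_t src2)
    {
      uint32_t dst = 0;
      do
        {
          if (src1 & 1) dst += src2;        /* add      */
          if (src1 & 2) dst += src2 << 1;   /* do_addx2 */
          if (src1 & 4) dst += src2 << 2;   /* do_addx4 */
          if (src1 & 8) dst += src2 << 3;   /* do_addx8 */
          src1 >>= 4;
          src2 <<= 4;
        }
      while (src1);
      return dst;
    }
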
+#endif /* L_muldf3 */
+
+#ifdef L_divdf3
+
+ /* Division */
+__divdf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Ldiv_yexpzero:
+ /* Clear the sign bit of y. */
+ slli yh, yh, 1
+ srli yh, yh, 1
+
+ /* Check for division by zero. */
+ or a10, yh, yl
+ beqz a10, .Ldiv_yzero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ beqz yh, .Ldiv_yh_zero
+ do_nsau a10, yh, a11, a9
+ addi a10, a10, -11
+ ssl a10
+ src yh, yh, yl
+ sll yl, yl
+ movi a9, 1
+ sub a9, a9, a10
+ j .Ldiv_ynormalized
+.Ldiv_yh_zero:
+ do_nsau a10, yl, a11, a9
+ addi a10, a10, -11
+ movi a9, -31
+ sub a9, a9, a10
+ ssl a10
+ bltz a10, .Ldiv_yl_srl
+ sll yh, yl
+ movi yl, 0
+ j .Ldiv_ynormalized
+.Ldiv_yl_srl:
+ srl yh, yl
+ sll yl, yl
+ j .Ldiv_ynormalized
+
+.Ldiv_yzero:
+ /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+ or xl, xl, xh
+ srli xh, a7, 31
+ slli xh, xh, 31
+ or xh, xh, a6
+ bnez xl, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: movi xl, 0
+ leaf_return
+
+.Ldiv_xexpzero:
+ /* Clear the sign bit of x. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+
+ /* If x is zero, return zero. */
+ or a10, xh, xl
+ beqz a10, .Ldiv_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ beqz xh, .Ldiv_xh_zero
+ do_nsau a10, xh, a11, a8
+ addi a10, a10, -11
+ ssl a10
+ src xh, xh, xl
+ sll xl, xl
+ movi a8, 1
+ sub a8, a8, a10
+ j .Ldiv_xnormalized
+.Ldiv_xh_zero:
+ do_nsau a10, xl, a11, a8
+ addi a10, a10, -11
+ movi a8, -31
+ sub a8, a8, a10
+ ssl a10
+ bltz a10, .Ldiv_xl_srl
+ sll xh, xl
+ movi xl, 0
+ j .Ldiv_xnormalized
+.Ldiv_xl_srl:
+ srl xh, xl
+ sll xl, xl
+ j .Ldiv_xnormalized
+
+.Ldiv_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ leaf_return
+
+.Ldiv_xnan_or_inf:
+ /* Set the sign bit of the result. */
+ srli a7, yh, 31
+ slli a7, a7, 31
+ xor xh, xh, a7
+ /* If y is NaN or Inf, return NaN. */
+ bnall yh, a6, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: leaf_return
+
+.Ldiv_ynan_or_inf:
+ /* If y is Infinity, return zero. */
+ slli a8, yh, 12
+ or a8, a8, yl
+ beqz a8, .Ldiv_return_zero
+ /* y is NaN; return it. */
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ldiv_highequal1:
+ bltu xl, yl, 2f
+ j 3f
+
+ .align 4
+ .global __divdf3
+ .type __divdf3, @function
+__divdf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Get the sign of the result. */
+ xor a7, xh, yh
+
+ /* Check for NaN and infinity. */
+ ball xh, a6, .Ldiv_xnan_or_inf
+ ball yh, a6, .Ldiv_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, xh, 20, 11
+ extui a9, yh, 20, 11
+
+ beqz a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:
+ beqz a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:
+
+ /* Subtract the exponents. */
+ sub a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0x1fffff
+ or xh, xh, a6
+ and xh, xh, a10
+ or yh, yh, a6
+ and yh, yh, a10
+
+ /* Set SAR for left shift by one. */
+ ssai (32 - 1)
+
+ /* The first digit of the mantissa division must be a one.
+ Shift x (and adjust the exponent) as needed to make this true. */
+ bltu yh, xh, 3f
+ beq yh, xh, .Ldiv_highequal1
+2: src xh, xh, xl
+ sll xl, xl
+ addi a8, a8, -1
+3:
+ /* Do the first subtraction and shift. */
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+ src xh, xh, xl
+ sll xl, xl
+
+ /* Put the quotient into a10/a11. */
+ movi a10, 0
+ movi a11, 1
+
+ /* Divide one bit at a time for 52 bits. */
+ movi a9, 52
+#if XCHAL_HAVE_LOOPS
+ loop a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+ /* Shift the quotient << 1. */
+ src a10, a10, a11
+ sll a11, a11
+
+ /* Is this digit a 0 or 1? */
+ bltu xh, yh, 3f
+ beq xh, yh, .Ldiv_highequal2
+
+ /* Output a 1 and subtract. */
+2: addi a11, a11, 1
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+
+ /* Shift the dividend << 1. */
+3: src xh, xh, xl
+ sll xl, xl
+
+#if !XCHAL_HAVE_LOOPS
+ addi a9, a9, -1
+ bnez a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
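
(In outline, the loop above is plain restoring long division, producing
one quotient bit per pass. A hedged C sketch using GCC's unsigned
__int128 in place of the xh/xl register pair:)

    /* GCC extension, standing in for the xh/xl pair.  */
    typedef unsigned __int128 sketch_u128;

    /* x and y are the aligned mantissas; the code above has already
       shifted x so that the first quotient digit is 1.  */
    static sketch_u128 sketch_div_mantissa (sketch_u128 x, sketch_u128 y)
    {
      sketch_u128 q = 1;
      x = (x - y) << 1;             /* first subtraction and shift */
      for (int i = 0; i < 52; i++)  /* 52 more quotient bits */
        {
          q <<= 1;
          if (x >= y)
            {
              q |= 1;
              x -= y;
            }
          x <<= 1;
        }
      return q;  /* x now holds the remainder << 1 for rounding */
    }
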
+ /* Add the exponent bias (less one to account for the explicit "1.0"
+ of the mantissa that will be added to the exponent in the final
+ result). */
+ movi a9, 0x3fe
+ add a8, a8, a9
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..7fd are OK here. */
+ addmi a9, a9, 0x400 /* 0x7fe */
+ bgeu a8, a9, .Ldiv_overflow
+
+.Ldiv_round:
+ /* Round. The remainder (<< 1) is in xh/xl. */
+ bltu xh, yh, .Ldiv_rounded
+ beq xh, yh, .Ldiv_highequal3
+.Ldiv_roundup:
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+
+.Ldiv_rounded:
+ mov xl, a11
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 20
+ add xh, a10, a8
+
+.Ldiv_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or xh, xh, a7
+ leaf_return
+
+.Ldiv_highequal2:
+ bgeu xl, yl, 2b
+ j 3b
+
+.Ldiv_highequal3:
+ bltu xl, yl, .Ldiv_rounded
+ bne xl, yl, .Ldiv_roundup
+
+ /* Remainder is exactly half the divisor. Round even. */
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+ srli a11, a11, 1
+ slli a11, a11, 1
+ j .Ldiv_rounded
+
+.Ldiv_overflow:
+ bltz a8, .Ldiv_underflow
+ /* Return +/- Infinity. */
+ addi a8, a9, 1 /* 0x7ff */
+ slli xh, a8, 20
+ movi xl, 0
+ j .Ldiv_addsign
+
+.Ldiv_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ ssr a8
+ bgeui a8, 32, .Ldiv_bigshift
+
+ /* Shift a10/a11 right. Any bits that are shifted out of a11 are
+ saved in a6 for rounding the result. */
+ sll a6, a11
+ src a11, a10, a11
+ srl a10, a10
+ j 1f
+
+.Ldiv_bigshift:
+ bgeui a8, 64, .Ldiv_flush_to_zero
+ sll a9, a11 /* lost bits shifted out of a11 */
+ src a6, a10, a11
+ srl a11, a10
+ movi a10, 0
+ or xl, xl, a9
+
+ /* Set the exponent to zero. */
+1: movi a8, 0
+
+ /* Pack any nonzero remainder (in xh/xl) into a6. */
+ or xh, xh, xl
+ beqz xh, 1f
+ movi a9, 1
+ or a6, a6, a9
+
+ /* Round a10/a11 based on the bits shifted out into a6. */
+1: bgez a6, .Ldiv_rounded
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+ slli a6, a6, 1
+ bnez a6, .Ldiv_rounded
+ srli a11, a11, 1
+ slli a11, a11, 1
+ j .Ldiv_rounded
+
+.Ldiv_roundcarry:
+ /* a11 is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi a10, a10, 1
+ /* Overflow to the exponent field is OK. */
+ j .Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ leaf_return
+
+#endif /* L_divdf3 */
+
+#ifdef L_cmpdf2
+
+ /* Equal and Not Equal */
+
+ .align 4
+ .global __eqdf2
+ .global __nedf2
+ .set __nedf2, __eqdf2
+ .type __eqdf2, @function
+__eqdf2:
+ leaf_entry sp, 16
+ bne xl, yl, 2f
+ bne xh, yh, 4f
+
+ /* The values are equal but NaN != NaN. Check the exponent. */
+ movi a6, 0x7ff00000
+ ball xh, a6, 3f
+
+ /* Equal. */
+ movi a2, 0
+ leaf_return
+
+ /* Not equal. */
+2: movi a2, 1
+ leaf_return
+
+ /* Check if the mantissas are nonzero. */
+3: slli a7, xh, 12
+ or a7, a7, xl
+ j 5f
+
+ /* Check if x and y are zero with different signs. */
+4: or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl /* xl == yl here */
+
+ /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+ of x when exponent(x) = 0x7ff and x == y. */
+5: movi a2, 0
+ movi a3, 1
+ movnez a2, a3, a7
+ leaf_return
+
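
(The two IEEE-754 corner cases handled above -- NaN comparing unequal to
itself even on identical bits, and +0.0/-0.0 comparing equal despite
different bits -- in executable C form, illustration only:)

    #include <math.h>
    #include <stdio.h>
    #include <string.h>

    int main (void)
    {
      double qnan = NAN, pz = 0.0, nz = -0.0;
      printf ("%d\n", qnan == qnan);                      /* 0: NaN != NaN  */
      printf ("%d\n", pz == nz);                          /* 1: +0 == -0    */
      printf ("%d\n", memcmp (&pz, &nz, sizeof pz) == 0); /* 0: bits differ */
      return 0;
    }
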
+
+ /* Greater Than */
+
+ .align 4
+ .global __gtdf2
+ .type __gtdf2, @function
+__gtdf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Lle_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+
+ /* Less Than or Equal */
+
+ .align 4
+ .global __ledf2
+ .type __ledf2, @function
+__ledf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Lle_cmp
+ movi a2, 1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+.Lle_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, xh, yh
+ bltz a7, .Lle_diff_signs
+
+ /* Check if x is negative. */
+ bltz xh, .Lle_xneg
+
+ /* Check if x <= y. */
+ bltu xh, yh, 4f
+ bne xh, yh, 5f
+ bltu yl, xl, 5f
+4: movi a2, 0
+ leaf_return
+
+.Lle_xneg:
+ /* Check if y <= x. */
+ bltu yh, xh, 4b
+ bne yh, xh, 5f
+ bgeu xl, yl, 4b
+5: movi a2, 1
+ leaf_return
+
+.Lle_diff_signs:
+ bltz xh, 4b
+
+ /* Check if both x and y are zero. */
+ or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl
+ or a7, a7, yl
+ movi a2, 1
+ movi a3, 0
+ moveqz a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than or Equal */
+
+ .align 4
+ .global __gedf2
+ .type __gedf2, @function
+__gedf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Llt_cmp
+ movi a2, -1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, -1
+ leaf_return
+
+
+ /* Less Than */
+
+ .align 4
+ .global __ltdf2
+ .type __ltdf2, @function
+__ltdf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Llt_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+.Llt_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, xh, yh
+ bltz a7, .Llt_diff_signs
+
+ /* Check if x is negative. */
+ bltz xh, .Llt_xneg
+
+ /* Check if x < y. */
+ bltu xh, yh, 4f
+ bne xh, yh, 5f
+ bgeu xl, yl, 5f
+4: movi a2, -1
+ leaf_return
+
+.Llt_xneg:
+ /* Check if y < x. */
+ bltu yh, xh, 4b
+ bne yh, xh, 5f
+ bltu yl, xl, 4b
+5: movi a2, 0
+ leaf_return
+
+.Llt_diff_signs:
+ bgez xh, 5b
+
+ /* Check if both x and y are zero. */
+ or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl
+ or a7, a7, yl
+ movi a2, 0
+ movi a3, -1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Unordered */
+
+ .align 4
+ .global __unorddf2
+ .type __unorddf2, @function
+__unorddf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 3f
+1: ball yh, a6, 4f
+2: movi a2, 0
+ leaf_return
+
+3: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+4: slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, 2b
+ movi a2, 1
+ leaf_return
+
+#endif /* L_cmpdf2 */
+
+#ifdef L_fixdfsi
+
+ .align 4
+ .global __fixdfsi
+ .type __fixdfsi, @function
+__fixdfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixdfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 19, 10 /* 0x3fe */
+ sub a4, a4, a5
+ bgei a4, 32, .Lfixdfsi_maxint
+ blti a4, 1, .Lfixdfsi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src a5, a7, xl
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixdfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixdfsi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi xh, 0
+
+.Lfixdfsi_maxint:
+ slli a4, a6, 11 /* 0x80000000 */
+ addi a5, a4, -1 /* 0x7fffffff */
+ movgez a4, a5, xh
+ mov a2, a4
+ leaf_return
+
+.Lfixdfsi_zero:
+ movi a2, 0
+ leaf_return
+
+#endif /* L_fixdfsi */
+
+#ifdef L_fixdfdi
+
+ .align 4
+ .global __fixdfdi
+ .type __fixdfdi, @function
+__fixdfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixdfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 19, 10 /* 0x3fe */
+ sub a4, a4, a5
+ bgei a4, 64, .Lfixdfdi_maxint
+ blti a4, 1, .Lfixdfdi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src xh, a7, xl
+ sll xl, xl
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixdfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixdfdi_shifted:
+	/* Negate the result if sign != 0.  The 64-bit negation here is
+	   -(hi:lo) = (-hi - (lo != 0)) : (-lo). */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixdfdi_smallshift:
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixdfdi_shifted
+
+.Lfixdfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixdfdi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi xh, 0
+
+.Lfixdfdi_maxint:
+ slli a7, a6, 11 /* 0x80000000 */
+ bgez xh, 1f
+ mov xh, a7
+ movi xl, 0
+ leaf_return
+
+1: addi xh, a7, -1 /* 0x7fffffff */
+ movi xl, -1
+ leaf_return
+
+.Lfixdfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+#endif /* L_fixdfdi */
+
+#ifdef L_fixunsdfsi
+
+ .align 4
+ .global __fixunsdfsi
+ .type __fixunsdfsi, @function
+__fixunsdfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixunsdfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 20, 10 /* 0x3ff */
+ sub a4, a4, a5
+ bgei a4, 32, .Lfixunsdfsi_maxint
+ bltz a4, .Lfixunsdfsi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src a5, a7, xl
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 32, .Lfixunsdfsi_bigexp
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixunsdfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixunsdfsi_maxint
+
+ /* Translate NaN to 0xffffffff. */
+ movi a2, -1
+ leaf_return
+
+.Lfixunsdfsi_maxint:
+ slli a4, a6, 11 /* 0x80000000 */
+ movi a5, -1 /* 0xffffffff */
+ movgez a4, a5, xh
+ mov a2, a4
+ leaf_return
+
+.Lfixunsdfsi_zero:
+ movi a2, 0
+ leaf_return
+
+.Lfixunsdfsi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz xh, 1f
+ mov a2, a5 /* no shift needed */
+ leaf_return
+
+ /* Return 0x80000000 if negative. */
+1: slli a2, a6, 11
+ leaf_return
+
+#endif /* L_fixunsdfsi */
+
+#ifdef L_fixunsdfdi
+
+ .align 4
+ .global __fixunsdfdi
+ .type __fixunsdfdi, @function
+__fixunsdfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixunsdfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 20, 10 /* 0x3ff */
+ sub a4, a4, a5
+ bgei a4, 64, .Lfixunsdfdi_maxint
+ bltz a4, .Lfixunsdfdi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src xh, a7, xl
+ sll xl, xl
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 64, .Lfixunsdfdi_bigexp
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixunsdfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixunsdfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixunsdfdi_smallshift:
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixunsdfdi_shifted
+
+.Lfixunsdfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixunsdfdi_maxint
+
+ /* Translate NaN to 0xffffffff.... */
+1: movi xh, -1
+ movi xl, -1
+ leaf_return
+
+.Lfixunsdfdi_maxint:
+ bgez xh, 1b
+2: slli xh, a6, 11 /* 0x80000000 */
+ movi xl, 0
+ leaf_return
+
+.Lfixunsdfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+.Lfixunsdfdi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a7, 2b
+ leaf_return /* no shift needed */
+
+#endif /* L_fixunsdfdi */
+
+#ifdef L_floatsidf
+
+ .align 4
+ .global __floatunsidf
+ .type __floatunsidf, @function
+__floatunsidf:
+ leaf_entry sp, 16
+ beqz a2, .Lfloatsidf_return_zero
+
+ /* Set the sign to zero and jump to the floatsidf code. */
+ movi a7, 0
+ j .Lfloatsidf_normalize
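+
+	/* Note that, unlike the DImode conversions below, no rounding is
+	   needed anywhere in this function: every 32-bit integer is exactly
+	   representable in the 53-bit double-precision significand. */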
+
+ .align 4
+ .global __floatsidf
+ .type __floatsidf, @function
+__floatsidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ beqz a2, .Lfloatsidf_return_zero
+
+ /* Save the sign. */
+ extui a7, a2, 31, 1
+
+ /* Get the absolute value. */
+#if XCHAL_HAVE_ABS
+ abs a2, a2
+#else
+ neg a4, a2
+ movltz a2, a4, a2
+#endif
+
+.Lfloatsidf_normalize:
+ /* Normalize with the first 1 bit in the msb. */
+ do_nsau a4, a2, a5, a6
+ ssl a4
+ sll a5, a2
+
+ /* Shift the mantissa into position. */
+ srli xh, a5, 11
+ slli xl, a5, (32 - 11)
+
+ /* Set the exponent. */
+ movi a5, 0x41d /* 0x3fe + 31 */
+ sub a5, a5, a4
+ slli a5, a5, 20
+ add xh, xh, a5
+
+ /* Add the sign and return. */
+ slli a7, a7, 31
+ or xh, xh, a7
+ leaf_return
+
+.Lfloatsidf_return_zero:
+	/* a2 is already known to be zero here, so clearing a3 zeroes the
+	   other half of the result for either endianness. */
+	movi	a3, 0
+	leaf_return
+
+#endif /* L_floatsidf */
+
+#ifdef L_floatdidf
+
+ .align 4
+ .global __floatundidf
+ .type __floatundidf, @function
+__floatundidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Set the sign to zero and jump to the floatdidf code. */
+ movi a7, 0
+ j .Lfloatdidf_normalize
+
+ .align 4
+ .global __floatdidf
+ .type __floatdidf, @function
+__floatdidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Save the sign. */
+ extui a7, xh, 31, 1
+
+ /* Get the absolute value. */
+ bgez xh, .Lfloatdidf_normalize
+ neg xl, xl
+ neg xh, xh
+ beqz xl, .Lfloatdidf_normalize
+ addi xh, xh, -1
+
+.Lfloatdidf_normalize:
+ /* Normalize with the first 1 bit in the msb of xh. */
+ beqz xh, .Lfloatdidf_bigshift
+ do_nsau a4, xh, a5, a6
+ ssl a4
+ src xh, xh, xl
+ sll xl, xl
+
+.Lfloatdidf_shifted:
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ ssai 11
+ sll a6, xl
+ src xl, xh, xl
+ srl xh, xh
+
+ /* Set the exponent. */
+ movi a5, 0x43d /* 0x3fe + 63 */
+ sub a5, a5, a4
+ slli a5, a5, 20
+ add xh, xh, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or xh, xh, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, 2f
+ addi xl, xl, 1
+ beqz xl, .Lfloatdidf_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatdidf_exactlyhalf
+2: leaf_return
+
+.Lfloatdidf_bigshift:
+ /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
+ do_nsau a4, xl, a5, a6
+ ssl a4
+ sll xh, xl
+ movi xl, 0
+ addi a4, a4, 32
+ j .Lfloatdidf_shifted
+
+.Lfloatdidf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Lfloatdidf_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
+
+#endif /* L_floatdidf */
+
+#ifdef L_truncdfsf2
+
+ .align 4
+ .global __truncdfsf2
+ .type __truncdfsf2, @function
+__truncdfsf2:
+ leaf_entry sp, 16
+
+ /* Adjust the exponent bias. */
+ movi a4, (0x3ff - 0x7f) << 20
+ sub a5, xh, a4
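+	/* (The double bias is 0x3ff = 1023 and the single bias is 0x7f = 127,
+	   so subtracting their difference rebiases the exponent field in
+	   place.  A borrow into the sign bit, detected by the XOR test just
+	   below, means the single-precision exponent has underflowed.) */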
+
+ /* Check for underflow. */
+ xor a6, xh, a5
+ bltz a6, .Ltrunc_underflow
+ extui a6, a5, 20, 11
+ beqz a6, .Ltrunc_underflow
+
+ /* Check for overflow. */
+ movi a4, 255
+ bge a6, a4, .Ltrunc_overflow
+
+ /* Shift a5/xl << 3 into a5/a4. */
+ ssai (32 - 3)
+ src a5, a5, xl
+ sll a4, xl
+
+.Ltrunc_addsign:
+ /* Add the sign bit. */
+ extui a6, xh, 31, 1
+ slli a6, a6, 31
+ or a2, a6, a5
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a4, 1f
+ addi a2, a2, 1
+ /* Overflow to the exponent is OK. The answer will be correct. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a4, a4, 1
+ beqz a4, .Ltrunc_exactlyhalf
+1: leaf_return
+
+.Ltrunc_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+.Ltrunc_overflow:
+ /* Check if exponent == 0x7ff. */
+ movi a4, 0x7ff00000
+ bnall xh, a4, 1f
+
+ /* Check if mantissa is nonzero. */
+ slli a5, xh, 12
+ or a5, a5, xl
+ beqz a5, 1f
+
+ /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */
+ srli a4, a4, 1
+
+1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */
+ /* Add the sign bit. */
+ extui a6, xh, 31, 1
+ ssai 1
+ src a2, a6, a4
+ leaf_return
+
+.Ltrunc_underflow:
+ /* Find shift count for a subnormal. Flush to zero if >= 32. */
+ extui a6, xh, 20, 11
+ movi a5, 0x3ff - 0x7f
+ sub a6, a5, a6
+ addi a6, a6, 1
+ bgeui a6, 32, 1f
+
+ /* Replace the exponent with an explicit "1.0". */
+ slli a5, a5, 13 /* 0x700000 */
+ or a5, a5, xh
+ slli a5, a5, 11
+ srli a5, a5, 11
+
+ /* Shift the mantissa left by 3 bits (into a5/a4). */
+ ssai (32 - 3)
+ src a5, a5, xl
+ sll a4, xl
+
+ /* Shift right by a6. */
+ ssr a6
+ sll a7, a4
+ src a4, a5, a4
+ srl a5, a5
+ beqz a7, .Ltrunc_addsign
+ or a4, a4, a6 /* any positive, nonzero value will work */
+ j .Ltrunc_addsign
+
+ /* Return +/- zero. */
+1: extui a2, xh, 31, 1
+ slli a2, a2, 31
+ leaf_return
+
+#endif /* L_truncdfsf2 */
+
+#ifdef L_extendsfdf2
+
+ .align 4
+ .global __extendsfdf2
+ .type __extendsfdf2, @function
+__extendsfdf2:
+ leaf_entry sp, 16
+
+ /* Save the sign bit and then shift it off. */
+ extui a5, a2, 31, 1
+ slli a5, a5, 31
+ slli a4, a2, 1
+
+ /* Extract and check the exponent. */
+ extui a6, a2, 23, 8
+ beqz a6, .Lextend_expzero
+ addi a6, a6, 1
+ beqi a6, 256, .Lextend_nan_or_inf
+
+ /* Shift >> 3 into a4/xl. */
+ srli a4, a4, 4
+ slli xl, a2, (32 - 3)
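+	/* (The 23-bit single-precision mantissa must land in the double's
+	   52-bit field, i.e., move left by 52 - 23 = 29 bits across xh:xl,
+	   which is the same as a right shift by 3 from its position in a2.) */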
+
+ /* Adjust the exponent bias. */
+ movi a6, (0x3ff - 0x7f) << 20
+ add a4, a4, a6
+
+ /* Add the sign bit. */
+ or xh, a4, a5
+ leaf_return
+
+.Lextend_nan_or_inf:
+ movi a4, 0x7ff00000
+
+ /* Check for NaN. */
+ slli a7, a2, 9
+ beqz a7, 1f
+
+ slli a6, a6, 11 /* 0x80000 */
+ or a4, a4, a6
+
+ /* Add the sign and return. */
+1: or xh, a4, a5
+ movi xl, 0
+ leaf_return
+
+.Lextend_expzero:
+ beqz a4, 1b
+
+ /* Normalize it to have 8 zero bits before the first 1 bit. */
+ do_nsau a7, a4, a2, a3
+ addi a7, a7, -8
+ ssl a7
+ sll a4, a4
+
+ /* Shift >> 3 into a4/xl. */
+ slli xl, a4, (32 - 3)
+ srli a4, a4, 3
+
+ /* Set the exponent. */
+ movi a6, 0x3fe - 0x7f
+ sub a6, a6, a7
+ slli a6, a6, 20
+ add a4, a4, a6
+
+ /* Add the sign and return. */
+ or xh, a4, a5
+ leaf_return
+
+#endif /* L_extendsfdf2 */
+
+
diff --git a/gcc/config/xtensa/ieee754-sf.S b/gcc/config/xtensa/ieee754-sf.S
new file mode 100644
index 000000000..d75be0e5a
--- /dev/null
+++ b/gcc/config/xtensa/ieee754-sf.S
@@ -0,0 +1,1757 @@
+/* IEEE-754 single-precision functions for Xtensa
+ Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __XTENSA_EB__
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif
+
+/* Warning! The branch displacements for some Xtensa branch instructions
+   are quite small, and this code has been carefully laid out to keep
+   branch targets in range. If you change anything, be sure to check that
+   the assembler has not relaxed an out-of-range branch into a branch
+   over a jump. */
+
+#ifdef L_negsf2
+
+ .align 4
+ .global __negsf2
+ .type __negsf2, @function
+__negsf2:
+ leaf_entry sp, 16
+ movi a4, 0x80000000
+ xor a2, a2, a4
+ leaf_return
+
+#endif /* L_negsf2 */
+
+#ifdef L_addsubsf3
+
+ /* Addition */
+__addsf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Ladd_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall a3, a6, 1f
+ /* If x is a NaN, return it. Otherwise, return y. */
+ slli a7, a2, 9
+ beqz a7, .Ladd_ynan_or_inf
+1: leaf_return
+
+.Ladd_ynan_or_inf:
+ /* Return y. */
+ mov a2, a3
+ leaf_return
+
+.Ladd_opposite_signs:
+ /* Operand signs differ. Do a subtraction. */
+ slli a7, a6, 8
+ xor a3, a3, a7
+ j .Lsub_same_sign
+
+ .align 4
+ .global __addsf3
+ .type __addsf3, @function
+__addsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, a2, a3
+ bltz a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:
+	/* Check if either exponent is all ones, i.e., 0xff (NaN or Infinity). */
+ ball a2, a6, .Ladd_xnan_or_inf
+ ball a3, a6, .Ladd_ynan_or_inf
+
+ /* Compare the exponents. The smaller operand will be shifted
+ right by the exponent difference and added to the larger
+ one. */
+ extui a7, a2, 23, 9
+ extui a8, a3, 23, 9
+ bltu a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone a3, a6, .Ladd_yexpzero
+
+	/* Replace y's sign/exponent with the explicit "1.0" bit (0x00800000). */
+ or a3, a3, a6
+ slli a3, a3, 8
+ srli a3, a3, 8
+
+.Ladd_yexpdiff:
+ /* Compute the exponent difference. */
+ sub a10, a7, a8
+
+	/* Exponent difference >= 32 -- just return the bigger value. */
+ bgeui a10, 32, 1f
+
+ /* Shift y right by the exponent difference. Any bits that are
+ shifted out of y are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, a3, a9
+ srl a3, a3
+
+ /* Do the addition. */
+ add a2, a2, a3
+
+ /* Check if the add overflowed into the exponent. */
+ extui a10, a2, 23, 9
+ beq a10, a7, .Ladd_round
+ mov a8, a7
+ j .Ladd_carry
+
+.Ladd_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0", and increment the apparent exponent
+ because subnormals behave as if they had the minimum (nonzero)
+ exponent. Test for the case when both exponents are zero. */
+ slli a3, a3, 9
+ srli a3, a3, 9
+ bnone a2, a6, .Ladd_bothexpzero
+ addi a8, a8, 1
+ j .Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+ /* Both exponents are zero. Handle this as a special case. There
+ is no need to shift or round, and the normal code for handling
+ a carry into the exponent field will not work because it
+ assumes there is an implicit "1.0" that needs to be added. */
+ add a2, a2, a3
+1: leaf_return
+
+.Ladd_xexpzero:
+ /* Same as "yexpzero" except skip handling the case when both
+ exponents are zero. */
+ slli a2, a2, 9
+ srli a2, a2, 9
+ addi a7, a7, 1
+ j .Ladd_xexpdiff
+
+.Ladd_shiftx:
+ /* Same thing as the "shifty" code, but with x and y swapped. Also,
+ because the exponent difference is always nonzero in this version,
+ the shift sequence can use SLL and skip loading a constant zero. */
+ bnone a2, a6, .Ladd_xexpzero
+
+ or a2, a2, a6
+ slli a2, a2, 8
+ srli a2, a2, 8
+
+.Ladd_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Ladd_returny
+
+ ssr a10
+ sll a9, a2
+ srl a2, a2
+
+ add a2, a2, a3
+
+ /* Check if the add overflowed into the exponent. */
+ extui a10, a2, 23, 9
+ bne a10, a8, .Ladd_carry
+
+.Ladd_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi a2, a2, 1
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
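+
+	/* (Rounding convention used throughout this file: the bits shifted
+	   out of the result are kept left-aligned in a register, here a9,
+	   so its sign bit is the round bit and the remaining bits are the
+	   sticky bits.  Round bit set with all sticky bits clear is the
+	   exact-halfway case, which is rounded to even.) */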
+
+.Ladd_returny:
+ mov a2, a3
+ leaf_return
+
+.Ladd_carry:
+ /* The addition has overflowed into the exponent field, so the
+ value needs to be renormalized. The mantissa of the result
+ can be recovered by subtracting the original exponent and
+ adding 0x800000 (which is the explicit "1.0" for the
+ mantissa of the non-shifted operand -- the "1.0" for the
+ shifted operand was already added). The mantissa can then
+ be shifted right by one bit. The explicit "1.0" of the
+ shifted mantissa then needs to be replaced by the exponent,
+ incremented by one to account for the normalizing shift.
+ It is faster to combine these operations: do the shift first
+ and combine the additions and subtractions. If x is the
+ original exponent, the result is:
+ shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
+ or:
+ shifted mantissa + ((x + 1) << 22)
+ Note that the exponent is incremented here by leaving the
+ explicit "1.0" of the mantissa in the exponent field. */
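+
+	/* As a check of that identity:
+	     -(x << 22) + (1 << 22) + (x << 23)
+	       = (1 << 22) + x * ((1 << 23) - (1 << 22))
+	       = (1 << 22) + (x << 22)
+	       = ((x + 1) << 22). */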
+
+ /* Shift x right by one bit. Save the lsb. */
+ mov a10, a2
+ srli a2, a2, 1
+
+ /* See explanation above. The original exponent is in a8. */
+ addi a8, a8, 1
+ slli a8, a8, 22
+ add a2, a2, a8
+
+ /* Return an Infinity if the exponent overflowed. */
+ ball a2, a6, .Ladd_infinity
+
+ /* Same thing as the "round" code except the msb of the leftover
+ fraction is bit 0 of a10, with the rest of the fraction in a9. */
+ bbci.l a10, 0, 1f
+ addi a2, a2, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_infinity:
+ /* Clear the mantissa. */
+ srli a2, a2, 23
+ slli a2, a2, 23
+
+ /* The sign bit may have been lost in a carry-out. Put it back. */
+ slli a8, a8, 1
+ or a2, a2, a8
+ leaf_return
+
+.Ladd_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+
+ /* Subtraction */
+__subsf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Lsub_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall a3, a6, 1f
+ /* Both x and y are either NaN or Inf, so the result is NaN. */
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Lsub_ynan_or_inf:
+ /* Negate y and return it. */
+ slli a7, a6, 8
+ xor a2, a3, a7
+ leaf_return
+
+.Lsub_opposite_signs:
+ /* Operand signs differ. Do an addition. */
+ slli a7, a6, 8
+ xor a3, a3, a7
+ j .Ladd_same_sign
+
+ .align 4
+ .global __subsf3
+ .type __subsf3, @function
+__subsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, a2, a3
+ bltz a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:
+	/* Check if either exponent is all ones, i.e., 0xff (NaN or Infinity). */
+ ball a2, a6, .Lsub_xnan_or_inf
+ ball a3, a6, .Lsub_ynan_or_inf
+
+ /* Compare the operands. In contrast to addition, the entire
+ value matters here. */
+ extui a7, a2, 23, 8
+ extui a8, a3, 23, 8
+ bltu a2, a3, .Lsub_xsmaller
+
+.Lsub_ysmaller:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone a3, a6, .Lsub_yexpzero
+
+	/* Replace y's sign/exponent with the explicit "1.0" bit (0x00800000). */
+ or a3, a3, a6
+ slli a3, a3, 8
+ srli a3, a3, 8
+
+.Lsub_yexpdiff:
+ /* Compute the exponent difference. */
+ sub a10, a7, a8
+
+	/* Exponent difference >= 32 -- just return the bigger value. */
+ bgeui a10, 32, 1f
+
+ /* Shift y right by the exponent difference. Any bits that are
+ shifted out of y are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, a3, a9
+ srl a3, a3
+
+ sub a2, a2, a3
+
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from a2. */
+ neg a9, a9
+ addi a10, a2, -1
+ movnez a2, a10, a9
+
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, a2, 23, 8
+ beq a10, a7, .Lsub_round
+ j .Lsub_borrow
+
+.Lsub_yexpzero:
+ /* Return zero if the inputs are equal. (For the non-subnormal
+ case, subtracting the "1.0" will cause a borrow from the exponent
+ and this case can be detected when handling the borrow.) */
+ beq a2, a3, .Lsub_return_zero
+
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0". Unless x is also a subnormal, increment
+ y's apparent exponent because subnormals behave as if they had
+ the minimum (nonzero) exponent. */
+ slli a3, a3, 9
+ srli a3, a3, 9
+ bnone a2, a6, .Lsub_yexpdiff
+ addi a8, a8, 1
+ j .Lsub_yexpdiff
+
+.Lsub_returny:
+ /* Negate and return y. */
+ slli a7, a6, 8
+ xor a2, a3, a7
+1: leaf_return
+
+.Lsub_xsmaller:
+ /* Same thing as the "ysmaller" code, but with x and y swapped and
+ with y negated. */
+ bnone a2, a6, .Lsub_xexpzero
+
+ or a2, a2, a6
+ slli a2, a2, 8
+ srli a2, a2, 8
+
+.Lsub_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Lsub_returny
+
+ ssr a10
+ movi a9, 0
+ src a9, a2, a9
+ srl a2, a2
+
+ /* Negate y. */
+ slli a11, a6, 8
+ xor a3, a3, a11
+
+ sub a2, a3, a2
+
+ neg a9, a9
+ addi a10, a2, -1
+ movnez a2, a10, a9
+
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, a2, 23, 8
+ bne a10, a8, .Lsub_borrow
+
+.Lsub_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi a2, a2, 1
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Lsub_exactlyhalf
+1: leaf_return
+
+.Lsub_xexpzero:
+ /* Same as "yexpzero". */
+ beq a2, a3, .Lsub_return_zero
+ slli a2, a2, 9
+ srli a2, a2, 9
+ bnone a3, a6, .Lsub_xexpdiff
+ addi a7, a7, 1
+ j .Lsub_xexpdiff
+
+.Lsub_return_zero:
+ movi a2, 0
+ leaf_return
+
+.Lsub_borrow:
+ /* The subtraction has underflowed into the exponent field, so the
+ value needs to be renormalized. Shift the mantissa left as
+ needed to remove any leading zeros and adjust the exponent
+ accordingly. If the exponent is not large enough to remove
+ all the leading zeros, the result will be a subnormal value. */
+
+ slli a8, a2, 9
+ beqz a8, .Lsub_xzero
+ do_nsau a6, a8, a7, a11
+ srli a8, a8, 9
+ bge a6, a10, .Lsub_subnormal
+ addi a6, a6, 1
+
+.Lsub_normalize_shift:
+ /* Shift the mantissa (a8/a9) left by a6. */
+ ssl a6
+ src a8, a8, a9
+ sll a9, a9
+
+ /* Combine the shifted mantissa with the sign and exponent,
+ decrementing the exponent by a6. (The exponent has already
+ been decremented by one due to the borrow from the subtraction,
+ but adding the mantissa will increment the exponent by one.) */
+ srli a2, a2, 23
+ sub a2, a2, a6
+ slli a2, a2, 23
+ add a2, a2, a8
+ j .Lsub_round
+
+.Lsub_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+.Lsub_xzero:
+ /* If there was a borrow from the exponent, and the mantissa and
+ guard digits are all zero, then the inputs were equal and the
+ result should be zero. */
+ beqz a9, .Lsub_return_zero
+
+ /* Only the guard digit is nonzero. Shift by min(24, a10). */
+ addi a11, a10, -24
+ movi a6, 24
+ movltz a6, a10, a11
+ j .Lsub_normalize_shift
+
+.Lsub_subnormal:
+ /* The exponent is too small to shift away all the leading zeros.
+ Set a6 to the current exponent (which has already been
+ decremented by the borrow) so that the exponent of the result
+ will be zero. Do not add 1 to a6 in this case, because: (1)
+ adding the mantissa will not increment the exponent, so there is
+ no need to subtract anything extra from the exponent to
+ compensate, and (2) the effective exponent of a subnormal is 1
+ not 0 so the shift amount must be 1 smaller than normal. */
+ mov a6, a10
+ j .Lsub_normalize_shift
+
+#endif /* L_addsubsf3 */
+
+#ifdef L_mulsf3
+
+ /* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__mulsf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Lmul_xexpzero:
+ /* Clear the sign bit of x. */
+ slli a2, a2, 1
+ srli a2, a2, 1
+
+ /* If x is zero, return zero. */
+ beqz a2, .Lmul_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ do_nsau a10, a2, a11, a12
+ addi a10, a10, -8
+ ssl a10
+ sll a2, a2
+ movi a8, 1
+ sub a8, a8, a10
+ j .Lmul_xnormalized
+
+.Lmul_yexpzero:
+ /* Clear the sign bit of y. */
+ slli a3, a3, 1
+ srli a3, a3, 1
+
+ /* If y is zero, return zero. */
+ beqz a3, .Lmul_return_zero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ do_nsau a10, a3, a11, a12
+ addi a10, a10, -8
+ ssl a10
+ sll a3, a3
+ movi a9, 1
+ sub a9, a9, a10
+ j .Lmul_ynormalized
+
+.Lmul_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ j .Lmul_done
+
+.Lmul_xnan_or_inf:
+ /* If y is zero, return NaN. */
+ slli a8, a3, 1
+ bnez a8, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+ j .Lmul_done
+1:
+ /* If y is NaN, return y. */
+ bnall a3, a6, .Lmul_returnx
+ slli a8, a3, 9
+ beqz a8, .Lmul_returnx
+
+.Lmul_returny:
+ mov a2, a3
+
+.Lmul_returnx:
+ /* Set the sign bit and return. */
+ extui a7, a7, 31, 1
+ slli a2, a2, 1
+ ssai 1
+ src a2, a7, a2
+ j .Lmul_done
+
+.Lmul_ynan_or_inf:
+ /* If x is zero, return NaN. */
+ slli a8, a2, 1
+ bnez a8, .Lmul_returny
+ movi a7, 0x400000 /* make it a quiet NaN */
+ or a2, a3, a7
+ j .Lmul_done
+
+ .align 4
+ .global __mulsf3
+ .type __mulsf3, @function
+__mulsf3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 64
+#else
+ leaf_entry sp, 32
+#endif
+ movi a6, 0x7f800000
+
+ /* Get the sign of the result. */
+ xor a7, a2, a3
+
+ /* Check for NaN and infinity. */
+ ball a2, a6, .Lmul_xnan_or_inf
+ ball a3, a6, .Lmul_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, a2, 23, 8
+ extui a9, a3, 23, 8
+
+ beqz a8, .Lmul_xexpzero
+.Lmul_xnormalized:
+ beqz a9, .Lmul_yexpzero
+.Lmul_ynormalized:
+
+ /* Add the exponents. */
+ add a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0xffffff
+ or a2, a2, a6
+ and a2, a2, a10
+ or a3, a3, a6
+ and a3, a3, a10
+
+ /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
+
+#if XCHAL_HAVE_MUL32_HIGH
+
+ mull a6, a2, a3
+ muluh a2, a2, a3
+
+#else
+
+ /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
+ products. These partial products are:
+
+ 0 xl * yl
+
+ 1 xl * yh
+ 2 xh * yl
+
+ 3 xh * yh
+
+ If using the Mul16 or Mul32 multiplier options, these input
+ chunks must be stored in separate registers. For Mac16, the
+ UMUL.AA.* opcodes can specify that the inputs come from either
+ half of the registers, so there is no need to shift them out
+ ahead of time. If there is no multiply hardware, the 16-bit
+ chunks can be extracted when setting up the arguments to the
+ separate multiply function. */
+
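+/* In C terms, the 32x32->64 bit decomposition being computed is roughly
+   (a sketch, with hypothetical names):
+
+     uint64_t mul32x32 (uint32_t x, uint32_t y)
+     {
+       uint32_t xlo = x & 0xffff, xhi = x >> 16;
+       uint32_t ylo = y & 0xffff, yhi = y >> 16;
+       uint64_t pp0 = (uint64_t) xlo * ylo;    /* bits 0..31 */
+       uint64_t mid = (uint64_t) xlo * yhi     /* pp1, bits 16..47 */
+                    + (uint64_t) xhi * ylo;    /* pp2, bits 16..47 */
+       uint64_t pp3 = (uint64_t) xhi * yhi;    /* bits 32..63 */
+       return pp0 + (mid << 16) + (pp3 << 32);
+     }
+
+   The code below accumulates the same terms with 32-bit operations,
+   tracking the carry from pp1 + pp2 by hand in a9. */
+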
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Calling a separate multiply function will clobber a0 and requires
+ use of a8 as a temporary, so save those values now. (The function
+ uses a custom ABI so nothing else needs to be saved.) */
+ s32i a0, sp, 0
+ s32i a8, sp, 4
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+ /* Get the high halves of the inputs into registers. */
+ srli a2h, a2, 16
+ srli a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui a2, a2, 0, 16
+ extui a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a6 with carry-out in a9. */
+ do_mul(a6, a2, l, a3, h) /* pp 1 */
+ do_mul(a11, a2, h, a3, l) /* pp 2 */
+ movi a9, 0
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Shift the high half of a9/a6 into position in a9. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a9, a9, a6
+
+ /* Compute the low word into a6. */
+ do_mul(a11, a2, l, a3, l) /* pp 0 */
+ sll a6, a6
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Compute the high word into a2. */
+ do_mul(a2, a2, h, a3, h) /* pp 3 */
+ add a2, a2, a9
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Restore values saved on the stack during the multiplication. */
+ l32i a0, sp, 0
+ l32i a8, sp, 4
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Shift left by 9 bits, unless there was a carry-out from the
+ multiply, in which case, shift by 8 bits and increment the
+ exponent. */
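+	/* (The product of two 24-bit significands, each in [2^23, 2^24), has
+	   47 or 48 bits, so a5 below is nonzero exactly when bit 47 of the
+	   product -- bit 15 of a2 -- is set, i.e., when there was a
+	   carry-out.) */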
+ movi a4, 9
+ srli a5, a2, 24 - 9
+ beqz a5, 1f
+ addi a4, a4, -1
+ addi a8, a8, 1
+1: ssl a4
+ src a2, a2, a6
+ sll a6, a6
+
+ /* Subtract the extra bias from the exponent sum (plus one to account
+ for the explicit "1.0" of the mantissa that will be added to the
+ exponent in the final result). */
+ movi a4, 0x80
+ sub a8, a8, a4
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..fd are OK here. */
+ movi a4, 0xfe
+ bgeu a8, a4, .Lmul_overflow
+
+.Lmul_round:
+ /* Round. */
+ bgez a6, .Lmul_rounded
+ addi a2, a2, 1
+ slli a6, a6, 1
+ beqz a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 23
+ add a2, a2, a8
+
+.Lmul_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or a2, a2, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+.Lmul_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ j .Lmul_rounded
+
+.Lmul_overflow:
+ bltz a8, .Lmul_underflow
+ /* Return +/- Infinity. */
+ movi a8, 0xff
+ slli a2, a8, 23
+ j .Lmul_addsign
+
+.Lmul_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ mov a9, a6
+ ssr a8
+ bgeui a8, 32, .Lmul_flush_to_zero
+
+ /* Shift a2 right. Any bits that are shifted out of a2 are saved
+ in a6 (combined with the shifted-out bits currently in a6) for
+ rounding the result. */
+ sll a6, a2
+ srl a2, a2
+
+ /* Set the exponent to zero. */
+ movi a8, 0
+
+ /* Pack any nonzero bits shifted out into a6. */
+ beqz a9, .Lmul_round
+ movi a9, 1
+ or a6, a6, a9
+ j .Lmul_round
+
+.Lmul_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ j .Lmul_done
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
+#endif /* L_mulsf3 */
+
+#ifdef L_divsf3
+
+ /* Division */
+__divsf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Ldiv_yexpzero:
+ /* Clear the sign bit of y. */
+ slli a3, a3, 1
+ srli a3, a3, 1
+
+ /* Check for division by zero. */
+ beqz a3, .Ldiv_yzero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ do_nsau a10, a3, a4, a5
+ addi a10, a10, -8
+ ssl a10
+ sll a3, a3
+ movi a9, 1
+ sub a9, a9, a10
+ j .Ldiv_ynormalized
+
+.Ldiv_yzero:
+ /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
+ slli a4, a2, 1
+ srli a4, a4, 1
+ srli a2, a7, 31
+ slli a2, a2, 31
+ or a2, a2, a6
+ bnez a4, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Ldiv_xexpzero:
+ /* Clear the sign bit of x. */
+ slli a2, a2, 1
+ srli a2, a2, 1
+
+ /* If x is zero, return zero. */
+ beqz a2, .Ldiv_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ do_nsau a10, a2, a4, a5
+ addi a10, a10, -8
+ ssl a10
+ sll a2, a2
+ movi a8, 1
+ sub a8, a8, a10
+ j .Ldiv_xnormalized
+
+.Ldiv_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ leaf_return
+
+.Ldiv_xnan_or_inf:
+ /* Set the sign bit of the result. */
+ srli a7, a3, 31
+ slli a7, a7, 31
+ xor a2, a2, a7
+ /* If y is NaN or Inf, return NaN. */
+ bnall a3, a6, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Ldiv_ynan_or_inf:
+ /* If y is Infinity, return zero. */
+ slli a8, a3, 9
+ beqz a8, .Ldiv_return_zero
+ /* y is NaN; return it. */
+ mov a2, a3
+ leaf_return
+
+ .align 4
+ .global __divsf3
+ .type __divsf3, @function
+__divsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Get the sign of the result. */
+ xor a7, a2, a3
+
+ /* Check for NaN and infinity. */
+ ball a2, a6, .Ldiv_xnan_or_inf
+ ball a3, a6, .Ldiv_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, a2, 23, 8
+ extui a9, a3, 23, 8
+
+ beqz a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:
+ beqz a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:
+
+ /* Subtract the exponents. */
+ sub a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0xffffff
+ or a2, a2, a6
+ and a2, a2, a10
+ or a3, a3, a6
+ and a3, a3, a10
+
+ /* The first digit of the mantissa division must be a one.
+ Shift x (and adjust the exponent) as needed to make this true. */
+ bltu a3, a2, 1f
+ slli a2, a2, 1
+ addi a8, a8, -1
+1:
+ /* Do the first subtraction and shift. */
+ sub a2, a2, a3
+ slli a2, a2, 1
+
+ /* Put the quotient into a10. */
+ movi a10, 1
+
+ /* Divide one bit at a time for 23 bits. */
+ movi a9, 23
+#if XCHAL_HAVE_LOOPS
+ loop a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+ /* Shift the quotient << 1. */
+ slli a10, a10, 1
+
+ /* Is this digit a 0 or 1? */
+ bltu a2, a3, 1f
+
+ /* Output a 1 and subtract. */
+ addi a10, a10, 1
+ sub a2, a2, a3
+
+ /* Shift the dividend << 1. */
+1: slli a2, a2, 1
+
+#if !XCHAL_HAVE_LOOPS
+ addi a9, a9, -1
+ bnez a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
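+	/* In C terms, the restoring division above is roughly (a sketch):
+
+	     q = 1; r = (x - y) << 1;
+	     for (i = 0; i < 23; i++) {
+	       q <<= 1;
+	       if (r >= y) { q++; r -= y; }
+	       r <<= 1;
+	     }
+
+	   leaving a 24-bit quotient in a10 and twice the final remainder
+	   in a2 for the rounding step below. */
+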
+ /* Add the exponent bias (less one to account for the explicit "1.0"
+ of the mantissa that will be added to the exponent in the final
+ result). */
+ addi a8, a8, 0x7e
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..fd are OK here. */
+ movi a4, 0xfe
+ bgeu a8, a4, .Ldiv_overflow
+
+.Ldiv_round:
+ /* Round. The remainder (<< 1) is in a2. */
+ bltu a2, a3, .Ldiv_rounded
+ addi a10, a10, 1
+ beq a2, a3, .Ldiv_exactlyhalf
+
+.Ldiv_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 23
+ add a2, a10, a8
+
+.Ldiv_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or a2, a2, a7
+ leaf_return
+
+.Ldiv_overflow:
+ bltz a8, .Ldiv_underflow
+ /* Return +/- Infinity. */
+ addi a8, a4, 1 /* 0xff */
+ slli a2, a8, 23
+ j .Ldiv_addsign
+
+.Ldiv_exactlyhalf:
+ /* Remainder is exactly half the divisor. Round even. */
+ srli a10, a10, 1
+ slli a10, a10, 1
+ j .Ldiv_rounded
+
+.Ldiv_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ ssr a8
+ bgeui a8, 32, .Ldiv_flush_to_zero
+
+ /* Shift a10 right. Any bits that are shifted out of a10 are
+ saved in a6 for rounding the result. */
+ sll a6, a10
+ srl a10, a10
+
+ /* Set the exponent to zero. */
+ movi a8, 0
+
+ /* Pack any nonzero remainder (in a2) into a6. */
+ beqz a2, 1f
+ movi a9, 1
+ or a6, a6, a9
+
+ /* Round a10 based on the bits shifted out into a6. */
+1: bgez a6, .Ldiv_rounded
+ addi a10, a10, 1
+ slli a6, a6, 1
+ bnez a6, .Ldiv_rounded
+ srli a10, a10, 1
+ slli a10, a10, 1
+ j .Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ leaf_return
+
+#endif /* L_divsf3 */
+
+#ifdef L_cmpsf2
+
+ /* Equal and Not Equal */
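+
+	/* Return-value convention for the comparison helpers in this
+	   section: callers compare the result against zero, so the NaN
+	   cases are chosen to make every test fail.  __eqsf2/__nesf2
+	   return nonzero for NaN; __lesf2 returns 1 and __ltsf2 returns 0
+	   for NaN, so <= 0 and < 0 tests fail; __gesf2 returns -1 and
+	   __gtsf2 returns 0 for NaN, so >= 0 and > 0 tests fail. */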
+
+ .align 4
+ .global __eqsf2
+ .global __nesf2
+ .set __nesf2, __eqsf2
+ .type __eqsf2, @function
+__eqsf2:
+ leaf_entry sp, 16
+ bne a2, a3, 4f
+
+ /* The values are equal but NaN != NaN. Check the exponent. */
+ movi a6, 0x7f800000
+ ball a2, a6, 3f
+
+ /* Equal. */
+ movi a2, 0
+ leaf_return
+
+ /* Not equal. */
+2: movi a2, 1
+ leaf_return
+
+ /* Check if the mantissas are nonzero. */
+3: slli a7, a2, 9
+ j 5f
+
+ /* Check if x and y are zero with different signs. */
+4: or a7, a2, a3
+ slli a7, a7, 1
+
+	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+	   of x when the exponent of x is all ones and x == y. */
+5: movi a2, 0
+ movi a3, 1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than */
+
+ .align 4
+ .global __gtsf2
+ .type __gtsf2, @function
+__gtsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Lle_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+
+ /* Less Than or Equal */
+
+ .align 4
+ .global __lesf2
+ .type __lesf2, @function
+__lesf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Lle_cmp
+ movi a2, 1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+.Lle_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, a2, a3
+ bltz a7, .Lle_diff_signs
+
+ /* Check if x is negative. */
+ bltz a2, .Lle_xneg
+
+ /* Check if x <= y. */
+ bltu a3, a2, 5f
+4: movi a2, 0
+ leaf_return
+
+.Lle_xneg:
+ /* Check if y <= x. */
+ bgeu a2, a3, 4b
+5: movi a2, 1
+ leaf_return
+
+.Lle_diff_signs:
+ bltz a2, 4b
+
+ /* Check if both x and y are zero. */
+ or a7, a2, a3
+ slli a7, a7, 1
+ movi a2, 1
+ movi a3, 0
+ moveqz a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than or Equal */
+
+ .align 4
+ .global __gesf2
+ .type __gesf2, @function
+__gesf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Llt_cmp
+ movi a2, -1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, -1
+ leaf_return
+
+
+ /* Less Than */
+
+ .align 4
+ .global __ltsf2
+ .type __ltsf2, @function
+__ltsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Llt_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+.Llt_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, a2, a3
+ bltz a7, .Llt_diff_signs
+
+ /* Check if x is negative. */
+ bltz a2, .Llt_xneg
+
+ /* Check if x < y. */
+ bgeu a2, a3, 5f
+4: movi a2, -1
+ leaf_return
+
+.Llt_xneg:
+ /* Check if y < x. */
+ bltu a3, a2, 4b
+5: movi a2, 0
+ leaf_return
+
+.Llt_diff_signs:
+ bgez a2, 5b
+
+	/* Check if both x and y are zero; if not, x < y. */
+ or a7, a2, a3
+ slli a7, a7, 1
+ movi a2, 0
+ movi a3, -1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Unordered */
+
+ .align 4
+ .global __unordsf2
+ .type __unordsf2, @function
+__unordsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 3f
+1: ball a3, a6, 4f
+2: movi a2, 0
+ leaf_return
+
+3: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+4: slli a7, a3, 9
+ beqz a7, 2b
+ movi a2, 1
+ leaf_return
+
+#endif /* L_cmpsf2 */
+
+#ifdef L_fixsfsi
+
+ .align 4
+ .global __fixsfsi
+ .type __fixsfsi, @function
+__fixsfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixsfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7e
+ bgei a4, 32, .Lfixsfsi_maxint
+ blti a4, 1, .Lfixsfsi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli a5, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixsfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixsfsi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi a2, 0
+
+.Lfixsfsi_maxint:
+ slli a4, a6, 8 /* 0x80000000 */
+ addi a5, a4, -1 /* 0x7fffffff */
+ movgez a4, a5, a2
+ mov a2, a4
+ leaf_return
+
+.Lfixsfsi_zero:
+ movi a2, 0
+ leaf_return
+
+#endif /* L_fixsfsi */
+
+#ifdef L_fixsfdi
+
+ .align 4
+ .global __fixsfdi
+ .type __fixsfdi, @function
+__fixsfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixsfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7e
+ bgei a4, 64, .Lfixsfdi_maxint
+ blti a4, 1, .Lfixsfdi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli xh, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixsfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixsfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixsfdi_smallshift:
+ movi xl, 0
+ sll xl, xh
+ srl xh, xh
+ j .Lfixsfdi_shifted
+
+.Lfixsfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixsfdi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi a2, 0
+
+.Lfixsfdi_maxint:
+ slli a7, a6, 8 /* 0x80000000 */
+ bgez a2, 1f
+ mov xh, a7
+ movi xl, 0
+ leaf_return
+
+1: addi xh, a7, -1 /* 0x7fffffff */
+ movi xl, -1
+ leaf_return
+
+.Lfixsfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+#endif /* L_fixsfdi */
+
+#ifdef L_fixunssfsi
+
+ .align 4
+ .global __fixunssfsi
+ .type __fixunssfsi, @function
+__fixunssfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixunssfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7f
+ bgei a4, 32, .Lfixunssfsi_maxint
+ bltz a4, .Lfixunssfsi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli a5, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 32, .Lfixunssfsi_bigexp
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixunssfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixunssfsi_maxint
+
+ /* Translate NaN to 0xffffffff. */
+ movi a2, -1
+ leaf_return
+
+.Lfixunssfsi_maxint:
+ slli a4, a6, 8 /* 0x80000000 */
+ movi a5, -1 /* 0xffffffff */
+ movgez a4, a5, a2
+ mov a2, a4
+ leaf_return
+
+.Lfixunssfsi_zero:
+ movi a2, 0
+ leaf_return
+
+.Lfixunssfsi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a2, 1f
+ mov a2, a5 /* no shift needed */
+ leaf_return
+
+ /* Return 0x80000000 if negative. */
+1: slli a2, a6, 8
+ leaf_return
+
+#endif /* L_fixunssfsi */
+
+#ifdef L_fixunssfdi
+
+ .align 4
+ .global __fixunssfdi
+ .type __fixunssfdi, @function
+__fixunssfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixunssfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7f
+ bgei a4, 64, .Lfixunssfdi_maxint
+ bltz a4, .Lfixunssfdi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli xh, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 64, .Lfixunssfdi_bigexp
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixunssfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixunssfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixunssfdi_smallshift:
+ movi xl, 0
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixunssfdi_shifted
+
+.Lfixunssfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixunssfdi_maxint
+
+ /* Translate NaN to 0xffffffff.... */
+1: movi xh, -1
+ movi xl, -1
+ leaf_return
+
+.Lfixunssfdi_maxint:
+ bgez a2, 1b
+2: slli xh, a6, 8 /* 0x80000000 */
+ movi xl, 0
+ leaf_return
+
+.Lfixunssfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+.Lfixunssfdi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a7, 2b
+ movi xl, 0
+ leaf_return /* no shift needed */
+
+#endif /* L_fixunssfdi */
+
+#ifdef L_floatsisf
+
+ .align 4
+ .global __floatunsisf
+ .type __floatunsisf, @function
+__floatunsisf:
+ leaf_entry sp, 16
+ beqz a2, .Lfloatsisf_return
+
+ /* Set the sign to zero and jump to the floatsisf code. */
+ movi a7, 0
+ j .Lfloatsisf_normalize
+
+ .align 4
+ .global __floatsisf
+ .type __floatsisf, @function
+__floatsisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ beqz a2, .Lfloatsisf_return
+
+ /* Save the sign. */
+ extui a7, a2, 31, 1
+
+ /* Get the absolute value. */
+#if XCHAL_HAVE_ABS
+ abs a2, a2
+#else
+ neg a4, a2
+ movltz a2, a4, a2
+#endif
+
+.Lfloatsisf_normalize:
+ /* Normalize with the first 1 bit in the msb. */
+ do_nsau a4, a2, a5, a6
+ ssl a4
+ sll a5, a2
+
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ srli a2, a5, 8
+ slli a6, a5, (32 - 8)
+
+ /* Set the exponent. */
+ movi a5, 0x9d /* 0x7e + 31 */
+ sub a5, a5, a4
+ slli a5, a5, 23
+ add a2, a2, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or a2, a2, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, .Lfloatsisf_return
+ addi a2, a2, 1 /* Overflow to the exponent is OK. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatsisf_exactlyhalf
+
+.Lfloatsisf_return:
+ leaf_return
+
+.Lfloatsisf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+#endif /* L_floatsisf */
+
+#ifdef L_floatdisf
+
+ .align 4
+ .global __floatundisf
+ .type __floatundisf, @function
+__floatundisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Set the sign to zero and jump to the floatdisf code. */
+ movi a7, 0
+ j .Lfloatdisf_normalize
+
+ .align 4
+ .global __floatdisf
+ .type __floatdisf, @function
+__floatdisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Save the sign. */
+ extui a7, xh, 31, 1
+
+ /* Get the absolute value. */
+ bgez xh, .Lfloatdisf_normalize
+ neg xl, xl
+ neg xh, xh
+ beqz xl, .Lfloatdisf_normalize
+ addi xh, xh, -1
+
+.Lfloatdisf_normalize:
+ /* Normalize with the first 1 bit in the msb of xh. */
+ beqz xh, .Lfloatdisf_bigshift
+ do_nsau a4, xh, a5, a6
+ ssl a4
+ src xh, xh, xl
+ sll xl, xl
+
+.Lfloatdisf_shifted:
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ ssai 8
+ sll a5, xl
+ src a6, xh, xl
+ srl xh, xh
+ beqz a5, 1f
+ movi a5, 1
+ or a6, a6, a5
+1:
+ /* Set the exponent. */
+ movi a5, 0xbd /* 0x7e + 63 */
+ sub a5, a5, a4
+ slli a5, a5, 23
+ add a2, xh, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or a2, a2, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, 2f
+ addi a2, a2, 1 /* Overflow to the exponent is OK. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatdisf_exactlyhalf
+2: leaf_return
+
+.Lfloatdisf_bigshift:
+ /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
+ do_nsau a4, xl, a5, a6
+ ssl a4
+ sll xh, xl
+ movi xl, 0
+ addi a4, a4, 32
+ j .Lfloatdisf_shifted
+
+.Lfloatdisf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+#endif /* L_floatdisf */
diff --git a/gcc/config/xtensa/lib1funcs.asm b/gcc/config/xtensa/lib1funcs.asm
new file mode 100644
index 000000000..071b91711
--- /dev/null
+++ b/gcc/config/xtensa/lib1funcs.asm
@@ -0,0 +1,845 @@
+/* Assembly functions for the Xtensa version of libgcc1.
+ Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009
+ Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "xtensa-config.h"
+
+/* Define macros for the ABS and ADDX* instructions to handle cases
+ where they are not included in the Xtensa processor configuration. */
+
+ .macro do_abs dst, src, tmp
+#if XCHAL_HAVE_ABS
+ abs \dst, \src
+#else
+ neg \tmp, \src
+ movgez \tmp, \src, \src
+ mov \dst, \tmp
+#endif
+ .endm
+
+ .macro do_addx2 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx2 \dst, \as, \at
+#else
+ slli \tmp, \as, 1
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+ .macro do_addx4 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx4 \dst, \as, \at
+#else
+ slli \tmp, \as, 2
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+ .macro do_addx8 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx8 \dst, \as, \at
+#else
+ slli \tmp, \as, 3
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+/* Define macros for leaf function entry and return, supporting either the
+ standard register windowed ABI or the non-windowed call0 ABI. These
+ macros do not allocate any extra stack space, so they only work for
+ leaf functions that do not need to spill anything to the stack. */
+
+ .macro leaf_entry reg, size
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ entry \reg, \size
+#else
+ /* do nothing */
+#endif
+ .endm
+
+ .macro leaf_return
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ retw
+#else
+ ret
+#endif
+ .endm
+
+
+#ifdef L_mulsi3
+ .align 4
+ .global __mulsi3
+ .type __mulsi3, @function
+__mulsi3:
+ leaf_entry sp, 16
+
+#if XCHAL_HAVE_MUL32
+ mull a2, a2, a3
+
+#elif XCHAL_HAVE_MUL16
+ or a4, a2, a3
+ srai a4, a4, 16
+ bnez a4, .LMUL16
+ mul16u a2, a2, a3
+ leaf_return
+.LMUL16:
+ srai a4, a2, 16
+ srai a5, a3, 16
+ mul16u a7, a4, a3
+ mul16u a6, a5, a2
+ mul16u a4, a2, a3
+ add a7, a7, a6
+ slli a7, a7, 16
+ add a2, a7, a4
+
+#elif XCHAL_HAVE_MAC16
+ mul.aa.hl a2, a3
+ mula.aa.lh a2, a3
+ rsr a5, ACCLO
+ umul.aa.ll a2, a3
+ rsr a4, ACCLO
+ slli a5, a5, 16
+ add a2, a4, a5
+
+#else /* !MUL32 && !MUL16 && !MAC16 */
+
+ /* Multiply one bit at a time, but unroll the loop 4x to better
+ exploit the addx instructions and avoid overhead.
+ Peel the first iteration to save a cycle on init. */
+
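+	/* In C terms, each unrolled step below computes roughly (a sketch):
+
+	     res = 0;
+	     while (b != 0) {
+	       if (b & 1) res += a;
+	       if (b & 2) res += a << 1;
+	       if (b & 4) res += a << 2;
+	       if (b & 8) res += a << 3;
+	       a <<= 4; b >>= 4;
+	     }
+
+	   applied to the absolute values, with the sign of the product
+	   fixed up at the end. */
+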
+ /* Avoid negative numbers. */
+ xor a5, a2, a3 /* Top bit is 1 if one input is negative. */
+ do_abs a3, a3, a6
+ do_abs a2, a2, a6
+
+ /* Swap so the second argument is smaller. */
+ sub a7, a2, a3
+ mov a4, a3
+ movgez a4, a2, a7 /* a4 = max (a2, a3) */
+ movltz a3, a2, a7 /* a3 = min (a2, a3) */
+
+ movi a2, 0
+ extui a6, a3, 0, 1
+ movnez a2, a4, a6
+
+ do_addx2 a7, a4, a2, a7
+ extui a6, a3, 1, 1
+ movnez a2, a7, a6
+
+ do_addx4 a7, a4, a2, a7
+ extui a6, a3, 2, 1
+ movnez a2, a7, a6
+
+ do_addx8 a7, a4, a2, a7
+ extui a6, a3, 3, 1
+ movnez a2, a7, a6
+
+ bgeui a3, 16, .Lmult_main_loop
+ neg a3, a2
+ movltz a2, a3, a5
+ leaf_return
+
+ .align 4
+.Lmult_main_loop:
+ srli a3, a3, 4
+ slli a4, a4, 4
+
+ add a7, a4, a2
+ extui a6, a3, 0, 1
+ movnez a2, a7, a6
+
+ do_addx2 a7, a4, a2, a7
+ extui a6, a3, 1, 1
+ movnez a2, a7, a6
+
+ do_addx4 a7, a4, a2, a7
+ extui a6, a3, 2, 1
+ movnez a2, a7, a6
+
+ do_addx8 a7, a4, a2, a7
+ extui a6, a3, 3, 1
+ movnez a2, a7, a6
+
+ bgeui a3, 16, .Lmult_main_loop
+
+ neg a3, a2
+ movltz a2, a3, a5
+
+#endif /* !MUL32 && !MUL16 && !MAC16 */
+
+ leaf_return
+ .size __mulsi3, . - __mulsi3
+
+#endif /* L_mulsi3 */
+
+
+#ifdef L_umulsidi3
+
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+ .align 4
+ .global __umulsidi3
+ .type __umulsidi3, @function
+__umulsidi3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 48
+#else
+ leaf_entry sp, 16
+#endif
+
+#ifdef __XTENSA_EB__
+#define wh a2
+#define wl a3
+#else
+#define wh a3
+#define wl a2
+#endif /* __XTENSA_EB__ */
+
+ /* This code is taken from the mulsf3 routine in ieee754-sf.S.
+ See more comments there. */
+
+#if XCHAL_HAVE_MUL32_HIGH
+ mull a6, a2, a3
+ muluh wh, a2, a3
+ mov wl, a6
+
+#else /* ! MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* a0 and a8 will be clobbered by calling the multiply function
+ but a8 is not used here and need not be saved. */
+ s32i a0, sp, 0
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+ /* Get the high halves of the inputs into registers. */
+ srli a2h, a2, 16
+ srli a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui a2, a2, 0, 16
+ extui a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a6 with carry-out in a9. */
+ do_mul(a6, a2, l, a3, h) /* pp 1 */
+ do_mul(a11, a2, h, a3, l) /* pp 2 */
+ movi a9, 0
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Shift the high half of a9/a6 into position in a9. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a9, a9, a6
+
+ /* Compute the low word into a6. */
+ do_mul(a11, a2, l, a3, l) /* pp 0 */
+ sll a6, a6
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Compute the high word into wh. */
+ do_mul(wh, a2, h, a3, h) /* pp 3 */
+ add wh, wh, a9
+ mov wl, a6
+
+#endif /* !MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Restore the original return address. */
+ l32i a0, sp, 0
+#endif
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
+
+ .size __umulsidi3, . - __umulsidi3
+
+#endif /* L_umulsidi3 */
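
The partial-product combination above corresponds to this C sketch (names are illustrative; the 16x16 multiplies stand in for whichever do_mul variant the configuration selects):

    #include <stdint.h>

    static void
    umulsidi3_sketch (uint32_t x, uint32_t y, uint32_t *hi, uint32_t *lo)
    {
      uint32_t xl = x & 0xffff, xh = x >> 16;
      uint32_t yl = y & 0xffff, yh = y >> 16;
      uint32_t pp0 = xl * yl;                /* do_mul (.., l, .., l) */
      uint32_t pp1 = xl * yh;                /* do_mul (.., l, .., h) */
      uint32_t pp2 = xh * yl;                /* do_mul (.., h, .., l) */
      uint32_t pp3 = xh * yh;                /* do_mul (.., h, .., h) */
      uint32_t mid = pp1 + pp2;
      uint32_t carry = mid < pp1;            /* carry out of pp1 + pp2 */
      carry = (carry << 16) | (mid >> 16);   /* ssai 16; src a9, a9, a6 */
      uint32_t low = (mid << 16) + pp0;      /* sll a6; add a6, a6, a11 */
      if (low < pp0)
        carry++;                             /* safe: cannot carry out */
      *hi = pp3 + carry;
      *lo = low;
    }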
+
+
+/* Define a macro for the NSAU (unsigned normalize shift amount)
+ instruction, which computes the number of leading zero bits,
+ to handle cases where it is not included in the Xtensa processor
+ configuration. */
+
+ .macro do_nsau cnt, val, tmp, a
+#if XCHAL_HAVE_NSA
+ nsau \cnt, \val
+#else
+ mov \a, \val
+ movi \cnt, 0
+ extui \tmp, \a, 16, 16
+ bnez \tmp, 0f
+ movi \cnt, 16
+ slli \a, \a, 16
+0:
+ extui \tmp, \a, 24, 8
+ bnez \tmp, 1f
+ addi \cnt, \cnt, 8
+ slli \a, \a, 8
+1:
+ movi \tmp, __nsau_data
+ extui \a, \a, 24, 8
+ add \tmp, \tmp, \a
+ l8ui \tmp, \tmp, 0
+ add \cnt, \cnt, \tmp
+#endif /* !XCHAL_HAVE_NSA */
+ .endm
+
+#ifdef L_clz
+ .section .rodata
+ .align 4
+ .global __nsau_data
+ .type __nsau_data, @object
+__nsau_data:
+#if !XCHAL_HAVE_NSA
+ .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
+ .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
+ .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+ .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#endif /* !XCHAL_HAVE_NSA */
+ .size __nsau_data, . - __nsau_data
+ .hidden __nsau_data
+#endif /* L_clz */
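
A C sketch of the table-driven fallback (illustrative; only the leading table entries are spelled out here, the remainder follow __nsau_data above):

    #include <stdint.h>

    static const unsigned char nsau_data[256] =
      { 8, 7, 6, 6, 5, 5, 5, 5 /* ... remaining entries as in __nsau_data */ };

    static int
    nsau (uint32_t v)
    {
      int cnt = 0;
      if ((v >> 16) == 0) { cnt = 16; v <<= 16; }
      if ((v >> 24) == 0) { cnt += 8; v <<= 8; }
      return cnt + nsau_data[v >> 24];   /* nsau (0) == 32 */
    }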
+
+
+#ifdef L_clzsi2
+ .align 4
+ .global __clzsi2
+ .type __clzsi2, @function
+__clzsi2:
+ leaf_entry sp, 16
+ do_nsau a2, a2, a3, a4
+ leaf_return
+ .size __clzsi2, . - __clzsi2
+
+#endif /* L_clzsi2 */
+
+
+#ifdef L_ctzsi2
+ .align 4
+ .global __ctzsi2
+ .type __ctzsi2, @function
+__ctzsi2:
+ leaf_entry sp, 16
+ neg a3, a2
+ and a3, a3, a2
+ do_nsau a2, a3, a4, a5
+ neg a2, a2
+ addi a2, a2, 31
+ leaf_return
+ .size __ctzsi2, . - __ctzsi2
+
+#endif /* L_ctzsi2 */
+
+
+#ifdef L_ffssi2
+ .align 4
+ .global __ffssi2
+ .type __ffssi2, @function
+__ffssi2:
+ leaf_entry sp, 16
+ neg a3, a2
+ and a3, a3, a2
+ do_nsau a2, a3, a4, a5
+ neg a2, a2
+ addi a2, a2, 32
+ leaf_return
+ .size __ffssi2, . - __ffssi2
+
+#endif /* L_ffssi2 */
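
Both __ctzsi2 and __ffssi2 reduce to the nsau sketch above: (x & -x) isolates the lowest set bit, and its leading-zero count converts to a trailing-bit index. In C (illustrative names):

    static int
    ctzsi2 (uint32_t x)
    {
      return 31 - nsau (x & -x);
    }

    static int
    ffssi2 (uint32_t x)   /* ffs (0) == 0 falls out, since nsau (0) == 32 */
    {
      return 32 - nsau (x & -x);
    }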
+
+
+#ifdef L_udivsi3
+ .align 4
+ .global __udivsi3
+ .type __udivsi3, @function
+__udivsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ quou a2, a2, a3
+#else
+ bltui a3, 2, .Lle_one /* check if the divisor <= 1 */
+
+ mov a6, a2 /* keep dividend in a6 */
+ do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */
+ do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
+ ssl a4
+ sll a3, a3 /* divisor <<= count */
+ movi a2, 0 /* quotient = 0 */
+
+ /* test-subtract-and-shift loop; one quotient bit on each iteration */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a6, a3, .Lzerobit
+ sub a6, a6, a3
+ addi a2, a2, 1
+.Lzerobit:
+ slli a2, a2, 1
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+ bltu a6, a3, .Lreturn
+ addi a2, a2, 1 /* increment quotient if dividend >= divisor */
+.Lreturn:
+ leaf_return
+
+.Lle_one:
+	beqz	a3, .Lerror	/* if divisor == 0, raise an exception */
+	leaf_return		/* if divisor == 1, return the dividend */
+
+.Lspecial:
+ /* return dividend >= divisor */
+ bltu a6, a3, .Lreturn0
+ movi a2, 1
+ leaf_return
+
+.Lerror:
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __udivsi3, . - __udivsi3
+
+#endif /* L_udivsi3 */
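
A C sketch of this normalize-and-restore division (illustrative name; the d == 0 and d == 1 cases are assumed to have been filtered out by the .Lle_one path, and nsau is the sketch above):

    static uint32_t
    udivsi3_soft (uint32_t n, uint32_t d)   /* caller ensures d >= 2 */
    {
      uint32_t q = 0;
      int count = nsau (d) - nsau (n);      /* shifts to align d with n */
      if (count <= 0)
        return n >= d;                      /* the .Lspecial case */
      d <<= count;
      while (count-- > 0)
        {
          if (n >= d)                       /* test-subtract... */
            {
              n -= d;
              q++;
            }
          q <<= 1;                          /* ...and shift */
          d >>= 1;
        }
      if (n >= d)
        q++;                                /* final quotient bit */
      return q;
    }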
+
+
+#ifdef L_divsi3
+ .align 4
+ .global __divsi3
+ .type __divsi3, @function
+__divsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ quos a2, a2, a3
+#else
+ xor a7, a2, a3 /* sign = dividend ^ divisor */
+ do_abs a6, a2, a4 /* udividend = abs (dividend) */
+ do_abs a3, a3, a4 /* udivisor = abs (divisor) */
+ bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
+ do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */
+ do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
+ ssl a4
+ sll a3, a3 /* udivisor <<= count */
+ movi a2, 0 /* quotient = 0 */
+
+ /* test-subtract-and-shift loop; one quotient bit on each iteration */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a6, a3, .Lzerobit
+ sub a6, a6, a3
+ addi a2, a2, 1
+.Lzerobit:
+ slli a2, a2, 1
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+ bltu a6, a3, .Lreturn
+ addi a2, a2, 1 /* increment if udividend >= udivisor */
+.Lreturn:
+ neg a5, a2
+ movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */
+ leaf_return
+
+.Lle_one:
+ beqz a3, .Lerror
+ neg a2, a6 /* if udivisor == 1, then return... */
+ movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */
+ leaf_return
+
+.Lspecial:
+ bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
+ movi a2, 1
+ movi a4, -1
+ movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */
+ leaf_return
+
+.Lerror:
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __divsi3, . - __divsi3
+
+#endif /* L_divsi3 */
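
The signed wrapper around the same loop, as a C sketch (udivsi3_soft from the sketch above; unsigned negation mirrors do_abs, which likewise works modulo 2^32):

    static int32_t
    divsi3_soft (int32_t n, int32_t d)
    {
      uint32_t un = n < 0 ? -(uint32_t) n : (uint32_t) n;   /* do_abs */
      uint32_t ud = d < 0 ? -(uint32_t) d : (uint32_t) d;
      uint32_t q = udivsi3_soft (un, ud);
      return (n ^ d) < 0 ? -(int32_t) q : (int32_t) q;      /* movltz */
    }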
+
+
+#ifdef L_umodsi3
+ .align 4
+ .global __umodsi3
+ .type __umodsi3, @function
+__umodsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ remu a2, a2, a3
+#else
+ bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */
+
+ do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */
+ do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
+ ssl a4
+ sll a3, a3 /* divisor <<= count */
+
+ /* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a2, a3, .Lzerobit
+ sub a2, a2, a3
+.Lzerobit:
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+ bltu a2, a3, .Lreturn
+ sub a2, a2, a3 /* subtract once more if dividend >= divisor */
+.Lreturn:
+ leaf_return
+
+.Lle_one:
+ bnez a3, .Lreturn0
+
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __umodsi3, . - __umodsi3
+
+#endif /* L_umodsi3 */
+
+
+#ifdef L_modsi3
+ .align 4
+ .global __modsi3
+ .type __modsi3, @function
+__modsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ rems a2, a2, a3
+#else
+ mov a7, a2 /* save original (signed) dividend */
+ do_abs a2, a2, a4 /* udividend = abs (dividend) */
+ do_abs a3, a3, a4 /* udivisor = abs (divisor) */
+ bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
+ do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */
+ do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
+ ssl a4
+ sll a3, a3 /* udivisor <<= count */
+
+ /* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a2, a3, .Lzerobit
+ sub a2, a2, a3
+.Lzerobit:
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+ bltu a2, a3, .Lreturn
+ sub a2, a2, a3 /* subtract again if udividend >= udivisor */
+.Lreturn:
+ bgez a7, .Lpositive
+ neg a2, a2 /* if (dividend < 0), negate the remainder */
+.Lpositive:
+ leaf_return
+
+.Lle_one:
+ bnez a3, .Lreturn0
+
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __modsi3, . - __modsi3
+
+#endif /* L_modsi3 */
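
In C terms (a sketch; umodsi3_soft names an assumed C analogue of the unsigned loop above) the sign fixup gives the remainder the sign of the dividend:

    static int32_t
    modsi3_soft (int32_t n, int32_t d)
    {
      uint32_t r = umodsi3_soft (n < 0 ? -(uint32_t) n : (uint32_t) n,
                                 d < 0 ? -(uint32_t) d : (uint32_t) d);
      return n < 0 ? -(int32_t) r : (int32_t) r;
    }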
+
+
+#ifdef __XTENSA_EB__
+#define uh a2
+#define ul a3
+#else
+#define uh a3
+#define ul a2
+#endif /* __XTENSA_EB__ */
+
+
+#ifdef L_ashldi3
+ .align 4
+ .global __ashldi3
+ .type __ashldi3, @function
+__ashldi3:
+ leaf_entry sp, 16
+ ssl a4
+ bgei a4, 32, .Llow_only
+ src uh, uh, ul
+ sll ul, ul
+ leaf_return
+
+.Llow_only:
+ sll uh, ul
+ movi ul, 0
+ leaf_return
+ .size __ashldi3, . - __ashldi3
+
+#endif /* L_ashldi3 */
+
+
+#ifdef L_ashrdi3
+ .align 4
+ .global __ashrdi3
+ .type __ashrdi3, @function
+__ashrdi3:
+ leaf_entry sp, 16
+ ssr a4
+ bgei a4, 32, .Lhigh_only
+ src ul, uh, ul
+ sra uh, uh
+ leaf_return
+
+.Lhigh_only:
+ sra ul, uh
+ srai uh, uh, 31
+ leaf_return
+ .size __ashrdi3, . - __ashrdi3
+
+#endif /* L_ashrdi3 */
+
+
+#ifdef L_lshrdi3
+ .align 4
+ .global __lshrdi3
+ .type __lshrdi3, @function
+__lshrdi3:
+ leaf_entry sp, 16
+ ssr a4
+ bgei a4, 32, .Lhigh_only1
+ src ul, uh, ul
+ srl uh, uh
+ leaf_return
+
+.Lhigh_only1:
+ srl ul, uh
+ movi uh, 0
+ leaf_return
+ .size __lshrdi3, . - __lshrdi3
+
+#endif /* L_lshrdi3 */
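
All three DImode shifts split into a funnel step for shifts under 32 and a degenerate single-word step otherwise; a C sketch of the left shift (0 <= n < 64 assumed, name illustrative):

    #include <stdint.h>

    static void
    ashldi3_soft (uint32_t *uh, uint32_t *ul, unsigned n)
    {
      if (n < 32)
        {
          /* src uh, uh, ul: funnel ul's top bits into uh. */
          *uh = n ? (*uh << n) | (*ul >> (32 - n)) : *uh;
          *ul <<= n;
        }
      else                       /* the .Llow_only case */
        {
          *uh = *ul << (n - 32);
          *ul = 0;
        }
    }

The right shifts differ only in which word degenerates and in the fill bits (sign copies for __ashrdi3, zeros for __lshrdi3).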
+
+
+#include "ieee754-df.S"
+#include "ieee754-sf.S"
diff --git a/gcc/config/xtensa/lib2funcs.S b/gcc/config/xtensa/lib2funcs.S
new file mode 100644
index 000000000..65134e24c
--- /dev/null
+++ b/gcc/config/xtensa/lib2funcs.S
@@ -0,0 +1,186 @@
+/* Assembly functions for libgcc2.
+ Copyright (C) 2001, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "xtensa-config.h"
+
+/* __xtensa_libgcc_window_spill: This function flushes out all but the
+ current register window. This is used to set up the stack so that
+ arbitrary frames can be accessed. */
+
+ .align 4
+ .global __xtensa_libgcc_window_spill
+ .type __xtensa_libgcc_window_spill,@function
+__xtensa_libgcc_window_spill:
+ entry sp, 32
+ movi a2, 0
+ syscall
+ retw
+ .size __xtensa_libgcc_window_spill, .-__xtensa_libgcc_window_spill
+
+
+/* __xtensa_nonlocal_goto: This code does all the hard work of a
+ nonlocal goto on Xtensa. It is here in the library to avoid the
+ code size bloat of generating it in-line. There are two
+ arguments:
+
+ a2 = frame pointer for the procedure containing the label
+ a3 = goto handler address
+
+ This function never returns to its caller but instead goes directly
+ to the address of the specified goto handler. */
+
+ .align 4
+ .global __xtensa_nonlocal_goto
+ .type __xtensa_nonlocal_goto,@function
+__xtensa_nonlocal_goto:
+ entry sp, 32
+
+ /* Flush registers. */
+ mov a5, a2
+ movi a2, 0
+ syscall
+ mov a2, a5
+
+ /* Because the save area for a0-a3 is stored one frame below
+ the one identified by a2, the only way to restore those
+ registers is to unwind the stack. If alloca() were never
+ called, we could just unwind until finding the sp value
+ matching a2. However, a2 is a frame pointer, not a stack
+ pointer, and may not be encountered during the unwinding.
+ The solution is to unwind until going _past_ the value
+ given by a2. This involves keeping three stack pointer
+ values during the unwinding:
+
+ next = sp of frame N-1
+ cur = sp of frame N
+ prev = sp of frame N+1
+
+ When next > a2, the desired save area is stored relative
+ to prev. At this point, cur will be the same as a2
+ except in the alloca() case.
+
+ Besides finding the values to be restored to a0-a3, we also
+ need to find the current window size for the target
+ function. This can be extracted from the high bits of the
+ return address, initially in a0. As the unwinding
+ proceeds, the window size is taken from the value of a0
+ saved _two_ frames below the current frame. */
+
+ addi a5, sp, -16 /* a5 = prev - save area */
+ l32i a6, a5, 4
+ addi a6, a6, -16 /* a6 = cur - save area */
+ mov a8, a0 /* a8 = return address (for window size) */
+ j .Lfirstframe
+
+.Lnextframe:
+ l32i a8, a5, 0 /* next return address (for window size) */
+ mov a5, a6 /* advance prev */
+ addi a6, a7, -16 /* advance cur */
+.Lfirstframe:
+ l32i a7, a6, 4 /* a7 = next */
+ bgeu a2, a7, .Lnextframe
+
+ /* At this point, prev (a5) points to the save area with the saved
+ values of a0-a3. Copy those values into the save area at the
+ current sp so they will be reloaded when the return from this
+ function underflows. We don't have to worry about exceptions
+ while updating the current save area, because the windows have
+ already been flushed. */
+
+ addi a4, sp, -16 /* a4 = save area of this function */
+ l32i a6, a5, 0
+ l32i a7, a5, 4
+ s32i a6, a4, 0
+ s32i a7, a4, 4
+ l32i a6, a5, 8
+ l32i a7, a5, 12
+ s32i a6, a4, 8
+ s32i a7, a4, 12
+
+ /* Set return address to goto handler. Use the window size bits
+ from the return address two frames below the target. */
+ extui a8, a8, 30, 2 /* get window size from return addr. */
+ slli a3, a3, 2 /* get goto handler addr. << 2 */
+ ssai 2
+ src a0, a8, a3 /* combine them with a funnel shift */
+
+ retw
+ .size __xtensa_nonlocal_goto, .-__xtensa_nonlocal_goto
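
A C sketch of the unwinding walk described in the comments above (assuming 32-bit pointers; the window-size bookkeeping kept in a8 is omitted, and the name is illustrative):

    #include <stdint.h>

    static uint32_t *
    find_save_area (uint32_t *sp, uint32_t fp)   /* fp is the a2 argument */
    {
      uint32_t *prev = sp - 4;                          /* addi a5, sp, -16 */
      uint32_t *cur = (uint32_t *)(uintptr_t) prev[1] - 4;  /* l32i a6, a5, 4 */
      for (;;)
        {
          uint32_t next = cur[1];                       /* l32i a7, a6, 4 */
          if (next > fp)
            return prev;     /* went past the target; prev holds a0-a3 */
          prev = cur;                                   /* advance prev */
          cur = (uint32_t *)(uintptr_t) next - 4;       /* advance cur */
        }
    }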
+
+
+/* __xtensa_sync_caches: This function is called after writing a trampoline
+ on the stack to force all the data writes to memory and invalidate the
+ instruction cache. a2 is the address of the new trampoline.
+
+ After the trampoline data is written out, it must be flushed out of
+ the data cache into memory. We use DHWB in case we have a writeback
+ cache. At least one DHWB instruction is needed for each data cache
+ line which may be touched by the trampoline. An ISYNC instruction
+ must follow the DHWBs.
+
+ We have to flush the i-cache to make sure that the new values get used.
+ At least one IHI instruction is needed for each i-cache line which may
+ be touched by the trampoline. An ISYNC instruction is also needed to
+ make sure that the modified instructions are loaded into the instruction
+ fetch buffer. */
+
+/* Use the maximum trampoline size. Flushing a bit extra is OK. */
+#define TRAMPOLINE_SIZE 60
+
+ .text
+ .align 4
+ .global __xtensa_sync_caches
+ .type __xtensa_sync_caches,@function
+__xtensa_sync_caches:
+ entry sp, 32
+#if XCHAL_DCACHE_SIZE > 0
+ /* Flush the trampoline from the data cache. */
+ extui a4, a2, 0, XCHAL_DCACHE_LINEWIDTH
+ addi a4, a4, TRAMPOLINE_SIZE
+ addi a4, a4, (1 << XCHAL_DCACHE_LINEWIDTH) - 1
+ srli a4, a4, XCHAL_DCACHE_LINEWIDTH
+ mov a3, a2
+.Ldcache_loop:
+ dhwb a3, 0
+ addi a3, a3, (1 << XCHAL_DCACHE_LINEWIDTH)
+ addi a4, a4, -1
+ bnez a4, .Ldcache_loop
+ isync
+#endif
+#if XCHAL_ICACHE_SIZE > 0
+ /* Invalidate the corresponding lines in the instruction cache. */
+ extui a4, a2, 0, XCHAL_ICACHE_LINEWIDTH
+ addi a4, a4, TRAMPOLINE_SIZE
+ addi a4, a4, (1 << XCHAL_ICACHE_LINEWIDTH) - 1
+ srli a4, a4, XCHAL_ICACHE_LINEWIDTH
+.Licache_loop:
+ ihi a2, 0
+ addi a2, a2, (1 << XCHAL_ICACHE_LINEWIDTH)
+ addi a4, a4, -1
+ bnez a4, .Licache_loop
+#endif
+ isync
+ retw
+ .size __xtensa_sync_caches, .-__xtensa_sync_caches
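
Both cache loops compute their trip count the same way; as a C sketch (illustrative name, TRAMPOLINE_SIZE as above):

    #include <stdint.h>

    #define TRAMPOLINE_SIZE 60

    static int
    lines_to_flush (uint32_t addr, int linewidth)
    {
      uint32_t linesize = (uint32_t) 1 << linewidth;
      uint32_t offset = addr & (linesize - 1);   /* extui a4, a2, 0, width */
      return (int) ((offset + TRAMPOLINE_SIZE + linesize - 1) >> linewidth);
    }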
diff --git a/gcc/config/xtensa/libgcc-xtensa.ver b/gcc/config/xtensa/libgcc-xtensa.ver
new file mode 100644
index 000000000..43e7d4fc7
--- /dev/null
+++ b/gcc/config/xtensa/libgcc-xtensa.ver
@@ -0,0 +1,3 @@
+GCC_4.3.0 {
+ __umulsidi3
+}
diff --git a/gcc/config/xtensa/linux-unwind.h b/gcc/config/xtensa/linux-unwind.h
new file mode 100644
index 000000000..245649728
--- /dev/null
+++ b/gcc/config/xtensa/linux-unwind.h
@@ -0,0 +1,97 @@
+/* DWARF2 EH unwinding support for Xtensa.
+ Copyright (C) 2008, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2-xtensa.c for the structs.
+ Don't use this at all if inhibit_libc is used. */
+
+#ifndef inhibit_libc
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+/* Encoded bytes for Xtensa instructions:
+ movi a2, __NR_rt_sigreturn
+ syscall
+ entry (first byte only)
+ Some of the bytes are endian-dependent. */
+
+#define MOVI_BYTE0 0x22
+#define MOVI_BYTE2 225 /* __NR_rt_sigreturn */
+#define SYSC_BYTE0 0
+#define SYSC_BYTE2 0
+
+#ifdef __XTENSA_EB__
+#define MOVI_BYTE1 0x0a
+#define SYSC_BYTE1 0x05
+#define ENTRY_BYTE 0x6c
+#else
+#define MOVI_BYTE1 0xa0
+#define SYSC_BYTE1 0x50
+#define ENTRY_BYTE 0x36
+#endif
+
+#define MD_FALLBACK_FRAME_STATE_FOR xtensa_fallback_frame_state
+
+static _Unwind_Reason_Code
+xtensa_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ struct sigcontext *sc;
+
+ struct rt_sigframe {
+ siginfo_t info;
+ struct ucontext uc;
+ } *rt_;
+
+ /* movi a2, __NR_rt_sigreturn; syscall */
+ if (pc[0] != MOVI_BYTE0
+ || pc[1] != MOVI_BYTE1
+ || pc[2] != MOVI_BYTE2
+ || pc[3] != SYSC_BYTE0
+ || pc[4] != SYSC_BYTE1
+ || pc[5] != SYSC_BYTE2)
+ return _URC_END_OF_STACK;
+
+ rt_ = context->sp;
+ sc = &rt_->uc.uc_mcontext;
+ fs->signal_regs = (_Unwind_Word *) sc->sc_a;
+
+ /* If the signal arrived just before an ENTRY instruction, find the return
+ address and pretend the signal arrived before executing the CALL. */
+ if (*(unsigned char *) sc->sc_pc == ENTRY_BYTE)
+ {
+ unsigned callinc = (sc->sc_ps >> 16) & 3;
+ fs->signal_ra = ((sc->sc_a[callinc << 2] & XTENSA_RA_FIELD_MASK)
+ | context->ra_high_bits) - 3;
+ }
+ else
+ fs->signal_ra = sc->sc_pc;
+
+ fs->signal_frame = 1;
+ return _URC_NO_REASON;
+}
+
+#endif /* ifndef inhibit_libc */
diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h
new file mode 100644
index 000000000..83d2a9767
--- /dev/null
+++ b/gcc/config/xtensa/linux.h
@@ -0,0 +1,71 @@
+/* Xtensa Linux configuration.
+ Derived from the configuration for GCC for Intel i386 running Linux.
+ Copyright (C) 2001, 2002, 2003, 2006, 2007, 2008, 2010, 2011
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (Xtensa GNU/Linux with ELF)", stderr);
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "%{mtext-section-literals:--text-section-literals} \
+ %{mno-text-section-literals:--no-text-section-literals} \
+ %{mtarget-align:--target-align} \
+ %{mno-target-align:--no-target-align} \
+ %{mlongcalls:--longcalls} \
+ %{mno-longcalls:--no-longcalls}"
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static:-static}}"
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* Always enable "-fpic" for Xtensa Linux. */
+#define XTENSA_ALWAYS_PIC 1
+
+#undef DBX_REGISTER_NUMBER
+
+#define MD_UNWIND_SUPPORT "config/xtensa/linux-unwind.h"
+
diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
new file mode 100644
index 000000000..27f058de7
--- /dev/null
+++ b/gcc/config/xtensa/predicates.md
@@ -0,0 +1,175 @@
+;; Predicate definitions for Xtensa.
+;; Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_predicate "add_operand"
+ (ior (and (match_code "const_int")
+ (match_test "xtensa_simm8 (INTVAL (op))
+ || xtensa_simm8x256 (INTVAL (op))"))
+ (match_operand 0 "register_operand")))
+
+(define_predicate "addsubx_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 2
+ || INTVAL (op) == 4
+ || INTVAL (op) == 8")))
+
+(define_predicate "arith_operand"
+ (ior (and (match_code "const_int")
+ (match_test "xtensa_simm8 (INTVAL (op))"))
+ (match_operand 0 "register_operand")))
+
+;; Non-immediate operand excluding the constant pool.
+(define_predicate "nonimmed_operand"
+ (ior (and (match_operand 0 "memory_operand")
+ (match_test "!constantpool_mem_p (op)"))
+ (match_operand 0 "register_operand")))
+
+;; Memory operand excluding the constant pool.
+(define_predicate "mem_operand"
+ (and (match_operand 0 "memory_operand")
+ (match_test "!constantpool_mem_p (op)")))
+
+;; Memory operand in the constant pool.
+(define_predicate "constantpool_operand"
+ (match_test "constantpool_mem_p (op)"))
+
+(define_predicate "mask_operand"
+ (ior (and (match_code "const_int")
+ (match_test "xtensa_mask_immediate (INTVAL (op))"))
+ (match_operand 0 "register_operand")))
+
+(define_predicate "extui_fldsz_operand"
+ (and (match_code "const_int")
+ (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)")))
+
+(define_predicate "sext_operand"
+ (if_then_else (match_test "TARGET_SEXT")
+ (match_operand 0 "nonimmed_operand")
+ (match_operand 0 "mem_operand")))
+
+(define_predicate "sext_fldsz_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23")))
+
+(define_predicate "lsbitnum_operand"
+ (and (match_code "const_int")
+ (match_test "BITS_BIG_ENDIAN
+ ? (INTVAL (op) == BITS_PER_WORD - 1)
+ : (INTVAL (op) == 0)")))
+
+(define_predicate "branch_operand"
+ (ior (and (match_code "const_int")
+ (match_test "xtensa_b4const_or_zero (INTVAL (op))"))
+ (match_operand 0 "register_operand")))
+
+(define_predicate "ubranch_operand"
+ (ior (and (match_code "const_int")
+ (match_test "xtensa_b4constu (INTVAL (op))"))
+ (match_operand 0 "register_operand")))
+
+(define_predicate "call_insn_operand"
+ (match_code "const_int,const,symbol_ref,reg")
+{
+ if ((GET_CODE (op) == REG)
+ && (op != arg_pointer_rtx)
+ && ((REGNO (op) < FRAME_POINTER_REGNUM)
+ || (REGNO (op) > LAST_VIRTUAL_REGISTER)))
+ return true;
+
+ if (CONSTANT_ADDRESS_P (op))
+ {
+ /* Direct calls only allowed to static functions with PIC. */
+ if (flag_pic)
+ {
+ tree callee, callee_sec, caller_sec;
+
+ if (GET_CODE (op) != SYMBOL_REF
+ || !SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_EXTERNAL_P (op))
+ return false;
+
+ /* Don't attempt a direct call if the callee is known to be in
+ a different section, since there's a good chance it will be
+ out of range. */
+
+ if (flag_function_sections
+ || DECL_ONE_ONLY (current_function_decl))
+ return false;
+ caller_sec = DECL_SECTION_NAME (current_function_decl);
+ callee = SYMBOL_REF_DECL (op);
+ if (callee)
+ {
+ if (DECL_ONE_ONLY (callee))
+ return false;
+ callee_sec = DECL_SECTION_NAME (callee);
+ if (((caller_sec == NULL_TREE) ^ (callee_sec == NULL_TREE))
+ || (caller_sec != NULL_TREE
+ && strcmp (TREE_STRING_POINTER (caller_sec),
+ TREE_STRING_POINTER (callee_sec)) != 0))
+ return false;
+ }
+ else if (caller_sec != NULL_TREE)
+ return false;
+ }
+ return true;
+ }
+
+ return false;
+})
+
+(define_predicate "move_operand"
+ (ior
+ (ior (match_operand 0 "register_operand")
+ (and (match_operand 0 "memory_operand")
+ (match_test "!constantpool_mem_p (op)
+ || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")))
+ (ior (and (match_code "const_int")
+ (match_test "GET_MODE_CLASS (mode) == MODE_INT
+ && xtensa_simm12b (INTVAL (op))"))
+ (and (match_code "const_int,const_double,const,symbol_ref,label_ref")
+ (match_test "TARGET_CONST16 && CONSTANT_P (op)
+ && GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")))))
+
+;; Accept the floating point constant 1 in the appropriate mode.
+(define_predicate "const_float_1_operand"
+ (match_code "const_double")
+{
+ REAL_VALUE_TYPE d;
+ REAL_VALUE_FROM_CONST_DOUBLE (d, op);
+ return REAL_VALUES_EQUAL (d, dconst1);
+})
+
+(define_predicate "fpmem_offset_operand"
+ (and (match_code "const_int")
+ (match_test "xtensa_mem_offset (INTVAL (op), SFmode)")))
+
+(define_predicate "branch_operator"
+ (match_code "eq,ne,lt,ge"))
+
+(define_predicate "ubranch_operator"
+ (match_code "ltu,geu"))
+
+(define_predicate "boolean_operator"
+ (match_code "eq,ne"))
+
+(define_predicate "xtensa_cstoresi_operator"
+ (match_code "eq,ne,gt,ge,lt,le"))
+
+(define_predicate "tls_symbol_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) != 0")))
diff --git a/gcc/config/xtensa/t-elf b/gcc/config/xtensa/t-elf
new file mode 100644
index 000000000..7d6cd1a3a
--- /dev/null
+++ b/gcc/config/xtensa/t-elf
@@ -0,0 +1,6 @@
+# Build CRT files and libgcc with the "longcalls" option
+CRTSTUFF_T_CFLAGS += -mlongcalls
+CRTSTUFF_T_CFLAGS_S += -mlongcalls
+TARGET_LIBGCC2_CFLAGS += -mlongcalls
+
+EXTRA_MULTILIB_PARTS = crti.o crtn.o crtbegin.o crtend.o
diff --git a/gcc/config/xtensa/t-linux b/gcc/config/xtensa/t-linux
new file mode 100644
index 000000000..7d535e155
--- /dev/null
+++ b/gcc/config/xtensa/t-linux
@@ -0,0 +1,3 @@
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
+
+SHLIB_MAPFILES += $(srcdir)/config/xtensa/libgcc-xtensa.ver
diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa
new file mode 100644
index 000000000..c3d98ae30
--- /dev/null
+++ b/gcc/config/xtensa/t-xtensa
@@ -0,0 +1,42 @@
+# Copyright (C) 2002, 2003, 2006, 2007, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = xtensa/lib1funcs.asm
+LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \
+ _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \
+ _ashldi3 _ashrdi3 _lshrdi3 \
+ _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
+ _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
+ _floatdisf _floatundisf \
+ _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \
+ _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \
+ _floatdidf _floatundidf \
+ _truncdfsf2 _extendsfdf2
+
+LIB2FUNCS_EXTRA = $(srcdir)/config/xtensa/lib2funcs.S
+LIB2ADDEH = $(srcdir)/config/xtensa/unwind-dw2-xtensa.c \
+ $(srcdir)/unwind-dw2-fde.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
+
+$(T)crti.o: $(srcdir)/config/xtensa/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/xtensa/crti.asm
+$(T)crtn.o: $(srcdir)/config/xtensa/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/xtensa/crtn.asm
+
+$(out_object_file): gt-xtensa.h
diff --git a/gcc/config/xtensa/unwind-dw2-xtensa.c b/gcc/config/xtensa/unwind-dw2-xtensa.c
new file mode 100644
index 000000000..9544f65ab
--- /dev/null
+++ b/gcc/config/xtensa/unwind-dw2-xtensa.c
@@ -0,0 +1,546 @@
+/* DWARF2 exception handling and frame unwinding for Xtensa.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "dwarf2.h"
+#include "unwind.h"
+#ifdef __USING_SJLJ_EXCEPTIONS__
+# define NO_SIZE_OF_ENCODED_VALUE
+#endif
+#include "unwind-pe.h"
+#include "unwind-dw2-fde.h"
+#include "unwind-dw2-xtensa.h"
+
+#ifndef __USING_SJLJ_EXCEPTIONS__
+
+/* The standard CIE and FDE structures work fine for Xtensa but the
+ variable-size register window save areas are not a good fit for the rest
+ of the standard DWARF unwinding mechanism. Nor is that mechanism
+ necessary, since the register save areas are always in fixed locations
+ in each stack frame. This file is a stripped down and customized version
+ of the standard DWARF unwinding code. It needs to be customized to have
+ builtin logic for finding the save areas and also to track the stack
+ pointer value (besides the CFA) while unwinding since the primary save
+ area is located below the stack pointer. It is stripped down to reduce
+ code size and ease the maintenance burden of tracking changes in the
+ standard version of the code. */
+
+#ifndef DWARF_REG_TO_UNWIND_COLUMN
+#define DWARF_REG_TO_UNWIND_COLUMN(REGNO) (REGNO)
+#endif
+
+#define XTENSA_RA_FIELD_MASK 0x3FFFFFFF
+
+/* This is the register and unwind state for a particular frame. This
+ provides the information necessary to unwind up past a frame and return
+ to its caller. */
+struct _Unwind_Context
+{
+ /* Track register window save areas of 4 registers each, instead of
+ keeping separate addresses for the individual registers. */
+ _Unwind_Word *reg[4];
+
+ void *cfa;
+ void *sp;
+ void *ra;
+
+ /* Cache the 2 high bits to replace the window size in return addresses. */
+ _Unwind_Word ra_high_bits;
+
+ void *lsda;
+ struct dwarf_eh_bases bases;
+ /* Signal frame context. */
+#define SIGNAL_FRAME_BIT ((~(_Unwind_Word) 0 >> 1) + 1)
+ _Unwind_Word flags;
+ /* 0 for now, can be increased when further fields are added to
+ struct _Unwind_Context. */
+ _Unwind_Word version;
+};
+
+
+/* Read unaligned data from the instruction buffer. */
+
+union unaligned
+{
+ void *p;
+} __attribute__ ((packed));
+
+static void uw_update_context (struct _Unwind_Context *, _Unwind_FrameState *);
+static _Unwind_Reason_Code uw_frame_state_for (struct _Unwind_Context *,
+ _Unwind_FrameState *);
+
+static inline void *
+read_pointer (const void *p) { const union unaligned *up = p; return up->p; }
+
+static inline _Unwind_Word
+_Unwind_IsSignalFrame (struct _Unwind_Context *context)
+{
+ return (context->flags & SIGNAL_FRAME_BIT) ? 1 : 0;
+}
+
+static inline void
+_Unwind_SetSignalFrame (struct _Unwind_Context *context, int val)
+{
+ if (val)
+ context->flags |= SIGNAL_FRAME_BIT;
+ else
+ context->flags &= ~SIGNAL_FRAME_BIT;
+}
+
+/* Get the value of register INDEX as saved in CONTEXT. */
+
+inline _Unwind_Word
+_Unwind_GetGR (struct _Unwind_Context *context, int index)
+{
+ _Unwind_Word *ptr;
+
+ index = DWARF_REG_TO_UNWIND_COLUMN (index);
+ ptr = context->reg[index >> 2] + (index & 3);
+
+ return *ptr;
+}
+
+/* Get the value of the CFA as saved in CONTEXT. */
+
+_Unwind_Word
+_Unwind_GetCFA (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->cfa;
+}
+
+/* Overwrite the saved value for register INDEX in CONTEXT with VAL. */
+
+inline void
+_Unwind_SetGR (struct _Unwind_Context *context, int index, _Unwind_Word val)
+{
+ _Unwind_Word *ptr;
+
+ index = DWARF_REG_TO_UNWIND_COLUMN (index);
+ ptr = context->reg[index >> 2] + (index & 3);
+
+ *ptr = val;
+}
+
+/* Retrieve the return address for CONTEXT. */
+
+inline _Unwind_Ptr
+_Unwind_GetIP (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->ra;
+}
+
+/* Retrieve the return address and flag whether that IP is before
+   or after the first not-yet-fully-executed instruction. */
+
+inline _Unwind_Ptr
+_Unwind_GetIPInfo (struct _Unwind_Context *context, int *ip_before_insn)
+{
+ *ip_before_insn = _Unwind_IsSignalFrame (context);
+ return (_Unwind_Ptr) context->ra;
+}
+
+/* Overwrite the return address for CONTEXT with VAL. */
+
+inline void
+_Unwind_SetIP (struct _Unwind_Context *context, _Unwind_Ptr val)
+{
+ context->ra = (void *) val;
+}
+
+void *
+_Unwind_GetLanguageSpecificData (struct _Unwind_Context *context)
+{
+ return context->lsda;
+}
+
+_Unwind_Ptr
+_Unwind_GetRegionStart (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->bases.func;
+}
+
+void *
+_Unwind_FindEnclosingFunction (void *pc)
+{
+ struct dwarf_eh_bases bases;
+ const struct dwarf_fde *fde = _Unwind_Find_FDE (pc-1, &bases);
+ if (fde)
+ return bases.func;
+ else
+ return NULL;
+}
+
+_Unwind_Ptr
+_Unwind_GetDataRelBase (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->bases.dbase;
+}
+
+_Unwind_Ptr
+_Unwind_GetTextRelBase (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->bases.tbase;
+}
+
+#ifdef MD_UNWIND_SUPPORT
+#include MD_UNWIND_SUPPORT
+#endif
+
+/* Extract any interesting information from the CIE. Return a pointer
+   to the byte after the augmentation, or NULL if we encountered an
+   undecipherable augmentation. */
+
+static const unsigned char *
+extract_cie_info (const struct dwarf_cie *cie, struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ const unsigned char *aug = cie->augmentation;
+ const unsigned char *p = aug + strlen ((const char *)aug) + 1;
+ const unsigned char *ret = NULL;
+ _uleb128_t utmp;
+ _sleb128_t stmp;
+
+ /* g++ v2 "eh" has pointer immediately following augmentation string,
+ so it must be handled first. */
+ if (aug[0] == 'e' && aug[1] == 'h')
+ {
+ fs->eh_ptr = read_pointer (p);
+ p += sizeof (void *);
+ aug += 2;
+ }
+
+ /* Immediately following the augmentation are the code and
+ data alignment and return address column. */
+ p = read_uleb128 (p, &utmp);
+ p = read_sleb128 (p, &stmp);
+ if (cie->version == 1)
+ fs->retaddr_column = *p++;
+ else
+ {
+ p = read_uleb128 (p, &utmp);
+ fs->retaddr_column = (_Unwind_Word)utmp;
+ }
+ fs->lsda_encoding = DW_EH_PE_omit;
+
+ /* If the augmentation starts with 'z', then a uleb128 immediately
+ follows containing the length of the augmentation field following
+ the size. */
+ if (*aug == 'z')
+ {
+ p = read_uleb128 (p, &utmp);
+ ret = p + utmp;
+
+ fs->saw_z = 1;
+ ++aug;
+ }
+
+ /* Iterate over recognized augmentation subsequences. */
+ while (*aug != '\0')
+ {
+ /* "L" indicates a byte showing how the LSDA pointer is encoded. */
+ if (aug[0] == 'L')
+ {
+ fs->lsda_encoding = *p++;
+ aug += 1;
+ }
+
+ /* "R" indicates a byte indicating how FDE addresses are encoded. */
+ else if (aug[0] == 'R')
+ {
+ fs->fde_encoding = *p++;
+ aug += 1;
+ }
+
+ /* "P" indicates a personality routine in the CIE augmentation. */
+ else if (aug[0] == 'P')
+ {
+ _Unwind_Ptr personality;
+
+ p = read_encoded_value (context, *p, p + 1, &personality);
+ fs->personality = (_Unwind_Personality_Fn) personality;
+ aug += 1;
+ }
+
+ /* "S" indicates a signal frame. */
+ else if (aug[0] == 'S')
+ {
+ fs->signal_frame = 1;
+ aug += 1;
+ }
+
+ /* Otherwise we have an unknown augmentation string.
+ Bail unless we saw a 'z' prefix. */
+ else
+ return ret;
+ }
+
+ return ret ? ret : p;
+}
+
+/* Given the _Unwind_Context CONTEXT for a stack frame, look up the FDE for
+ its caller and decode it into FS. This function also sets the
+ lsda member of CONTEXT, as it is really information
+ about the caller's frame. */
+
+static _Unwind_Reason_Code
+uw_frame_state_for (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ const struct dwarf_fde *fde;
+ const struct dwarf_cie *cie;
+ const unsigned char *aug;
+ int window_size;
+ _Unwind_Word *ra_ptr;
+
+ memset (fs, 0, sizeof (*fs));
+ context->lsda = 0;
+
+ fde = _Unwind_Find_FDE (context->ra + _Unwind_IsSignalFrame (context) - 1,
+ &context->bases);
+ if (fde == NULL)
+ {
+#ifdef MD_FALLBACK_FRAME_STATE_FOR
+ _Unwind_Reason_Code reason;
+ /* Couldn't find frame unwind info for this function. Try a
+ target-specific fallback mechanism. This will necessarily
+ not provide a personality routine or LSDA. */
+ reason = MD_FALLBACK_FRAME_STATE_FOR (context, fs);
+ if (reason != _URC_END_OF_STACK)
+ return reason;
+#endif
+ /* The frame was not recognized and handled by the fallback function,
+ but it is not really the end of the stack. Fall through here and
+ unwind it anyway. */
+ }
+ else
+ {
+ cie = get_cie (fde);
+ if (extract_cie_info (cie, context, fs) == NULL)
+ /* CIE contained unknown augmentation. */
+ return _URC_FATAL_PHASE1_ERROR;
+
+ /* Locate augmentation for the fde. */
+ aug = (const unsigned char *) fde + sizeof (*fde);
+ aug += 2 * size_of_encoded_value (fs->fde_encoding);
+ if (fs->saw_z)
+ {
+ _uleb128_t i;
+ aug = read_uleb128 (aug, &i);
+ }
+ if (fs->lsda_encoding != DW_EH_PE_omit)
+ {
+ _Unwind_Ptr lsda;
+
+ aug = read_encoded_value (context, fs->lsda_encoding, aug, &lsda);
+ context->lsda = (void *) lsda;
+ }
+ }
+
+ /* Check for the end of the stack. This needs to be checked after
+ the MD_FALLBACK_FRAME_STATE_FOR check for signal frames because
+ the contents of context->reg[0] are undefined at a signal frame,
+ and register a0 may appear to be zero. (The return address in
+ context->ra comes from register a4 or a8). */
+ ra_ptr = context->reg[0];
+ if (ra_ptr && *ra_ptr == 0)
+ return _URC_END_OF_STACK;
+
+ /* Find the window size from the high bits of the return address. */
+ if (ra_ptr)
+ window_size = (*ra_ptr >> 30) * 4;
+ else
+ window_size = 8;
+
+ fs->retaddr_column = window_size;
+
+ return _URC_NO_REASON;
+}
+
+static void
+uw_update_context_1 (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ struct _Unwind_Context orig_context = *context;
+ _Unwind_Word *sp, *cfa, *next_cfa;
+ int i;
+
+ if (fs->signal_regs)
+ {
+ cfa = (_Unwind_Word *) fs->signal_regs[1];
+ next_cfa = (_Unwind_Word *) cfa[-3];
+
+ for (i = 0; i < 4; i++)
+ context->reg[i] = fs->signal_regs + (i << 2);
+ }
+ else
+ {
+ int window_size = fs->retaddr_column >> 2;
+
+ sp = (_Unwind_Word *) orig_context.sp;
+ cfa = (_Unwind_Word *) orig_context.cfa;
+ next_cfa = (_Unwind_Word *) cfa[-3];
+
+ /* Registers a0-a3 are in the save area below sp. */
+ context->reg[0] = sp - 4;
+
+ /* Find the extra save area below next_cfa. */
+ for (i = 1; i < window_size; i++)
+ context->reg[i] = next_cfa - 4 * (1 + window_size - i);
+
+ /* Remaining registers rotate from previous save areas. */
+ for (i = window_size; i < 4; i++)
+ context->reg[i] = orig_context.reg[i - window_size];
+ }
+
+ context->sp = cfa;
+ context->cfa = next_cfa;
+
+ _Unwind_SetSignalFrame (context, fs->signal_frame);
+}
+
+/* CONTEXT describes the unwind state for a frame, and FS describes the FDE
+ of its caller. Update CONTEXT to refer to the caller as well. Note
+ that the lsda member is not updated here, but later in
+ uw_frame_state_for. */
+
+static void
+uw_update_context (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ uw_update_context_1 (context, fs);
+
+ /* Compute the return address now, since the return address column
+ can change from frame to frame. */
+ if (fs->signal_ra != 0)
+ context->ra = (void *) fs->signal_ra;
+ else
+ context->ra = (void *) ((_Unwind_GetGR (context, fs->retaddr_column)
+ & XTENSA_RA_FIELD_MASK) | context->ra_high_bits);
+}
+
+static void
+uw_advance_context (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ uw_update_context (context, fs);
+}
+
+/* Fill in CONTEXT for top-of-stack. The only valid registers at this
+ level will be the return address and the CFA. */
+
+#define uw_init_context(CONTEXT) \
+ do \
+ { \
+ __builtin_unwind_init (); \
+ uw_init_context_1 (CONTEXT, __builtin_dwarf_cfa (), \
+ __builtin_return_address (0)); \
+ } \
+ while (0)
+
+static void __attribute__((noinline))
+uw_init_context_1 (struct _Unwind_Context *context, void *outer_cfa,
+ void *outer_ra)
+{
+ void *ra = __builtin_return_address (0);
+ void *cfa = __builtin_dwarf_cfa ();
+ _Unwind_FrameState fs;
+
+ memset (context, 0, sizeof (struct _Unwind_Context));
+ context->ra = ra;
+
+ memset (&fs, 0, sizeof (fs));
+ fs.retaddr_column = 8;
+ context->sp = cfa;
+ context->cfa = outer_cfa;
+ context->ra_high_bits =
+ ((_Unwind_Word) uw_init_context_1) & ~XTENSA_RA_FIELD_MASK;
+ uw_update_context_1 (context, &fs);
+
+ context->ra = outer_ra;
+}
+
+
+/* Install TARGET into CURRENT so that we can return to it. This is a
+ macro because __builtin_eh_return must be invoked in the context of
+ our caller. */
+
+#define uw_install_context(CURRENT, TARGET) \
+ do \
+ { \
+ long offset = uw_install_context_1 ((CURRENT), (TARGET)); \
+ void *handler = __builtin_frob_return_addr ((TARGET)->ra); \
+ __builtin_eh_return (offset, handler); \
+ } \
+ while (0)
+
+static long
+uw_install_context_1 (struct _Unwind_Context *current,
+ struct _Unwind_Context *target)
+{
+ long i;
+
+ /* The eh_return insn assumes a window size of 8, so don't bother copying
+ the save areas for registers a8-a15 since they won't be reloaded. */
+ for (i = 0; i < 2; ++i)
+ {
+ void *c = current->reg[i];
+ void *t = target->reg[i];
+
+ if (t && c && t != c)
+ memcpy (c, t, 4 * sizeof (_Unwind_Word));
+ }
+
+ return 0;
+}
+
+static inline _Unwind_Ptr
+uw_identify_context (struct _Unwind_Context *context)
+{
+ return _Unwind_GetCFA (context);
+}
+
+
+#include "unwind.inc"
+
+#if defined (USE_GAS_SYMVER) && defined (SHARED) && defined (USE_LIBUNWIND_EXCEPTIONS)
+alias (_Unwind_Backtrace);
+alias (_Unwind_DeleteException);
+alias (_Unwind_FindEnclosingFunction);
+alias (_Unwind_ForcedUnwind);
+alias (_Unwind_GetDataRelBase);
+alias (_Unwind_GetTextRelBase);
+alias (_Unwind_GetCFA);
+alias (_Unwind_GetGR);
+alias (_Unwind_GetIP);
+alias (_Unwind_GetLanguageSpecificData);
+alias (_Unwind_GetRegionStart);
+alias (_Unwind_RaiseException);
+alias (_Unwind_Resume);
+alias (_Unwind_Resume_or_Rethrow);
+alias (_Unwind_SetGR);
+alias (_Unwind_SetIP);
+#endif
+
+#endif /* !USING_SJLJ_EXCEPTIONS */
diff --git a/gcc/config/xtensa/unwind-dw2-xtensa.h b/gcc/config/xtensa/unwind-dw2-xtensa.h
new file mode 100644
index 000000000..d13b3264c
--- /dev/null
+++ b/gcc/config/xtensa/unwind-dw2-xtensa.h
@@ -0,0 +1,50 @@
+/* DWARF2 frame unwind data structure for Xtensa.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2007, 2008,
+ 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* A target can override (perhaps for backward compatibility) how
+ many dwarf2 columns are unwound. */
+#ifndef DWARF_FRAME_REGISTERS
+#define DWARF_FRAME_REGISTERS FIRST_PSEUDO_REGISTER
+#endif
+
+/* Xtensa's variable-size register window save areas can be unwound without
+ any unwind info. This is a stripped down version of the standard DWARF
+ _Unwind_FrameState. */
+typedef struct
+{
+ /* The information we care about from the CIE/FDE. */
+ _Unwind_Personality_Fn personality;
+ _Unwind_Word retaddr_column;
+ unsigned char fde_encoding;
+ unsigned char lsda_encoding;
+ unsigned char saw_z;
+ unsigned char signal_frame;
+ void *eh_ptr;
+
+ /* Saved registers for a signal frame. */
+ _Unwind_Word *signal_regs;
+ _Unwind_Word signal_ra;
+} _Unwind_FrameState;
+
diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
new file mode 100644
index 000000000..0d1738f4e
--- /dev/null
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -0,0 +1,74 @@
+/* Prototypes of target machine for GNU compiler for Xtensa.
+ Copyright 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef __XTENSA_PROTOS_H__
+#define __XTENSA_PROTOS_H__
+
+/* Functions to test whether an immediate fits in a given field. */
+extern bool xtensa_simm8 (HOST_WIDE_INT);
+extern bool xtensa_simm8x256 (HOST_WIDE_INT);
+extern bool xtensa_simm12b (HOST_WIDE_INT);
+extern bool xtensa_b4const_or_zero (HOST_WIDE_INT);
+extern bool xtensa_b4constu (HOST_WIDE_INT);
+extern bool xtensa_mask_immediate (HOST_WIDE_INT);
+extern bool xtensa_mem_offset (unsigned, enum machine_mode);
+
+/* Functions within xtensa.c that we reference. */
+#ifdef RTX_CODE
+extern int xt_true_regnum (rtx);
+extern int xtensa_valid_move (enum machine_mode, rtx *);
+extern int smalloffset_mem_p (rtx);
+extern int constantpool_mem_p (rtx);
+extern void xtensa_extend_reg (rtx, rtx);
+extern void xtensa_expand_conditional_branch (rtx *, enum machine_mode);
+extern int xtensa_expand_conditional_move (rtx *, int);
+extern int xtensa_expand_scc (rtx *, enum machine_mode);
+extern int xtensa_expand_block_move (rtx *);
+extern void xtensa_split_operand_pair (rtx *, enum machine_mode);
+extern int xtensa_emit_move_sequence (rtx *, enum machine_mode);
+extern rtx xtensa_copy_incoming_a7 (rtx);
+extern void xtensa_expand_nonlocal_goto (rtx *);
+extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx);
+extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool);
+extern void xtensa_emit_loop_end (rtx, rtx *);
+extern char *xtensa_emit_branch (bool, bool, rtx *);
+extern char *xtensa_emit_bit_branch (bool, bool, rtx *);
+extern char *xtensa_emit_movcc (bool, bool, bool, rtx *);
+extern char *xtensa_emit_call (int, rtx *);
+extern bool xtensa_tls_referenced_p (rtx);
+
+#ifdef TREE_CODE
+extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
+#endif /* TREE_CODE */
+
+extern void print_operand (FILE *, rtx, int);
+extern void print_operand_address (FILE *, rtx);
+extern void xtensa_output_literal (FILE *, rtx, enum machine_mode, int);
+extern rtx xtensa_return_addr (int, rtx);
+#endif /* RTX_CODE */
+
+extern void xtensa_setup_frame_addresses (void);
+extern int xtensa_dbx_register_number (int);
+extern long compute_frame_size (int);
+extern void xtensa_expand_prologue (void);
+extern void order_regs_for_local_alloc (void);
+
+#endif /* !__XTENSA_PROTOS_H__ */
diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c
new file mode 100644
index 000000000..e7c395be5
--- /dev/null
+++ b/gcc/config/xtensa/xtensa.c
@@ -0,0 +1,3715 @@
+/* Subroutines for insn-output.c for Tensilica's Xtensa architecture.
+ Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "insn-attr.h"
+#include "insn-codes.h"
+#include "recog.h"
+#include "output.h"
+#include "tree.h"
+#include "expr.h"
+#include "flags.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "function.h"
+#include "diagnostic-core.h"
+#include "optabs.h"
+#include "libfuncs.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "gimple.h"
+#include "df.h"
+
+
+/* Enumeration for all of the relational tests, so that we can build
+ arrays indexed by the test type, and not worry about the order
+ of EQ, NE, etc. */
+
+enum internal_test
+{
+ ITEST_EQ,
+ ITEST_NE,
+ ITEST_GT,
+ ITEST_GE,
+ ITEST_LT,
+ ITEST_LE,
+ ITEST_GTU,
+ ITEST_GEU,
+ ITEST_LTU,
+ ITEST_LEU,
+ ITEST_MAX
+};
+
+/* Array indicating whether or not a given hard register
+   can support a given mode. */
+char xtensa_hard_regno_mode_ok[(int) MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER];
+
+/* Current frame size calculated by compute_frame_size. */
+unsigned xtensa_current_frame_size;
+
+/* Largest block move to handle in-line. */
+#define LARGEST_MOVE_RATIO 15
+
+/* Define the structure for the machine field in struct function. */
+struct GTY(()) machine_function
+{
+ int accesses_prev_frame;
+ bool need_a7_copy;
+ bool vararg_a7;
+ rtx vararg_a7_copy;
+ rtx set_frame_ptr_insn;
+};
+
+/* Vector, indexed by hard register number, which contains 1 for a
+ register that is allowable in a candidate for leaf function
+ treatment. */
+
+const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] =
+{
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1
+};
+
+/* Map hard register number to register class */
+const enum reg_class xtensa_regno_to_class[FIRST_PSEUDO_REGISTER] =
+{
+ RL_REGS, SP_REG, RL_REGS, RL_REGS,
+ RL_REGS, RL_REGS, RL_REGS, GR_REGS,
+ RL_REGS, RL_REGS, RL_REGS, RL_REGS,
+ RL_REGS, RL_REGS, RL_REGS, RL_REGS,
+ AR_REGS, AR_REGS, BR_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ ACC_REG,
+};
+
+static void xtensa_option_override (void);
+static enum internal_test map_test_to_internal_test (enum rtx_code);
+static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *);
+static rtx gen_float_relational (enum rtx_code, rtx, rtx);
+static rtx gen_conditional_move (enum rtx_code, enum machine_mode, rtx, rtx);
+static rtx fixup_subreg_mem (rtx);
+static struct machine_function * xtensa_init_machine_status (void);
+static rtx xtensa_legitimize_tls_address (rtx);
+static rtx xtensa_legitimize_address (rtx, rtx, enum machine_mode);
+static bool xtensa_mode_dependent_address_p (const_rtx);
+static bool xtensa_return_in_msb (const_tree);
+static void printx (FILE *, signed int);
+static void xtensa_function_epilogue (FILE *, HOST_WIDE_INT);
+static rtx xtensa_builtin_saveregs (void);
+static bool xtensa_legitimate_address_p (enum machine_mode, rtx, bool);
+static unsigned int xtensa_multibss_section_type_flags (tree, const char *,
+ int) ATTRIBUTE_UNUSED;
+static section *xtensa_select_rtx_section (enum machine_mode, rtx,
+ unsigned HOST_WIDE_INT);
+static bool xtensa_rtx_costs (rtx, int, int, int *, bool);
+static int xtensa_register_move_cost (enum machine_mode, reg_class_t,
+ reg_class_t);
+static int xtensa_memory_move_cost (enum machine_mode, reg_class_t, bool);
+static tree xtensa_build_builtin_va_list (void);
+static bool xtensa_return_in_memory (const_tree, const_tree);
+static tree xtensa_gimplify_va_arg_expr (tree, tree, gimple_seq *,
+ gimple_seq *);
+static void xtensa_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx xtensa_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx xtensa_function_incoming_arg (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static rtx xtensa_function_value (const_tree, const_tree, bool);
+static rtx xtensa_libcall_value (enum machine_mode, const_rtx);
+static bool xtensa_function_value_regno_p (const unsigned int);
+static unsigned int xtensa_function_arg_boundary (enum machine_mode,
+ const_tree);
+static void xtensa_init_builtins (void);
+static tree xtensa_fold_builtin (tree, int, tree *, bool);
+static rtx xtensa_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static void xtensa_va_start (tree, rtx);
+static bool xtensa_frame_pointer_required (void);
+static rtx xtensa_static_chain (const_tree, bool);
+static void xtensa_asm_trampoline_template (FILE *);
+static void xtensa_trampoline_init (rtx, tree, rtx);
+static bool xtensa_output_addr_const_extra (FILE *, rtx);
+
+static reg_class_t xtensa_preferred_reload_class (rtx, reg_class_t);
+static reg_class_t xtensa_preferred_output_reload_class (rtx, reg_class_t);
+static reg_class_t xtensa_secondary_reload (bool, rtx, reg_class_t,
+ enum machine_mode,
+ struct secondary_reload_info *);
+
+static bool constantpool_address_p (const_rtx addr);
+
+static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] =
+ REG_ALLOC_ORDER;
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+
+static const struct default_options xtensa_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ /* Reordering blocks for Xtensa is not a good idea unless the
+ compiler understands the range of conditional branches.
+ Currently all branch relaxation for Xtensa is handled in the
+ assembler, so GCC cannot do a good job of reordering blocks.
+ Do not enable reordering unless it is explicitly requested. */
+ { OPT_LEVELS_ALL, OPT_freorder_blocks, NULL, 0 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+
+/* This macro generates the assembly code for function exit on
+   machines that need it.  If FUNCTION_EPILOGUE is not defined then
+   individual return instructions are generated for each return
+   statement.  The arguments are the same as for FUNCTION_PROLOGUE.  */
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE xtensa_function_epilogue
+
+/* These hooks specify assembly directives for creating certain kinds
+ of integer object. */
+
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION xtensa_select_rtx_section
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS xtensa_legitimize_address
+#undef TARGET_MODE_DEPENDENT_ADDRESS_P
+#define TARGET_MODE_DEPENDENT_ADDRESS_P xtensa_mode_dependent_address_p
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST xtensa_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS xtensa_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST xtensa_build_builtin_va_list
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START xtensa_va_start
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY xtensa_return_in_memory
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE xtensa_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE xtensa_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P xtensa_function_value_regno_p
+
+#undef TARGET_SPLIT_COMPLEX_ARG
+#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE xtensa_function_arg_advance
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG xtensa_function_arg
+#undef TARGET_FUNCTION_INCOMING_ARG
+#define TARGET_FUNCTION_INCOMING_ARG xtensa_function_incoming_arg
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY xtensa_function_arg_boundary
+
+#undef TARGET_EXPAND_BUILTIN_SAVEREGS
+#define TARGET_EXPAND_BUILTIN_SAVEREGS xtensa_builtin_saveregs
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR xtensa_gimplify_va_arg_expr
+
+#undef TARGET_RETURN_IN_MSB
+#define TARGET_RETURN_IN_MSB xtensa_return_in_msb
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS xtensa_init_builtins
+#undef TARGET_FOLD_BUILTIN
+#define TARGET_FOLD_BUILTIN xtensa_fold_builtin
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN xtensa_expand_builtin
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS xtensa_preferred_reload_class
+#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
+#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS xtensa_preferred_output_reload_class
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD xtensa_secondary_reload
+
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS (TARGET_THREADPTR && HAVE_AS_TLS)
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM xtensa_tls_referenced_p
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P xtensa_legitimate_address_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED xtensa_frame_pointer_required
+
+#undef TARGET_STATIC_CHAIN
+#define TARGET_STATIC_CHAIN xtensa_static_chain
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE xtensa_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT xtensa_trampoline_init
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE xtensa_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE xtensa_option_optimization_table
+
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA xtensa_output_addr_const_extra
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+
+/* Functions to test Xtensa immediate operand validity. */
+
+bool
+xtensa_simm8 (HOST_WIDE_INT v)
+{
+ return v >= -128 && v <= 127;
+}
+
+
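+/* True for immediates encodable by the ADDMI instruction: multiples
+   of 256 in the range -32768..32512.  For example, 0x1200 qualifies,
+   while 0x1234 does not because its low eight bits are nonzero.  */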
+bool
+xtensa_simm8x256 (HOST_WIDE_INT v)
+{
+ return (v & 255) == 0 && (v >= -32768 && v <= 32512);
+}
+
+
+bool
+xtensa_simm12b (HOST_WIDE_INT v)
+{
+ return v >= -2048 && v <= 2047;
+}
+
+
+static bool
+xtensa_uimm8 (HOST_WIDE_INT v)
+{
+ return v >= 0 && v <= 255;
+}
+
+
+static bool
+xtensa_uimm8x2 (HOST_WIDE_INT v)
+{
+ return (v & 1) == 0 && (v >= 0 && v <= 510);
+}
+
+
+static bool
+xtensa_uimm8x4 (HOST_WIDE_INT v)
+{
+ return (v & 3) == 0 && (v >= 0 && v <= 1020);
+}
+
+
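+/* xtensa_b4const and xtensa_b4constu below accept the constants
+   encodable in the 4-bit immediate field of the conditional branch
+   instructions: b4const covers the signed forms (BEQI, BNEI, BLTI,
+   BGEI) and b4constu the unsigned forms (BLTUI, BGEUI).  */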
+static bool
+xtensa_b4const (HOST_WIDE_INT v)
+{
+ switch (v)
+ {
+ case -1:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ case 10:
+ case 12:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ case 256:
+ return true;
+ }
+ return false;
+}
+
+
+bool
+xtensa_b4const_or_zero (HOST_WIDE_INT v)
+{
+ if (v == 0)
+ return true;
+ return xtensa_b4const (v);
+}
+
+
+bool
+xtensa_b4constu (HOST_WIDE_INT v)
+{
+ switch (v)
+ {
+ case 32768:
+ case 65536:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ case 10:
+ case 12:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ case 256:
+ return true;
+ }
+ return false;
+}
+
+
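+/* True if V is a mask of between 1 and 16 contiguous low-order one
+   bits (1, 3, 7, ..., 0xffff), i.e. a field that EXTUI can extract;
+   see the 'K' operand code in print_operand below.  */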
+bool
+xtensa_mask_immediate (HOST_WIDE_INT v)
+{
+#define MAX_MASK_SIZE 16
+ int mask_size;
+
+ for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++)
+ {
+ if ((v & 1) == 0)
+ return false;
+ v = v >> 1;
+ if (v == 0)
+ return true;
+ }
+
+ return false;
+}
+
+
+/* This is just like the standard true_regnum() function except that it
+ works even when reg_renumber is not initialized. */
+
+int
+xt_true_regnum (rtx x)
+{
+ if (GET_CODE (x) == REG)
+ {
+ if (reg_renumber
+ && REGNO (x) >= FIRST_PSEUDO_REGISTER
+ && reg_renumber[REGNO (x)] >= 0)
+ return reg_renumber[REGNO (x)];
+ return REGNO (x);
+ }
+ if (GET_CODE (x) == SUBREG)
+ {
+ int base = xt_true_regnum (SUBREG_REG (x));
+ if (base >= 0 && base < FIRST_PSEUDO_REGISTER)
+ return base + subreg_regno_offset (REGNO (SUBREG_REG (x)),
+ GET_MODE (SUBREG_REG (x)),
+ SUBREG_BYTE (x), GET_MODE (x));
+ }
+ return -1;
+}
+
+
+int
+xtensa_valid_move (enum machine_mode mode, rtx *operands)
+{
+ /* Either the destination or source must be a register, and the
+ MAC16 accumulator doesn't count. */
+
+ if (register_operand (operands[0], mode))
+ {
+ int dst_regnum = xt_true_regnum (operands[0]);
+
+ /* The stack pointer can only be assigned with a MOVSP opcode. */
+ if (dst_regnum == STACK_POINTER_REGNUM)
+ return (mode == SImode
+ && register_operand (operands[1], mode)
+ && !ACC_REG_P (xt_true_regnum (operands[1])));
+
+ if (!ACC_REG_P (dst_regnum))
+ return true;
+ }
+ if (register_operand (operands[1], mode))
+ {
+ int src_regnum = xt_true_regnum (operands[1]);
+ if (!ACC_REG_P (src_regnum))
+ return true;
+ }
+ return FALSE;
+}
+
+
+int
+smalloffset_mem_p (rtx op)
+{
+ if (GET_CODE (op) == MEM)
+ {
+ rtx addr = XEXP (op, 0);
+ if (GET_CODE (addr) == REG)
+ return BASE_REG_P (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx offset = XEXP (addr, 0);
+ HOST_WIDE_INT val;
+ if (GET_CODE (offset) != CONST_INT)
+ offset = XEXP (addr, 1);
+ if (GET_CODE (offset) != CONST_INT)
+ return FALSE;
+
+ val = INTVAL (offset);
+ return (val & 3) == 0 && (val >= 0 && val <= 60);
+ }
+ }
+ return FALSE;
+}
+
+
+static bool
+constantpool_address_p (const_rtx addr)
+{
+ const_rtx sym = addr;
+
+ if (GET_CODE (addr) == CONST)
+ {
+ rtx offset;
+
+ /* Only handle (PLUS (SYM, OFFSET)) form. */
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) != PLUS)
+ return false;
+
+ /* Make sure the address is word aligned. */
+ offset = XEXP (addr, 1);
+ if ((!CONST_INT_P (offset))
+ || ((INTVAL (offset) & 3) != 0))
+ return false;
+
+ sym = XEXP (addr, 0);
+ }
+
+ if ((GET_CODE (sym) == SYMBOL_REF)
+ && CONSTANT_POOL_ADDRESS_P (sym))
+ return true;
+ return false;
+}
+
+
+int
+constantpool_mem_p (rtx op)
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ if (GET_CODE (op) == MEM)
+ return constantpool_address_p (XEXP (op, 0));
+ return FALSE;
+}
+
+
+/* Return TRUE if X is a thread-local symbol. */
+
+static bool
+xtensa_tls_symbol_p (rtx x)
+{
+ if (! TARGET_HAVE_TLS)
+ return false;
+
+ return GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0;
+}
+
+
+void
+xtensa_extend_reg (rtx dst, rtx src)
+{
+ rtx temp = gen_reg_rtx (SImode);
+ rtx shift = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (GET_MODE (src)));
+
+ /* Generate paradoxical subregs as needed so that the modes match. */
+ src = simplify_gen_subreg (SImode, src, GET_MODE (src), 0);
+ dst = simplify_gen_subreg (SImode, dst, GET_MODE (dst), 0);
+
+ emit_insn (gen_ashlsi3 (temp, src, shift));
+ emit_insn (gen_ashrsi3 (dst, temp, shift));
+}
+
+
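+/* Return true if offset V is valid for a load or store of MODE.  The
+   limits reflect the scaled 8-bit offset fields of the load/store
+   instructions: 0..255 for byte accesses, even offsets up to 510 for
+   16-bit accesses, and multiples of 4 up to 1020 for word accesses.  */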
+bool
+xtensa_mem_offset (unsigned v, enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case BLKmode:
+ /* Handle the worst case for block moves. See xtensa_expand_block_move
+ where we emit an optimized block move operation if the block can be
+ moved in < "move_ratio" pieces. The worst case is when the block is
+ aligned but has a size of (3 mod 4) (does this happen?) so that the
+ last piece requires a byte load/store. */
+ return (xtensa_uimm8 (v)
+ && xtensa_uimm8 (v + MOVE_MAX * LARGEST_MOVE_RATIO));
+
+ case QImode:
+ return xtensa_uimm8 (v);
+
+ case HImode:
+ return xtensa_uimm8x2 (v);
+
+ case DFmode:
+ return (xtensa_uimm8x4 (v) && xtensa_uimm8x4 (v + 4));
+
+ default:
+ break;
+ }
+
+ return xtensa_uimm8x4 (v);
+}
+
+
+/* Make normal rtx_code into something we can index from an array. */
+
+static enum internal_test
+map_test_to_internal_test (enum rtx_code test_code)
+{
+ enum internal_test test = ITEST_MAX;
+
+ switch (test_code)
+ {
+ default: break;
+ case EQ: test = ITEST_EQ; break;
+ case NE: test = ITEST_NE; break;
+ case GT: test = ITEST_GT; break;
+ case GE: test = ITEST_GE; break;
+ case LT: test = ITEST_LT; break;
+ case LE: test = ITEST_LE; break;
+ case GTU: test = ITEST_GTU; break;
+ case GEU: test = ITEST_GEU; break;
+ case LTU: test = ITEST_LTU; break;
+ case LEU: test = ITEST_LEU; break;
+ }
+
+ return test;
+}
+
+
+/* Generate the code to compare two integer values. The return value is
+ the comparison expression. */
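+/* For example, since Xtensa provides only the EQ/NE/LT/GE (and
+   unsigned) branch family, (GT x 5) is handled as the inverted test
+   (LT x 6), and (GT x y) as (LT y x) with the operands swapped.  */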
+
+static rtx
+gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */
+ rtx cmp0, /* first operand to compare */
+ rtx cmp1, /* second operand to compare */
+ int *p_invert /* whether branch needs to reverse test */)
+{
+ struct cmp_info
+ {
+ enum rtx_code test_code; /* test code to use in insn */
+ bool (*const_range_p) (HOST_WIDE_INT); /* range check function */
+ int const_add; /* constant to add (convert LE -> LT) */
+ int reverse_regs; /* reverse registers in test */
+ int invert_const; /* != 0 if invert value if cmp1 is constant */
+ int invert_reg; /* != 0 if invert value if cmp1 is register */
+ int unsignedp; /* != 0 for unsigned comparisons. */
+ };
+
+ static struct cmp_info info[ (int)ITEST_MAX ] = {
+
+ { EQ, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* EQ */
+ { NE, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* NE */
+
+ { LT, xtensa_b4const_or_zero, 1, 1, 1, 0, 0 }, /* GT */
+ { GE, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* GE */
+ { LT, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* LT */
+ { GE, xtensa_b4const_or_zero, 1, 1, 1, 0, 0 }, /* LE */
+
+ { LTU, xtensa_b4constu, 1, 1, 1, 0, 1 }, /* GTU */
+ { GEU, xtensa_b4constu, 0, 0, 0, 0, 1 }, /* GEU */
+ { LTU, xtensa_b4constu, 0, 0, 0, 0, 1 }, /* LTU */
+ { GEU, xtensa_b4constu, 1, 1, 1, 0, 1 }, /* LEU */
+ };
+
+ enum internal_test test;
+ enum machine_mode mode;
+ struct cmp_info *p_info;
+
+ test = map_test_to_internal_test (test_code);
+ gcc_assert (test != ITEST_MAX);
+
+ p_info = &info[ (int)test ];
+
+ mode = GET_MODE (cmp0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (cmp1);
+
+ /* Make sure we can handle any constants given to us. */
+ if (GET_CODE (cmp1) == CONST_INT)
+ {
+ HOST_WIDE_INT value = INTVAL (cmp1);
+ unsigned HOST_WIDE_INT uvalue = (unsigned HOST_WIDE_INT)value;
+
+      /* If adding CONST_ADD overflows, or if the adjusted value does
+	 not fit in the immediate field, spill it to a register.  */
+
+ if ((p_info->unsignedp ?
+ (uvalue + p_info->const_add > uvalue) :
+ (value + p_info->const_add > value)) != (p_info->const_add > 0))
+ {
+ cmp1 = force_reg (mode, cmp1);
+ }
+ else if (!(p_info->const_range_p) (value + p_info->const_add))
+ {
+ cmp1 = force_reg (mode, cmp1);
+ }
+ }
+ else if ((GET_CODE (cmp1) != REG) && (GET_CODE (cmp1) != SUBREG))
+ {
+ cmp1 = force_reg (mode, cmp1);
+ }
+
+ /* See if we need to invert the result. */
+ *p_invert = ((GET_CODE (cmp1) == CONST_INT)
+ ? p_info->invert_const
+ : p_info->invert_reg);
+
+  /* A comparison against a constant may involve adding 1 to convert
+     the test (e.g. GT into an inverted LT).  A comparison between two
+     registers may involve swapping the operands.  */
+ if (GET_CODE (cmp1) == CONST_INT)
+ {
+ if (p_info->const_add != 0)
+ cmp1 = GEN_INT (INTVAL (cmp1) + p_info->const_add);
+
+ }
+ else if (p_info->reverse_regs)
+ {
+ rtx temp = cmp0;
+ cmp0 = cmp1;
+ cmp1 = temp;
+ }
+
+ return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1);
+}
+
+
+/* Generate the code to compare two float values. The return value is
+ the comparison expression. */
+
+static rtx
+gen_float_relational (enum rtx_code test_code, /* relational test (EQ, etc) */
+ rtx cmp0, /* first operand to compare */
+ rtx cmp1 /* second operand to compare */)
+{
+ rtx (*gen_fn) (rtx, rtx, rtx);
+ rtx brtmp;
+ int reverse_regs, invert;
+
+ switch (test_code)
+ {
+ case EQ: reverse_regs = 0; invert = 0; gen_fn = gen_seq_sf; break;
+ case NE: reverse_regs = 0; invert = 1; gen_fn = gen_seq_sf; break;
+ case LE: reverse_regs = 0; invert = 0; gen_fn = gen_sle_sf; break;
+ case GT: reverse_regs = 1; invert = 0; gen_fn = gen_slt_sf; break;
+ case LT: reverse_regs = 0; invert = 0; gen_fn = gen_slt_sf; break;
+ case GE: reverse_regs = 1; invert = 0; gen_fn = gen_sle_sf; break;
+ case UNEQ: reverse_regs = 0; invert = 0; gen_fn = gen_suneq_sf; break;
+ case LTGT: reverse_regs = 0; invert = 1; gen_fn = gen_suneq_sf; break;
+ case UNLE: reverse_regs = 0; invert = 0; gen_fn = gen_sunle_sf; break;
+ case UNGT: reverse_regs = 1; invert = 0; gen_fn = gen_sunlt_sf; break;
+ case UNLT: reverse_regs = 0; invert = 0; gen_fn = gen_sunlt_sf; break;
+ case UNGE: reverse_regs = 1; invert = 0; gen_fn = gen_sunle_sf; break;
+ case UNORDERED:
+ reverse_regs = 0; invert = 0; gen_fn = gen_sunordered_sf; break;
+ case ORDERED:
+ reverse_regs = 0; invert = 1; gen_fn = gen_sunordered_sf; break;
+ default:
+ fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1));
+ reverse_regs = 0; invert = 0; gen_fn = 0; /* avoid compiler warnings */
+ }
+
+ if (reverse_regs)
+ {
+ rtx temp = cmp0;
+ cmp0 = cmp1;
+ cmp1 = temp;
+ }
+
+ brtmp = gen_rtx_REG (CCmode, FPCC_REGNUM);
+ emit_insn (gen_fn (brtmp, cmp0, cmp1));
+
+ return gen_rtx_fmt_ee (invert ? EQ : NE, VOIDmode, brtmp, const0_rtx);
+}
+
+
+void
+xtensa_expand_conditional_branch (rtx *operands, enum machine_mode mode)
+{
+ enum rtx_code test_code = GET_CODE (operands[0]);
+ rtx cmp0 = operands[1];
+ rtx cmp1 = operands[2];
+ rtx cmp;
+ int invert;
+ rtx label1, label2;
+
+ switch (mode)
+ {
+ case DFmode:
+ default:
+ fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1));
+
+ case SImode:
+ invert = FALSE;
+ cmp = gen_int_relational (test_code, cmp0, cmp1, &invert);
+ break;
+
+ case SFmode:
+ if (!TARGET_HARD_FLOAT)
+ fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode,
+ cmp0, cmp1));
+ invert = FALSE;
+ cmp = gen_float_relational (test_code, cmp0, cmp1);
+ break;
+ }
+
+ /* Generate the branch. */
+
+ label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ label2 = pc_rtx;
+
+ if (invert)
+ {
+ label2 = label1;
+ label1 = pc_rtx;
+ }
+
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, cmp,
+ label1,
+ label2)));
+}
+
+
+static rtx
+gen_conditional_move (enum rtx_code code, enum machine_mode mode,
+ rtx op0, rtx op1)
+{
+ if (mode == SImode)
+ {
+ rtx cmp;
+
+ /* Jump optimization calls get_condition() which canonicalizes
+ comparisons like (GE x <const>) to (GT x <const-1>).
+ Transform those comparisons back to GE, since that is the
+ comparison supported in Xtensa. We shouldn't have to
+ transform <LE x const> comparisons, because neither
+ xtensa_expand_conditional_branch() nor get_condition() will
+ produce them. */
+
+ if ((code == GT) && (op1 == constm1_rtx))
+ {
+ code = GE;
+ op1 = const0_rtx;
+ }
+ cmp = gen_rtx_fmt_ee (code, VOIDmode, cc0_rtx, const0_rtx);
+
+ if (boolean_operator (cmp, VOIDmode))
+ {
+ /* Swap the operands to make const0 second. */
+ if (op0 == const0_rtx)
+ {
+ op0 = op1;
+ op1 = const0_rtx;
+ }
+
+ /* If not comparing against zero, emit a comparison (subtract). */
+ if (op1 != const0_rtx)
+ {
+ op0 = expand_binop (SImode, sub_optab, op0, op1,
+ 0, 0, OPTAB_LIB_WIDEN);
+ op1 = const0_rtx;
+ }
+ }
+ else if (branch_operator (cmp, VOIDmode))
+ {
+ /* Swap the operands to make const0 second. */
+ if (op0 == const0_rtx)
+ {
+ op0 = op1;
+ op1 = const0_rtx;
+
+ switch (code)
+ {
+ case LT: code = GE; break;
+ case GE: code = LT; break;
+ default: gcc_unreachable ();
+ }
+ }
+
+ if (op1 != const0_rtx)
+ return 0;
+ }
+ else
+ return 0;
+
+ return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
+ }
+
+ if (TARGET_HARD_FLOAT && mode == SFmode)
+ return gen_float_relational (code, op0, op1);
+
+ return 0;
+}
+
+
+int
+xtensa_expand_conditional_move (rtx *operands, int isflt)
+{
+ rtx dest = operands[0];
+ rtx cmp = operands[1];
+ enum machine_mode cmp_mode = GET_MODE (XEXP (cmp, 0));
+ rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
+
+ if (!(cmp = gen_conditional_move (GET_CODE (cmp), cmp_mode,
+ XEXP (cmp, 0), XEXP (cmp, 1))))
+ return 0;
+
+ if (isflt)
+ gen_fn = (cmp_mode == SImode
+ ? gen_movsfcc_internal0
+ : gen_movsfcc_internal1);
+ else
+ gen_fn = (cmp_mode == SImode
+ ? gen_movsicc_internal0
+ : gen_movsicc_internal1);
+
+ emit_insn (gen_fn (dest, XEXP (cmp, 0), operands[2], operands[3], cmp));
+ return 1;
+}
+
+
+int
+xtensa_expand_scc (rtx operands[4], enum machine_mode cmp_mode)
+{
+ rtx dest = operands[0];
+ rtx cmp;
+ rtx one_tmp, zero_tmp;
+ rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
+
+ if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode,
+ operands[2], operands[3])))
+ return 0;
+
+ one_tmp = gen_reg_rtx (SImode);
+ zero_tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_movsi (one_tmp, const_true_rtx));
+ emit_insn (gen_movsi (zero_tmp, const0_rtx));
+
+ gen_fn = (cmp_mode == SImode
+ ? gen_movsicc_internal0
+ : gen_movsicc_internal1);
+ emit_insn (gen_fn (dest, XEXP (cmp, 0), one_tmp, zero_tmp, cmp));
+ return 1;
+}
+
+
+/* Split OP[1] into OP[2,3] and likewise for OP[0] into OP[0,1]. MODE is
+ for the output, i.e., the input operands are twice as big as MODE. */
+
+void
+xtensa_split_operand_pair (rtx operands[4], enum machine_mode mode)
+{
+ switch (GET_CODE (operands[1]))
+ {
+ case REG:
+ operands[3] = gen_rtx_REG (mode, REGNO (operands[1]) + 1);
+ operands[2] = gen_rtx_REG (mode, REGNO (operands[1]));
+ break;
+
+ case MEM:
+ operands[3] = adjust_address (operands[1], mode, GET_MODE_SIZE (mode));
+ operands[2] = adjust_address (operands[1], mode, 0);
+ break;
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ split_double (operands[1], &operands[2], &operands[3]);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (GET_CODE (operands[0]))
+ {
+ case REG:
+ operands[1] = gen_rtx_REG (mode, REGNO (operands[0]) + 1);
+ operands[0] = gen_rtx_REG (mode, REGNO (operands[0]));
+ break;
+
+ case MEM:
+ operands[1] = adjust_address (operands[0], mode, GET_MODE_SIZE (mode));
+ operands[0] = adjust_address (operands[0], mode, 0);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+/* Emit insns to move operands[1] into operands[0].
+ Return 1 if we have written out everything that needs to be done to
+ do the move. Otherwise, return 0 and the caller will emit the move
+ normally. */
+
+int
+xtensa_emit_move_sequence (rtx *operands, enum machine_mode mode)
+{
+ rtx src = operands[1];
+
+ if (CONSTANT_P (src)
+ && (GET_CODE (src) != CONST_INT || ! xtensa_simm12b (INTVAL (src))))
+ {
+ rtx dst = operands[0];
+
+ if (xtensa_tls_referenced_p (src))
+ {
+ rtx addend = NULL;
+
+ if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS)
+ {
+ addend = XEXP (XEXP (src, 0), 1);
+ src = XEXP (XEXP (src, 0), 0);
+ }
+
+ src = xtensa_legitimize_tls_address (src);
+ if (addend)
+ {
+ src = gen_rtx_PLUS (mode, src, addend);
+ src = force_operand (src, dst);
+ }
+ emit_move_insn (dst, src);
+ return 1;
+ }
+
+ if (! TARGET_CONST16)
+ {
+ src = force_const_mem (SImode, src);
+ operands[1] = src;
+ }
+
+ /* PC-relative loads are always SImode, and CONST16 is only
+ supported in the movsi pattern, so add a SUBREG for any other
+ (smaller) mode. */
+
+ if (mode != SImode)
+ {
+ if (register_operand (dst, mode))
+ {
+ emit_move_insn (simplify_gen_subreg (SImode, dst, mode, 0), src);
+ return 1;
+ }
+ else
+ {
+ src = force_reg (SImode, src);
+ src = gen_lowpart_SUBREG (mode, src);
+ operands[1] = src;
+ }
+ }
+ }
+
+ if (!(reload_in_progress | reload_completed)
+ && !xtensa_valid_move (mode, operands))
+ operands[1] = force_reg (mode, operands[1]);
+
+ operands[1] = xtensa_copy_incoming_a7 (operands[1]);
+
+ /* During reload we don't want to emit (subreg:X (mem:Y)) since that
+ instruction won't be recognized after reload, so we remove the
+ subreg and adjust mem accordingly. */
+ if (reload_in_progress)
+ {
+ operands[0] = fixup_subreg_mem (operands[0]);
+ operands[1] = fixup_subreg_mem (operands[1]);
+ }
+ return 0;
+}
+
+
+static rtx
+fixup_subreg_mem (rtx x)
+{
+ if (GET_CODE (x) == SUBREG
+ && GET_CODE (SUBREG_REG (x)) == REG
+ && REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER)
+ {
+ rtx temp =
+ gen_rtx_SUBREG (GET_MODE (x),
+ reg_equiv_mem [REGNO (SUBREG_REG (x))],
+ SUBREG_BYTE (x));
+ x = alter_subreg (&temp);
+ }
+ return x;
+}
+
+
+/* Check if an incoming argument in a7 is expected to be used soon and
+ if OPND is a register or register pair that includes a7. If so,
+ create a new pseudo and copy a7 into that pseudo at the very
+ beginning of the function, followed by the special "set_frame_ptr"
+ unspec_volatile insn. The return value is either the original
+ operand, if it is not a7, or the new pseudo containing a copy of
+ the incoming argument. This is necessary because the register
+ allocator will ignore conflicts with a7 and may either assign some
+ other pseudo to a7 or use a7 as the hard_frame_pointer, clobbering
+ the incoming argument in a7. By copying the argument out of a7 as
+ the very first thing, and then immediately following that with an
+ unspec_volatile to keep the scheduler away, we should avoid any
+ problems. Putting the set_frame_ptr insn at the beginning, with
+ only the a7 copy before it, also makes it easier for the prologue
+ expander to initialize the frame pointer after the a7 copy and to
+ fix up the a7 copy to use the stack pointer instead of the frame
+ pointer. */
+
+rtx
+xtensa_copy_incoming_a7 (rtx opnd)
+{
+ rtx entry_insns = 0;
+ rtx reg, tmp;
+ enum machine_mode mode;
+
+ if (!cfun->machine->need_a7_copy)
+ return opnd;
+
+ /* This function should never be called again once a7 has been copied. */
+ gcc_assert (!cfun->machine->set_frame_ptr_insn);
+
+ mode = GET_MODE (opnd);
+
+ /* The operand using a7 may come in a later instruction, so just return
+ the original operand if it doesn't use a7. */
+ reg = opnd;
+ if (GET_CODE (reg) == SUBREG)
+ {
+ gcc_assert (SUBREG_BYTE (reg) == 0);
+ reg = SUBREG_REG (reg);
+ }
+ if (GET_CODE (reg) != REG
+ || REGNO (reg) > A7_REG
+ || REGNO (reg) + HARD_REGNO_NREGS (A7_REG, mode) <= A7_REG)
+ return opnd;
+
+ /* 1-word args will always be in a7; 2-word args in a6/a7. */
+ gcc_assert (REGNO (reg) + HARD_REGNO_NREGS (A7_REG, mode) - 1 == A7_REG);
+
+ cfun->machine->need_a7_copy = false;
+
+ /* Copy a7 to a new pseudo at the function entry. Use gen_raw_REG to
+ create the REG for a7 so that hard_frame_pointer_rtx is not used. */
+
+ start_sequence ();
+ tmp = gen_reg_rtx (mode);
+
+ switch (mode)
+ {
+ case DFmode:
+ case DImode:
+ /* Copy the value out of A7 here but keep the first word in A6 until
+ after the set_frame_ptr insn. Otherwise, the register allocator
+ may decide to put "subreg (tmp, 0)" in A7 and clobber the incoming
+ value. */
+ emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 4),
+ gen_raw_REG (SImode, A7_REG)));
+ break;
+ case SFmode:
+ emit_insn (gen_movsf_internal (tmp, gen_raw_REG (mode, A7_REG)));
+ break;
+ case SImode:
+ emit_insn (gen_movsi_internal (tmp, gen_raw_REG (mode, A7_REG)));
+ break;
+ case HImode:
+ emit_insn (gen_movhi_internal (tmp, gen_raw_REG (mode, A7_REG)));
+ break;
+ case QImode:
+ emit_insn (gen_movqi_internal (tmp, gen_raw_REG (mode, A7_REG)));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ cfun->machine->set_frame_ptr_insn = emit_insn (gen_set_frame_ptr ());
+
+ /* For DF and DI mode arguments, copy the incoming value in A6 now. */
+ if (mode == DFmode || mode == DImode)
+ emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 0),
+ gen_rtx_REG (SImode, A7_REG - 1)));
+ entry_insns = get_insns ();
+ end_sequence ();
+
+ if (cfun->machine->vararg_a7)
+ {
+ /* This is called from within builtin_saveregs, which will insert the
+ saveregs code at the function entry, ahead of anything placed at
+ the function entry now. Instead, save the sequence to be inserted
+ at the beginning of the saveregs code. */
+ cfun->machine->vararg_a7_copy = entry_insns;
+ }
+ else
+ {
+ /* Put entry_insns after the NOTE that starts the function. If
+ this is inside a start_sequence, make the outer-level insn
+ chain current, so the code is placed at the start of the
+ function. */
+ push_topmost_sequence ();
+ /* Do not use entry_of_function() here. This is called from within
+ expand_function_start, when the CFG still holds GIMPLE. */
+ emit_insn_after (entry_insns, get_insns ());
+ pop_topmost_sequence ();
+ }
+
+ return tmp;
+}
+
+
+/* Try to expand a block move operation to a sequence of RTL move
+ instructions. If not optimizing, or if the block size is not a
+ constant, or if the block is too large, the expansion fails and GCC
+ falls back to calling memcpy().
+
+ operands[0] is the destination
+ operands[1] is the source
+ operands[2] is the length
+ operands[3] is the alignment */
+
+int
+xtensa_expand_block_move (rtx *operands)
+{
+ static const enum machine_mode mode_from_align[] =
+ {
+ VOIDmode, QImode, HImode, VOIDmode, SImode,
+ };
+
+ rtx dst_mem = operands[0];
+ rtx src_mem = operands[1];
+ HOST_WIDE_INT bytes, align;
+ int num_pieces, move_ratio;
+ rtx temp[2];
+ enum machine_mode mode[2];
+ int amount[2];
+ bool active[2];
+ int phase = 0;
+ int next;
+ int offset_ld = 0;
+ int offset_st = 0;
+ rtx x;
+
+  /* If we are not optimizing, or if this is not a fixed-size move,
+     just call memcpy.  */
+ if (!optimize || (GET_CODE (operands[2]) != CONST_INT))
+ return 0;
+
+ bytes = INTVAL (operands[2]);
+ align = INTVAL (operands[3]);
+
+ /* Anything to move? */
+ if (bytes <= 0)
+ return 0;
+
+ if (align > MOVE_MAX)
+ align = MOVE_MAX;
+
+ /* Decide whether to expand inline based on the optimization level. */
+ move_ratio = 4;
+ if (optimize > 2)
+ move_ratio = LARGEST_MOVE_RATIO;
+ num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */
+ if (num_pieces > move_ratio)
+ return 0;
+
+ x = XEXP (dst_mem, 0);
+ if (!REG_P (x))
+ {
+ x = force_reg (Pmode, x);
+ dst_mem = replace_equiv_address (dst_mem, x);
+ }
+
+ x = XEXP (src_mem, 0);
+ if (!REG_P (x))
+ {
+ x = force_reg (Pmode, x);
+ src_mem = replace_equiv_address (src_mem, x);
+ }
+
+ active[0] = active[1] = false;
+
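+  /* Software-pipeline the copy: each iteration loads the next piece
+     and stores the piece loaded on the previous iteration, so that a
+     load and a store alternate.  */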
+ do
+ {
+ next = phase;
+ phase ^= 1;
+
+ if (bytes > 0)
+ {
+ int next_amount;
+
+ next_amount = (bytes >= 4 ? 4 : (bytes >= 2 ? 2 : 1));
+ next_amount = MIN (next_amount, align);
+
+ amount[next] = next_amount;
+ mode[next] = mode_from_align[next_amount];
+ temp[next] = gen_reg_rtx (mode[next]);
+
+ x = adjust_address (src_mem, mode[next], offset_ld);
+ emit_insn (gen_rtx_SET (VOIDmode, temp[next], x));
+
+ offset_ld += next_amount;
+ bytes -= next_amount;
+ active[next] = true;
+ }
+
+ if (active[phase])
+ {
+ active[phase] = false;
+
+ x = adjust_address (dst_mem, mode[phase], offset_st);
+ emit_insn (gen_rtx_SET (VOIDmode, x, temp[phase]));
+
+ offset_st += amount[phase];
+ }
+ }
+ while (active[next]);
+
+ return 1;
+}
+
+
+void
+xtensa_expand_nonlocal_goto (rtx *operands)
+{
+ rtx goto_handler = operands[1];
+ rtx containing_fp = operands[3];
+
+ /* Generate a call to "__xtensa_nonlocal_goto" (in libgcc); the code
+ is too big to generate in-line. */
+
+ if (GET_CODE (containing_fp) != REG)
+ containing_fp = force_reg (Pmode, containing_fp);
+
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_nonlocal_goto"),
+ LCT_NORMAL, VOIDmode, 2,
+ containing_fp, Pmode,
+ goto_handler, Pmode);
+}
+
+
+static struct machine_function *
+xtensa_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+
+/* Shift VAL of mode MODE left by COUNT bits. */
+
+static inline rtx
+xtensa_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count)
+{
+ val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ return expand_simple_binop (SImode, ASHIFT, val, count,
+ NULL_RTX, 1, OPTAB_DIRECT);
+}
+
+
+/* Structure to hold the initial parameters for a compare_and_swap operation
+ in HImode and QImode. */
+
+struct alignment_context
+{
+ rtx memsi; /* SI aligned memory location. */
+ rtx shift; /* Bit offset with regard to lsb. */
+ rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
+ rtx modemaski; /* ~modemask */
+};
+
+
+/* Initialize structure AC for word access to HI and QI mode memory. */
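+/* As a concrete little-endian example, for a QImode reference whose
+   address ADDR has unknown alignment: MEMSI covers ADDR & -4, SHIFT
+   is (ADDR & 3) * 8 bits, MODEMASK is 0xff shifted left by SHIFT,
+   and MODEMASKI is its complement.  */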
+
+static void
+init_alignment_context (struct alignment_context *ac, rtx mem)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx byteoffset = NULL_RTX;
+ bool aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
+
+ if (aligned)
+ ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
+ else
+ {
+ /* Alignment is unknown. */
+ rtx addr, align;
+
+ /* Force the address into a register. */
+ addr = force_reg (Pmode, XEXP (mem, 0));
+
+ /* Align it to SImode. */
+ align = expand_simple_binop (Pmode, AND, addr,
+ GEN_INT (-GET_MODE_SIZE (SImode)),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* Generate MEM. */
+ ac->memsi = gen_rtx_MEM (SImode, align);
+ MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
+ set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
+ set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
+
+ byteoffset = expand_simple_binop (Pmode, AND, addr,
+ GEN_INT (GET_MODE_SIZE (SImode) - 1),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ }
+
+ /* Calculate shiftcount. */
+ if (TARGET_BIG_ENDIAN)
+ {
+ ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
+ if (!aligned)
+ ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ }
+ else
+ {
+ if (aligned)
+ ac->shift = NULL_RTX;
+ else
+ ac->shift = byteoffset;
+ }
+
+ if (ac->shift != NULL_RTX)
+ {
+ /* Shift is the byte count, but we need the bitcount. */
+ ac->shift = expand_simple_binop (SImode, MULT, ac->shift,
+ GEN_INT (BITS_PER_UNIT),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ ac->modemask = expand_simple_binop (SImode, ASHIFT,
+ GEN_INT (GET_MODE_MASK (mode)),
+ ac->shift,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ }
+ else
+ ac->modemask = GEN_INT (GET_MODE_MASK (mode));
+
+ ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask, NULL_RTX, 1);
+}
+
+
+/* Expand an atomic compare and swap operation for HImode and QImode.
+ MEM is the memory location, CMP the old value to compare MEM with
+ and NEW_RTX the value to set if CMP == MEM. */
+
+void
+xtensa_expand_compare_and_swap (rtx target, rtx mem, rtx cmp, rtx new_rtx)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ struct alignment_context ac;
+ rtx tmp, cmpv, newv, val;
+ rtx oldval = gen_reg_rtx (SImode);
+ rtx res = gen_reg_rtx (SImode);
+ rtx csloop = gen_label_rtx ();
+ rtx csend = gen_label_rtx ();
+
+ init_alignment_context (&ac, mem);
+
+ if (ac.shift != NULL_RTX)
+ {
+ cmp = xtensa_expand_mask_and_shift (cmp, mode, ac.shift);
+ new_rtx = xtensa_expand_mask_and_shift (new_rtx, mode, ac.shift);
+ }
+
+ /* Load the surrounding word into VAL with the MEM value masked out. */
+ val = force_reg (SImode, expand_simple_binop (SImode, AND, ac.memsi,
+ ac.modemaski, NULL_RTX, 1,
+ OPTAB_DIRECT));
+ emit_label (csloop);
+
+ /* Patch CMP and NEW_RTX into VAL at correct position. */
+ cmpv = force_reg (SImode, expand_simple_binop (SImode, IOR, cmp, val,
+ NULL_RTX, 1, OPTAB_DIRECT));
+ newv = force_reg (SImode, expand_simple_binop (SImode, IOR, new_rtx, val,
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ /* Jump to end if we're done. */
+ emit_insn (gen_sync_compare_and_swapsi (res, ac.memsi, cmpv, newv));
+ emit_cmp_and_jump_insns (res, cmpv, EQ, const0_rtx, SImode, true, csend);
+
+  /* Check whether the bits outside of MODE have changed.  */
+ emit_move_insn (oldval, val);
+ tmp = expand_simple_binop (SImode, AND, res, ac.modemaski,
+ val, 1, OPTAB_DIRECT);
+ if (tmp != val)
+ emit_move_insn (val, tmp);
+
+  /* If they have, retry with the updated surrounding bits.  */
+ emit_cmp_and_jump_insns (oldval, val, NE, const0_rtx, SImode, true, csloop);
+
+ emit_label (csend);
+
+ /* Return the correct part of the bitfield. */
+ convert_move (target,
+ (ac.shift == NULL_RTX ? res
+ : expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
+ NULL_RTX, 1, OPTAB_DIRECT)),
+ 1);
+}
+
+
+/* Expand an atomic operation CODE of mode MODE (either HImode or QImode --
+ the default expansion works fine for SImode). MEM is the memory location
+ and VAL the value to play with. If AFTER is true then store the value
+ MEM holds after the operation, if AFTER is false then store the value MEM
+ holds before the operation. If TARGET is zero then discard that value, else
+ store it to TARGET. */
+
+void
+xtensa_expand_atomic (enum rtx_code code, rtx target, rtx mem, rtx val,
+ bool after)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ struct alignment_context ac;
+ rtx csloop = gen_label_rtx ();
+ rtx cmp, tmp;
+ rtx old = gen_reg_rtx (SImode);
+ rtx new_rtx = gen_reg_rtx (SImode);
+ rtx orig = NULL_RTX;
+
+ init_alignment_context (&ac, mem);
+
+ /* Prepare values before the compare-and-swap loop. */
+ if (ac.shift != NULL_RTX)
+ val = xtensa_expand_mask_and_shift (val, mode, ac.shift);
+ switch (code)
+ {
+ case PLUS:
+ case MINUS:
+ orig = gen_reg_rtx (SImode);
+ convert_move (orig, val, 1);
+ break;
+
+ case SET:
+ case IOR:
+ case XOR:
+ break;
+
+ case MULT: /* NAND */
+ case AND:
+ /* val = "11..1<val>11..1" */
+ val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Load full word. Subsequent loads are performed by S32C1I. */
+ cmp = force_reg (SImode, ac.memsi);
+
+ emit_label (csloop);
+ emit_move_insn (old, cmp);
+
+ switch (code)
+ {
+ case PLUS:
+ case MINUS:
+ val = expand_simple_binop (SImode, code, old, orig,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ val = expand_simple_binop (SImode, AND, val, ac.modemask,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* FALLTHRU */
+ case SET:
+ tmp = expand_simple_binop (SImode, AND, old, ac.modemaski,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ tmp = expand_simple_binop (SImode, IOR, tmp, val,
+ new_rtx, 1, OPTAB_DIRECT);
+ break;
+
+ case AND:
+ case IOR:
+ case XOR:
+ tmp = expand_simple_binop (SImode, code, old, val,
+ new_rtx, 1, OPTAB_DIRECT);
+ break;
+
+ case MULT: /* NAND */
+ tmp = expand_simple_binop (SImode, XOR, old, ac.modemask,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ tmp = expand_simple_binop (SImode, AND, tmp, val,
+ new_rtx, 1, OPTAB_DIRECT);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (tmp != new_rtx)
+ emit_move_insn (new_rtx, tmp);
+ emit_insn (gen_sync_compare_and_swapsi (cmp, ac.memsi, old, new_rtx));
+ emit_cmp_and_jump_insns (cmp, old, NE, const0_rtx, SImode, true, csloop);
+
+ if (target)
+ {
+ tmp = (after ? new_rtx : cmp);
+ convert_move (target,
+ (ac.shift == NULL_RTX ? tmp
+ : expand_simple_binop (SImode, LSHIFTRT, tmp, ac.shift,
+ NULL_RTX, 1, OPTAB_DIRECT)),
+ 1);
+ }
+}
+
+
+void
+xtensa_setup_frame_addresses (void)
+{
+ /* Set flag to cause TARGET_FRAME_POINTER_REQUIRED to return true. */
+ cfun->machine->accesses_prev_frame = 1;
+
+ emit_library_call
+ (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_libgcc_window_spill"),
+ LCT_NORMAL, VOIDmode, 0);
+}
+
+
+/* Emit the assembly for the end of a zero-cost loop. Normally we just emit
+ a comment showing where the end of the loop is. However, if there is a
+ label or a branch at the end of the loop then we need to place a nop
+ there. If the loop ends with a label we need the nop so that branches
+ targeting that label will target the nop (and thus remain in the loop),
+ instead of targeting the instruction after the loop (and thus exiting
+   the loop).  If the loop ends with a branch, we need the nop in case
+   the branch targets a location inside the loop.  Because such a
+   branch would be the last instruction in the loop, taking it would
+   still decrement the loop count, so we place a nop after the branch
+   to prevent the count from being decremented when the branch is
+   taken.  */
+
+void
+xtensa_emit_loop_end (rtx insn, rtx *operands)
+{
+ char done = 0;
+
+ for (insn = PREV_INSN (insn); insn && !done; insn = PREV_INSN (insn))
+ {
+ switch (GET_CODE (insn))
+ {
+ case NOTE:
+ case BARRIER:
+ break;
+
+ case CODE_LABEL:
+ output_asm_insn (TARGET_DENSITY ? "nop.n" : "nop", operands);
+ done = 1;
+ break;
+
+ default:
+ {
+ rtx body = PATTERN (insn);
+
+	    if (JUMP_P (insn))
+ {
+ output_asm_insn (TARGET_DENSITY ? "nop.n" : "nop", operands);
+ done = 1;
+ }
+ else if ((GET_CODE (body) != USE)
+ && (GET_CODE (body) != CLOBBER))
+ done = 1;
+ }
+ break;
+ }
+ }
+
+ output_asm_insn ("# loop end for %0", operands);
+}
+
+
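+/* Output the assembly for a conditional branch.  For example,
+   (EQ x 0) yields "beqz" ("beqz.n" with the Density option),
+   (LT x 12) yields "blti", and a register comparison yields "blt";
+   INVERTED reverses the sense of the test.  */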
+char *
+xtensa_emit_branch (bool inverted, bool immed, rtx *operands)
+{
+ static char result[64];
+ enum rtx_code code;
+ const char *op;
+
+ code = GET_CODE (operands[3]);
+ switch (code)
+ {
+ case EQ: op = inverted ? "ne" : "eq"; break;
+ case NE: op = inverted ? "eq" : "ne"; break;
+ case LT: op = inverted ? "ge" : "lt"; break;
+ case GE: op = inverted ? "lt" : "ge"; break;
+ case LTU: op = inverted ? "geu" : "ltu"; break;
+ case GEU: op = inverted ? "ltu" : "geu"; break;
+ default: gcc_unreachable ();
+ }
+
+ if (immed)
+ {
+ if (INTVAL (operands[1]) == 0)
+ sprintf (result, "b%sz%s\t%%0, %%2", op,
+ (TARGET_DENSITY && (code == EQ || code == NE)) ? ".n" : "");
+ else
+ sprintf (result, "b%si\t%%0, %%d1, %%2", op);
+ }
+ else
+ sprintf (result, "b%s\t%%0, %%1, %%2", op);
+
+ return result;
+}
+
+
+char *
+xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands)
+{
+ static char result[64];
+ const char *op;
+
+ switch (GET_CODE (operands[3]))
+ {
+ case EQ: op = inverted ? "bs" : "bc"; break;
+ case NE: op = inverted ? "bc" : "bs"; break;
+ default: gcc_unreachable ();
+ }
+
+ if (immed)
+ {
+ unsigned bitnum = INTVAL (operands[1]) & 0x1f;
+ operands[1] = GEN_INT (bitnum);
+ sprintf (result, "b%si\t%%0, %%d1, %%2", op);
+ }
+ else
+ sprintf (result, "b%s\t%%0, %%1, %%2", op);
+
+ return result;
+}
+
+
+char *
+xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands)
+{
+ static char result[64];
+ enum rtx_code code;
+ const char *op;
+
+ code = GET_CODE (operands[4]);
+ if (isbool)
+ {
+ switch (code)
+ {
+ case EQ: op = inverted ? "t" : "f"; break;
+ case NE: op = inverted ? "f" : "t"; break;
+ default: gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (code)
+ {
+ case EQ: op = inverted ? "nez" : "eqz"; break;
+ case NE: op = inverted ? "eqz" : "nez"; break;
+ case LT: op = inverted ? "gez" : "ltz"; break;
+ case GE: op = inverted ? "ltz" : "gez"; break;
+ default: gcc_unreachable ();
+ }
+ }
+
+ sprintf (result, "mov%s%s\t%%0, %%%d, %%1",
+ op, isfp ? ".s" : "", inverted ? 3 : 2);
+ return result;
+}
+
+
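+/* Output a call instruction.  CALL8/CALLX8 rotate the register
+   window by eight, so the caller's a10-a15 become the callee's
+   a2-a7; xtensa_call_tls_desc below relies on this when it reads
+   the result from a10.  */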
+char *
+xtensa_emit_call (int callop, rtx *operands)
+{
+ static char result[64];
+ rtx tgt = operands[callop];
+
+ if (GET_CODE (tgt) == CONST_INT)
+ sprintf (result, "call8\t0x%lx", INTVAL (tgt));
+ else if (register_operand (tgt, VOIDmode))
+ sprintf (result, "callx8\t%%%d", callop);
+ else
+ sprintf (result, "call8\t%%%d", callop);
+
+ return result;
+}
+
+
+bool
+xtensa_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
+{
+ /* Allow constant pool addresses. */
+ if (mode != BLKmode && GET_MODE_SIZE (mode) >= UNITS_PER_WORD
+ && ! TARGET_CONST16 && constantpool_address_p (addr)
+ && ! xtensa_tls_referenced_p (addr))
+ return true;
+
+ while (GET_CODE (addr) == SUBREG)
+ addr = SUBREG_REG (addr);
+
+ /* Allow base registers. */
+ if (GET_CODE (addr) == REG && BASE_REG_P (addr, strict))
+ return true;
+
+ /* Check for "register + offset" addressing. */
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx xplus0 = XEXP (addr, 0);
+ rtx xplus1 = XEXP (addr, 1);
+ enum rtx_code code0;
+ enum rtx_code code1;
+
+ while (GET_CODE (xplus0) == SUBREG)
+ xplus0 = SUBREG_REG (xplus0);
+ code0 = GET_CODE (xplus0);
+
+ while (GET_CODE (xplus1) == SUBREG)
+ xplus1 = SUBREG_REG (xplus1);
+ code1 = GET_CODE (xplus1);
+
+ /* Swap operands if necessary so the register is first. */
+ if (code0 != REG && code1 == REG)
+ {
+ xplus0 = XEXP (addr, 1);
+ xplus1 = XEXP (addr, 0);
+ code0 = GET_CODE (xplus0);
+ code1 = GET_CODE (xplus1);
+ }
+
+ if (code0 == REG && BASE_REG_P (xplus0, strict)
+ && code1 == CONST_INT
+ && xtensa_mem_offset (INTVAL (xplus1), mode))
+ return true;
+ }
+
+ return false;
+}
+
+
+/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
+
+static GTY(()) rtx xtensa_tls_module_base_symbol;
+
+static rtx
+xtensa_tls_module_base (void)
+{
+ if (! xtensa_tls_module_base_symbol)
+ {
+ xtensa_tls_module_base_symbol =
+ gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
+ SYMBOL_REF_FLAGS (xtensa_tls_module_base_symbol)
+ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+ }
+
+ return xtensa_tls_module_base_symbol;
+}
+
+
+static rtx
+xtensa_call_tls_desc (rtx sym, rtx *retp)
+{
+ rtx fn, arg, a10, call_insn, insns;
+
+ start_sequence ();
+ fn = gen_reg_rtx (Pmode);
+ arg = gen_reg_rtx (Pmode);
+ a10 = gen_rtx_REG (Pmode, 10);
+
+ emit_insn (gen_tls_func (fn, sym));
+ emit_insn (gen_tls_arg (arg, sym));
+ emit_move_insn (a10, arg);
+ call_insn = emit_call_insn (gen_tls_call (a10, fn, sym, const1_rtx));
+ CALL_INSN_FUNCTION_USAGE (call_insn)
+ = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, a10),
+ CALL_INSN_FUNCTION_USAGE (call_insn));
+ insns = get_insns ();
+ end_sequence ();
+
+ *retp = a10;
+ return insns;
+}
+
+
+static rtx
+xtensa_legitimize_tls_address (rtx x)
+{
+ unsigned int model = SYMBOL_REF_TLS_MODEL (x);
+ rtx dest, tp, ret, modbase, base, addend, insns;
+
+ dest = gen_reg_rtx (Pmode);
+ switch (model)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ insns = xtensa_call_tls_desc (x, &ret);
+ emit_libcall_block (insns, dest, ret, x);
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ base = gen_reg_rtx (Pmode);
+ modbase = xtensa_tls_module_base ();
+ insns = xtensa_call_tls_desc (modbase, &ret);
+ emit_libcall_block (insns, base, ret, modbase);
+ addend = force_reg (SImode, gen_sym_DTPOFF (x));
+ emit_insn (gen_addsi3 (dest, base, addend));
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ case TLS_MODEL_LOCAL_EXEC:
+ tp = gen_reg_rtx (SImode);
+ emit_insn (gen_load_tp (tp));
+ addend = force_reg (SImode, gen_sym_TPOFF (x));
+ emit_insn (gen_addsi3 (dest, tp, addend));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return dest;
+}
+
+
+rtx
+xtensa_legitimize_address (rtx x,
+ rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ if (xtensa_tls_symbol_p (x))
+ return xtensa_legitimize_tls_address (x);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ rtx plus0 = XEXP (x, 0);
+ rtx plus1 = XEXP (x, 1);
+
+ if (GET_CODE (plus0) != REG && GET_CODE (plus1) == REG)
+ {
+ plus0 = XEXP (x, 1);
+ plus1 = XEXP (x, 0);
+ }
+
+ /* Try to split up the offset to use an ADDMI instruction. */
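+      /* For example, a base register plus 0x1234 becomes
+	 "addmi tmp, base, 0x1200" followed by an ordinary 0x34
+	 offset.  */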
+ if (GET_CODE (plus0) == REG
+ && GET_CODE (plus1) == CONST_INT
+ && !xtensa_mem_offset (INTVAL (plus1), mode)
+ && !xtensa_simm8 (INTVAL (plus1))
+ && xtensa_mem_offset (INTVAL (plus1) & 0xff, mode)
+ && xtensa_simm8x256 (INTVAL (plus1) & ~0xff))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx addmi_offset = GEN_INT (INTVAL (plus1) & ~0xff);
+ emit_insn (gen_rtx_SET (Pmode, temp,
+ gen_rtx_PLUS (Pmode, plus0, addmi_offset)));
+ return gen_rtx_PLUS (Pmode, temp, GEN_INT (INTVAL (plus1) & 0xff));
+ }
+ }
+
+ return x;
+}
+
+/* Worker function for TARGET_MODE_DEPENDENT_ADDRESS_P.
+
+ Treat constant-pool references as "mode dependent" since they can
+ only be accessed with SImode loads. This works around a bug in the
+ combiner where a constant pool reference is temporarily converted
+ to an HImode load, which is then assumed to zero-extend based on
+ our definition of LOAD_EXTEND_OP. This is wrong because the high
+ bits of a 16-bit value in the constant pool are now sign-extended
+ by default. */
+
+static bool
+xtensa_mode_dependent_address_p (const_rtx addr)
+{
+ return constantpool_address_p (addr);
+}
+
+/* Helper for xtensa_tls_referenced_p. */
+
+static int
+xtensa_tls_referenced_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (*x) == SYMBOL_REF)
+ return SYMBOL_REF_TLS_MODEL (*x) != 0;
+
+ /* Ignore TLS references that have already been legitimized. */
+ if (GET_CODE (*x) == UNSPEC)
+ {
+ switch (XINT (*x, 1))
+ {
+ case UNSPEC_TPOFF:
+ case UNSPEC_DTPOFF:
+ case UNSPEC_TLS_FUNC:
+ case UNSPEC_TLS_ARG:
+ case UNSPEC_TLS_CALL:
+ return -1;
+ default:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+/* Return TRUE if X contains any TLS symbol references. */
+
+bool
+xtensa_tls_referenced_p (rtx x)
+{
+ if (! TARGET_HAVE_TLS)
+ return false;
+
+ return for_each_rtx (&x, xtensa_tls_referenced_p_1, NULL);
+}
+
+
+/* Return the debugger register number to use for 'regno'. */
+
+int
+xtensa_dbx_register_number (int regno)
+{
+ int first = -1;
+
+ if (GP_REG_P (regno))
+ {
+ regno -= GP_REG_FIRST;
+ first = 0;
+ }
+ else if (BR_REG_P (regno))
+ {
+ regno -= BR_REG_FIRST;
+ first = 16;
+ }
+ else if (FP_REG_P (regno))
+ {
+ regno -= FP_REG_FIRST;
+ first = 48;
+ }
+ else if (ACC_REG_P (regno))
+ {
+ first = 0x200; /* Start of Xtensa special registers. */
+ regno = 16; /* ACCLO is special register 16. */
+ }
+
+ /* When optimizing, we sometimes get asked about pseudo-registers
+ that don't represent hard registers. Return 0 for these. */
+ if (first == -1)
+ return 0;
+
+ return first + regno;
+}
+
+
+/* Argument support functions. */
+
+/* Initialize CUMULATIVE_ARGS for a function. */
+
+void
+init_cumulative_args (CUMULATIVE_ARGS *cum, int incoming)
+{
+ cum->arg_words = 0;
+ cum->incoming = incoming;
+}
+
+
+/* Advance the argument to the next argument position. */
+
+static void
+xtensa_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int words, max;
+ int *arg_words;
+
+ arg_words = &cum->arg_words;
+ max = MAX_ARGS_IN_REGISTERS;
+
+ words = (((mode != BLKmode)
+ ? (int) GET_MODE_SIZE (mode)
+ : int_size_in_bytes (type)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ if (*arg_words < max
+ && (targetm.calls.must_pass_in_stack (mode, type)
+ || *arg_words + words > max))
+ *arg_words = max;
+
+ *arg_words += words;
+}
+
+
+/* Return an RTL expression containing the register for the given mode,
+ or 0 if the argument is to be passed on the stack. INCOMING_P is nonzero
+ if this is an incoming argument to the current function. */
+
+static rtx
+xtensa_function_arg_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool incoming_p)
+{
+ int regbase, words, max;
+ int *arg_words;
+ int regno;
+
+ arg_words = &cum->arg_words;
+ regbase = (incoming_p ? GP_ARG_FIRST : GP_OUTGOING_ARG_FIRST);
+ max = MAX_ARGS_IN_REGISTERS;
+
+ words = (((mode != BLKmode)
+ ? (int) GET_MODE_SIZE (mode)
+ : int_size_in_bytes (type)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
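+  /* Round up for over-aligned types; e.g. a type with 8-byte
+     alignment advances ARG_WORDS to an even index so the argument
+     lands in an aligned register pair.  */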
+ if (type && (TYPE_ALIGN (type) > BITS_PER_WORD))
+ {
+ int align = MIN (TYPE_ALIGN (type), STACK_BOUNDARY) / BITS_PER_WORD;
+ *arg_words = (*arg_words + align - 1) & -align;
+ }
+
+ if (*arg_words + words > max)
+ return (rtx)0;
+
+ regno = regbase + *arg_words;
+
+ if (cum->incoming && regno <= A7_REG && regno + words > A7_REG)
+ cfun->machine->need_a7_copy = true;
+
+ return gen_rtx_REG (mode, regno);
+}
+
+/* Implement TARGET_FUNCTION_ARG. */
+
+static rtx
+xtensa_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ return xtensa_function_arg_1 (cum, mode, type, false);
+}
+
+/* Implement TARGET_FUNCTION_INCOMING_ARG. */
+
+static rtx
+xtensa_function_incoming_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ return xtensa_function_arg_1 (cum, mode, type, true);
+}
+
+static unsigned int
+xtensa_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ unsigned int alignment;
+
+ alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
+ if (alignment < PARM_BOUNDARY)
+ alignment = PARM_BOUNDARY;
+ if (alignment > STACK_BOUNDARY)
+ alignment = STACK_BOUNDARY;
+ return alignment;
+}
+
+
+static bool
+xtensa_return_in_msb (const_tree valtype)
+{
+ return (TARGET_BIG_ENDIAN
+ && AGGREGATE_TYPE_P (valtype)
+ && int_size_in_bytes (valtype) >= UNITS_PER_WORD);
+}
+
+
+static void
+xtensa_option_override (void)
+{
+ int regno;
+ enum machine_mode mode;
+
+ if (!TARGET_BOOLEANS && TARGET_HARD_FLOAT)
+ error ("boolean registers required for the floating-point option");
+
+ /* Set up array giving whether a given register can hold a given mode. */
+ for (mode = VOIDmode;
+ mode != MAX_MACHINE_MODE;
+ mode = (enum machine_mode) ((int) mode + 1))
+ {
+ int size = GET_MODE_SIZE (mode);
+ enum mode_class mclass = GET_MODE_CLASS (mode);
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ {
+ int temp;
+
+ if (ACC_REG_P (regno))
+ temp = (TARGET_MAC16
+ && (mclass == MODE_INT) && (size <= UNITS_PER_WORD));
+ else if (GP_REG_P (regno))
+ temp = ((regno & 1) == 0 || (size <= UNITS_PER_WORD));
+ else if (FP_REG_P (regno))
+ temp = (TARGET_HARD_FLOAT && (mode == SFmode));
+ else if (BR_REG_P (regno))
+ temp = (TARGET_BOOLEANS && (mode == CCmode));
+ else
+ temp = FALSE;
+
+ xtensa_hard_regno_mode_ok[(int) mode][regno] = temp;
+ }
+ }
+
+ init_machine_status = xtensa_init_machine_status;
+
+ /* Check PIC settings. PIC is only supported when using L32R
+ instructions, and some targets need to always use PIC. */
+ if (flag_pic && TARGET_CONST16)
+ error ("-f%s is not supported with CONST16 instructions",
+ (flag_pic > 1 ? "PIC" : "pic"));
+ else if (TARGET_FORCE_NO_PIC)
+ flag_pic = 0;
+ else if (XTENSA_ALWAYS_PIC)
+ {
+ if (TARGET_CONST16)
+ error ("PIC is required but not supported with CONST16 instructions");
+ flag_pic = 1;
+ }
+ /* There's no need for -fPIC (as opposed to -fpic) on Xtensa. */
+ if (flag_pic > 1)
+ flag_pic = 1;
+ if (flag_pic && !flag_pie)
+ flag_shlib = 1;
+
+ /* Hot/cold partitioning does not work on this architecture, because of
+ constant pools (the load instruction cannot necessarily reach that far).
+ Therefore disable it on this architecture. */
+ if (flag_reorder_blocks_and_partition)
+ {
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+}
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand X. X is an RTL
+ expression.
+
+ CODE is a value that can be used to specify one of several ways
+ of printing the operand. It is used when identical operands
+ must be printed differently depending on the context. CODE
+ comes from the '%' specification that was used to request
+ printing of the operand. If the specification was just '%DIGIT'
+ then CODE is 0; if the specification was '%LTR DIGIT' then CODE
+ is the ASCII code for LTR.
+
+ If X is a register, this macro should print the register's name.
+ The names can be found in an array 'reg_names' whose type is
+ 'char *[]'. 'reg_names' is initialized from 'REGISTER_NAMES'.
+
+ When the machine description has a specification '%PUNCT' (a '%'
+ followed by a punctuation character), this macro is called with
+ a null pointer for X and the punctuation character for CODE.
+
+ 'a', 'c', 'l', and 'n' are reserved.
+
+ The Xtensa specific codes are:
+
+ 'd' CONST_INT, print as signed decimal
+ 'x' CONST_INT, print as signed hexadecimal
+ 'K' CONST_INT, print number of bits in mask for EXTUI
+ 'R' CONST_INT, print (X & 0x1f)
+ 'L' CONST_INT, print ((32 - X) & 0x1f)
+ 'D' REG, print second register of double-word register operand
+ 'N' MEM, print address of next word following a memory operand
+ 'v' MEM, if memory reference is volatile, output a MEMW before it
+ 't' any constant, add "@h" suffix for top 16 bits
+ 'b' any constant, add "@l" suffix for bottom 16 bits
+*/
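+
+/* Illustrative examples of the CONST_INT codes above: "%K" applied to
+   0x0000000f prints "4" (four consecutive low bits set, as EXTUI
+   expects); "%R" applied to 37 prints "5" (37 & 0x1f); "%L" applied
+   to 8 prints "24" ((32 - 8) & 0x1f).  */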
+
+static void
+printx (FILE *file, signed int val)
+{
+ /* Print a hexadecimal value in a nice way. */
+ if ((val > -0xa) && (val < 0xa))
+ fprintf (file, "%d", val);
+ else if (val < 0)
+ fprintf (file, "-0x%x", -val);
+ else
+ fprintf (file, "0x%x", val);
+}
+
+
+void
+print_operand (FILE *file, rtx x, int letter)
+{
+ if (!x)
+ error ("PRINT_OPERAND null pointer");
+
+ switch (letter)
+ {
+ case 'D':
+ if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
+ fprintf (file, "%s", reg_names[xt_true_regnum (x) + 1]);
+ else
+ output_operand_lossage ("invalid %%D value");
+ break;
+
+ case 'v':
+ if (GET_CODE (x) == MEM)
+ {
+ /* For a volatile memory reference, emit a MEMW before the
+ load or store. */
+ if (MEM_VOLATILE_P (x) && TARGET_SERIALIZE_VOLATILE)
+ fprintf (file, "memw\n\t");
+ }
+ else
+ output_operand_lossage ("invalid %%v value");
+ break;
+
+ case 'N':
+ if (GET_CODE (x) == MEM
+ && (GET_MODE (x) == DFmode || GET_MODE (x) == DImode))
+ {
+ x = adjust_address (x, GET_MODE (x) == DFmode ? SFmode : SImode, 4);
+ output_address (XEXP (x, 0));
+ }
+ else
+ output_operand_lossage ("invalid %%N value");
+ break;
+
+ case 'K':
+ if (GET_CODE (x) == CONST_INT)
+ {
+ int num_bits = 0;
+ unsigned val = INTVAL (x);
+ while (val & 1)
+ {
+ num_bits += 1;
+ val = val >> 1;
+ }
+ if ((val != 0) || (num_bits == 0) || (num_bits > 16))
+ fatal_insn ("invalid mask", x);
+
+ fprintf (file, "%d", num_bits);
+ }
+ else
+ output_operand_lossage ("invalid %%K value");
+ break;
+
+ case 'L':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "%ld", (32 - INTVAL (x)) & 0x1f);
+ else
+ output_operand_lossage ("invalid %%L value");
+ break;
+
+ case 'R':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "%ld", INTVAL (x) & 0x1f);
+ else
+ output_operand_lossage ("invalid %%R value");
+ break;
+
+ case 'x':
+ if (GET_CODE (x) == CONST_INT)
+ printx (file, INTVAL (x));
+ else
+ output_operand_lossage ("invalid %%x value");
+ break;
+
+ case 'd':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "%ld", INTVAL (x));
+ else
+ output_operand_lossage ("invalid %%d value");
+ break;
+
+ case 't':
+ case 'b':
+ if (GET_CODE (x) == CONST_INT)
+ {
+ printx (file, INTVAL (x));
+ fputs (letter == 't' ? "@h" : "@l", file);
+ }
+ else if (GET_CODE (x) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ if (GET_MODE (x) == SFmode)
+ {
+ long l;
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+ fprintf (file, "0x%08lx@%c", l, letter == 't' ? 'h' : 'l');
+ }
+ else
+ output_operand_lossage ("invalid %%t/%%b value");
+ }
+ else if (GET_CODE (x) == CONST)
+ {
+ /* X must be a symbolic constant on ELF. Write an expression
+ suitable for 'const16' that sets the high or low 16 bits. */
+ if (GET_CODE (XEXP (x, 0)) != PLUS
+ || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
+ || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
+ output_operand_lossage ("invalid %%t/%%b value");
+ print_operand (file, XEXP (XEXP (x, 0), 0), 0);
+ fputs (letter == 't' ? "@h" : "@l", file);
+ /* There must be a non-alphanumeric character between 'h' or 'l'
+ and the number. The '-' is added by print_operand() already. */
+ if (INTVAL (XEXP (XEXP (x, 0), 1)) >= 0)
+ fputs ("+", file);
+ print_operand (file, XEXP (XEXP (x, 0), 1), 0);
+ }
+ else
+ {
+ output_addr_const (file, x);
+ fputs (letter == 't' ? "@h" : "@l", file);
+ }
+ break;
+
+ default:
+ if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
+ fprintf (file, "%s", reg_names[xt_true_regnum (x)]);
+ else if (GET_CODE (x) == MEM)
+ output_address (XEXP (x, 0));
+ else if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "%ld", INTVAL (x));
+ else
+ output_addr_const (file, x);
+ }
+}
+
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand that is a memory
+ reference whose address is ADDR. ADDR is an RTL expression. */
+
+void
+print_operand_address (FILE *file, rtx addr)
+{
+ if (!addr)
+ error ("PRINT_OPERAND_ADDRESS, null pointer");
+
+ switch (GET_CODE (addr))
+ {
+ default:
+ fatal_insn ("invalid address", addr);
+ break;
+
+ case REG:
+ fprintf (file, "%s, 0", reg_names [REGNO (addr)]);
+ break;
+
+ case PLUS:
+ {
+ rtx reg = (rtx)0;
+ rtx offset = (rtx)0;
+ rtx arg0 = XEXP (addr, 0);
+ rtx arg1 = XEXP (addr, 1);
+
+ if (GET_CODE (arg0) == REG)
+ {
+ reg = arg0;
+ offset = arg1;
+ }
+ else if (GET_CODE (arg1) == REG)
+ {
+ reg = arg1;
+ offset = arg0;
+ }
+ else
+ fatal_insn ("no register in address", addr);
+
+ if (CONSTANT_P (offset))
+ {
+ fprintf (file, "%s, ", reg_names [REGNO (reg)]);
+ output_addr_const (file, offset);
+ }
+ else
+ fatal_insn ("address offset not a constant", addr);
+ }
+ break;
+
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST_INT:
+ case CONST:
+ output_addr_const (file, addr);
+ break;
+ }
+}
+
+/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
+
+static bool
+xtensa_output_addr_const_extra (FILE *fp, rtx x)
+{
+ if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
+ {
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_TPOFF:
+ output_addr_const (fp, XVECEXP (x, 0, 0));
+ fputs ("@TPOFF", fp);
+ return true;
+ case UNSPEC_DTPOFF:
+ output_addr_const (fp, XVECEXP (x, 0, 0));
+ fputs ("@DTPOFF", fp);
+ return true;
+ case UNSPEC_PLT:
+ if (flag_pic)
+ {
+ output_addr_const (fp, XVECEXP (x, 0, 0));
+ fputs ("@PLT", fp);
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+
+void
+xtensa_output_literal (FILE *file, rtx x, enum machine_mode mode, int labelno)
+{
+ long value_long[2];
+ REAL_VALUE_TYPE r;
+ int size;
+ rtx first, second;
+
+ fprintf (file, "\t.literal .LC%u, ", (unsigned) labelno);
+
+ switch (GET_MODE_CLASS (mode))
+ {
+ case MODE_FLOAT:
+ gcc_assert (GET_CODE (x) == CONST_DOUBLE);
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ switch (mode)
+ {
+ case SFmode:
+ REAL_VALUE_TO_TARGET_SINGLE (r, value_long[0]);
+ if (HOST_BITS_PER_LONG > 32)
+ value_long[0] &= 0xffffffff;
+ fprintf (file, "0x%08lx\n", value_long[0]);
+ break;
+
+ case DFmode:
+ REAL_VALUE_TO_TARGET_DOUBLE (r, value_long);
+ if (HOST_BITS_PER_LONG > 32)
+ {
+ value_long[0] &= 0xffffffff;
+ value_long[1] &= 0xffffffff;
+ }
+ fprintf (file, "0x%08lx, 0x%08lx\n",
+ value_long[0], value_long[1]);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ break;
+
+ case MODE_INT:
+ case MODE_PARTIAL_INT:
+ size = GET_MODE_SIZE (mode);
+ switch (size)
+ {
+ case 4:
+ output_addr_const (file, x);
+ fputs ("\n", file);
+ break;
+
+ case 8:
+ split_double (x, &first, &second);
+ output_addr_const (file, first);
+ fputs (", ", file);
+ output_addr_const (file, second);
+ fputs ("\n", file);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+/* Return the bytes needed to compute the frame pointer from the current
+ stack pointer. */
+
+#define STACK_BYTES (STACK_BOUNDARY / BITS_PER_UNIT)
+#define XTENSA_STACK_ALIGN(LOC) (((LOC) + STACK_BYTES-1) & ~(STACK_BYTES-1))
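+
+/* For example, with STACK_BOUNDARY == 128, STACK_BYTES is 16 and
+   XTENSA_STACK_ALIGN (40) rounds 40 bytes up to 48.  */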
+
+long
+compute_frame_size (int size)
+{
+ /* Add space for the incoming static chain value. */
+ if (cfun->static_chain_decl != NULL)
+ size += (1 * UNITS_PER_WORD);
+
+ xtensa_current_frame_size =
+ XTENSA_STACK_ALIGN (size
+ + crtl->outgoing_args_size
+ + (WINDOW_SIZE * UNITS_PER_WORD));
+ return xtensa_current_frame_size;
+}
+
+
+bool
+xtensa_frame_pointer_required (void)
+{
+ /* The code to expand builtin_frame_addr and builtin_return_addr
+ currently uses the hard_frame_pointer instead of frame_pointer.
+ This seems wrong but maybe it's necessary for other architectures.
+ This function is derived from the i386 code. */
+
+ if (cfun->machine->accesses_prev_frame)
+ return true;
+
+ return false;
+}
+
+
+/* The minimum frame is the register save area (4 words) plus the
+   static chain (1 word), rounded up so that the total frame size is
+   a multiple of 128 bits.  */
+#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
+
+void
+xtensa_expand_prologue (void)
+{
+ HOST_WIDE_INT total_size;
+ rtx size_rtx;
+ rtx insn, note_rtx;
+
+ total_size = compute_frame_size (get_frame_size ());
+ size_rtx = GEN_INT (total_size);
+
+ if (total_size < (1 << (12+3)))
+ insn = emit_insn (gen_entry (size_rtx));
+ else
+ {
+ /* Use a8 as a temporary since a0-a7 may be live. */
+ rtx tmp_reg = gen_rtx_REG (Pmode, A8_REG);
+ emit_insn (gen_entry (GEN_INT (MIN_FRAME_SIZE)));
+ emit_move_insn (tmp_reg, GEN_INT (total_size - MIN_FRAME_SIZE));
+ emit_insn (gen_subsi3 (tmp_reg, stack_pointer_rtx, tmp_reg));
+ insn = emit_insn (gen_movsi (stack_pointer_rtx, tmp_reg));
+ }
+
+ if (frame_pointer_needed)
+ {
+ if (cfun->machine->set_frame_ptr_insn)
+ {
+ rtx first;
+
+ push_topmost_sequence ();
+ first = get_insns ();
+ pop_topmost_sequence ();
+
+ /* For all instructions prior to set_frame_ptr_insn, replace
+ hard_frame_pointer references with stack_pointer. */
+ for (insn = first;
+ insn != cfun->machine->set_frame_ptr_insn;
+ insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ {
+ PATTERN (insn) = replace_rtx (copy_rtx (PATTERN (insn)),
+ hard_frame_pointer_rtx,
+ stack_pointer_rtx);
+ df_insn_rescan (insn);
+ }
+ }
+ }
+ else
+ insn = emit_insn (gen_movsi (hard_frame_pointer_rtx,
+ stack_pointer_rtx));
+ }
+
+ /* Create a note to describe the CFA. Because this is only used to set
+ DW_AT_frame_base for debug info, don't bother tracking changes through
+ each instruction in the prologue. It just takes up space. */
+ note_rtx = gen_rtx_SET (VOIDmode, (frame_pointer_needed
+ ? hard_frame_pointer_rtx
+ : stack_pointer_rtx),
+ plus_constant (stack_pointer_rtx, -total_size));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+}
+
+
+/* Clear variables at function end. */
+
+void
+xtensa_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ xtensa_current_frame_size = 0;
+}
+
+
+rtx
+xtensa_return_addr (int count, rtx frame)
+{
+ rtx result, retaddr, curaddr, label;
+
+ if (count == -1)
+ retaddr = gen_rtx_REG (Pmode, A0_REG);
+ else
+ {
+ rtx addr = plus_constant (frame, -4 * UNITS_PER_WORD);
+ addr = memory_address (Pmode, addr);
+ retaddr = gen_reg_rtx (Pmode);
+ emit_move_insn (retaddr, gen_rtx_MEM (Pmode, addr));
+ }
+
+ /* The 2 most-significant bits of the return address on Xtensa hold
+ the register window size. To get the real return address, these
+ bits must be replaced with the high bits from some address in the
+ code. */
+
+ /* Get the 2 high bits of a local label in the code. */
+ curaddr = gen_reg_rtx (Pmode);
+ label = gen_label_rtx ();
+ emit_label (label);
+ LABEL_PRESERVE_P (label) = 1;
+ emit_move_insn (curaddr, gen_rtx_LABEL_REF (Pmode, label));
+ emit_insn (gen_lshrsi3 (curaddr, curaddr, GEN_INT (30)));
+ emit_insn (gen_ashlsi3 (curaddr, curaddr, GEN_INT (30)));
+
+ /* Clear the 2 high bits of the return address. */
+ result = gen_reg_rtx (Pmode);
+ emit_insn (gen_ashlsi3 (result, retaddr, GEN_INT (2)));
+ emit_insn (gen_lshrsi3 (result, result, GEN_INT (2)));
+
+ /* Combine them to get the result. */
+ emit_insn (gen_iorsi3 (result, result, curaddr));
+ return result;
+}
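+
+/* A worked example with illustrative addresses: if the saved return
+   address is 0x40020304 (the top two bits encode the window size) and
+   the label above is at 0xd0001000 (top two bits 0b11), the two shift
+   pairs produce 0x00020304 and 0xc0000000, and the final IOR yields
+   the real return address 0xc0020304.  */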
+
+
+/* Create the va_list data type.
+
+ This structure is set up by __builtin_saveregs. The __va_reg field
+ points to a stack-allocated region holding the contents of the
+ incoming argument registers. The __va_ndx field is an index
+ initialized to the position of the first unnamed (variable)
+ argument. This same index is also used to address the arguments
+ passed in memory. Thus, the __va_stk field is initialized to point
+   to the position of the first argument in memory, offset to account
+   for the arguments passed in registers and for the size of the
+   argument registers not being 16-byte aligned.  E.g., there
+ are 6 argument registers of 4 bytes each, but we want the __va_ndx
+ for the first stack argument to have the maximal alignment of 16
+ bytes, so we offset the __va_stk address by 32 bytes so that
+ __va_stk[32] references the first argument on the stack. */
+
+static tree
+xtensa_build_builtin_va_list (void)
+{
+ tree f_stk, f_reg, f_ndx, record, type_decl;
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ type_decl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__va_list_tag"), record);
+
+ f_stk = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_stk"),
+ ptr_type_node);
+ f_reg = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_reg"),
+ ptr_type_node);
+ f_ndx = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_ndx"),
+ integer_type_node);
+
+ DECL_FIELD_CONTEXT (f_stk) = record;
+ DECL_FIELD_CONTEXT (f_reg) = record;
+ DECL_FIELD_CONTEXT (f_ndx) = record;
+
+ TYPE_STUB_DECL (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_stk;
+ DECL_CHAIN (f_stk) = f_reg;
+ DECL_CHAIN (f_reg) = f_ndx;
+
+ layout_type (record);
+ return record;
+}
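+
+/* The record built above corresponds to this C structure (a sketch
+   for illustration only; the actual type is constructed from tree
+   nodes so the fields can be addressed by the code below):
+
+     typedef struct {
+       void *__va_stk;
+       void *__va_reg;
+       int __va_ndx;
+     } __va_list_tag;  */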
+
+
+/* Save the incoming argument registers on the stack. Returns the
+ address of the saved registers. */
+
+static rtx
+xtensa_builtin_saveregs (void)
+{
+ rtx gp_regs;
+ int arg_words = crtl->args.info.arg_words;
+ int gp_left = MAX_ARGS_IN_REGISTERS - arg_words;
+
+ if (gp_left <= 0)
+ return const0_rtx;
+
+ /* Allocate the general-purpose register space. */
+ gp_regs = assign_stack_local
+ (BLKmode, MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD, -1);
+ set_mem_alias_set (gp_regs, get_varargs_alias_set ());
+
+ /* Now store the incoming registers. */
+ cfun->machine->need_a7_copy = true;
+ cfun->machine->vararg_a7 = true;
+ move_block_from_reg (GP_ARG_FIRST + arg_words,
+ adjust_address (gp_regs, BLKmode,
+ arg_words * UNITS_PER_WORD),
+ gp_left);
+ gcc_assert (cfun->machine->vararg_a7_copy != 0);
+ emit_insn_before (cfun->machine->vararg_a7_copy, get_insns ());
+
+ return XEXP (gp_regs, 0);
+}
+
+
+/* Implement `va_start' for varargs and stdarg. We look at the
+ current function to fill in an initial va_list. */
+
+static void
+xtensa_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
+{
+ tree f_stk, stk;
+ tree f_reg, reg;
+ tree f_ndx, ndx;
+ tree t, u;
+ int arg_words;
+
+ arg_words = crtl->args.info.arg_words;
+
+ f_stk = TYPE_FIELDS (va_list_type_node);
+ f_reg = DECL_CHAIN (f_stk);
+ f_ndx = DECL_CHAIN (f_reg);
+
+ stk = build3 (COMPONENT_REF, TREE_TYPE (f_stk), valist, f_stk, NULL_TREE);
+ reg = build3 (COMPONENT_REF, TREE_TYPE (f_reg), unshare_expr (valist),
+ f_reg, NULL_TREE);
+ ndx = build3 (COMPONENT_REF, TREE_TYPE (f_ndx), unshare_expr (valist),
+ f_ndx, NULL_TREE);
+
+ /* Call __builtin_saveregs; save the result in __va_reg */
+ u = make_tree (sizetype, expand_builtin_saveregs ());
+ u = fold_convert (ptr_type_node, u);
+ t = build2 (MODIFY_EXPR, ptr_type_node, reg, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* Set the __va_stk member to ($arg_ptr - 32). */
+ u = make_tree (ptr_type_node, virtual_incoming_args_rtx);
+ u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u, size_int (-32));
+ t = build2 (MODIFY_EXPR, ptr_type_node, stk, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* Set the __va_ndx member. If the first variable argument is on
+ the stack, adjust __va_ndx by 2 words to account for the extra
+ alignment offset for __va_stk. */
+ if (arg_words >= MAX_ARGS_IN_REGISTERS)
+ arg_words += 2;
+ t = build2 (MODIFY_EXPR, integer_type_node, ndx,
+ build_int_cst (integer_type_node, arg_words * UNITS_PER_WORD));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+}
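+
+/* In effect, for a function whose named arguments occupy N words, the
+   code above expands va_start (ap) roughly as follows (a C sketch;
+   __arg_pointer stands for the incoming argument pointer):
+
+     ap.__va_reg = __builtin_saveregs ();
+     ap.__va_stk = (char *) __arg_pointer - 32;
+     ap.__va_ndx = (N >= 6 ? N + 2 : N) * 4;  */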
+
+
+/* Implement `va_arg'. */
+
+static tree
+xtensa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p ATTRIBUTE_UNUSED)
+{
+ tree f_stk, stk;
+ tree f_reg, reg;
+ tree f_ndx, ndx;
+ tree type_size, array, orig_ndx, addr, size, va_size, t;
+ tree lab_false, lab_over, lab_false2;
+ bool indirect;
+
+ indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
+ if (indirect)
+ type = build_pointer_type (type);
+
+ /* Handle complex values as separate real and imaginary parts. */
+ if (TREE_CODE (type) == COMPLEX_TYPE)
+ {
+ tree real_part, imag_part;
+
+ real_part = xtensa_gimplify_va_arg_expr (valist, TREE_TYPE (type),
+ pre_p, NULL);
+ real_part = get_initialized_tmp_var (real_part, pre_p, NULL);
+
+ imag_part = xtensa_gimplify_va_arg_expr (unshare_expr (valist),
+ TREE_TYPE (type),
+ pre_p, NULL);
+ imag_part = get_initialized_tmp_var (imag_part, pre_p, NULL);
+
+ return build2 (COMPLEX_EXPR, type, real_part, imag_part);
+ }
+
+ f_stk = TYPE_FIELDS (va_list_type_node);
+ f_reg = DECL_CHAIN (f_stk);
+ f_ndx = DECL_CHAIN (f_reg);
+
+ stk = build3 (COMPONENT_REF, TREE_TYPE (f_stk), valist,
+ f_stk, NULL_TREE);
+ reg = build3 (COMPONENT_REF, TREE_TYPE (f_reg), unshare_expr (valist),
+ f_reg, NULL_TREE);
+ ndx = build3 (COMPONENT_REF, TREE_TYPE (f_ndx), unshare_expr (valist),
+ f_ndx, NULL_TREE);
+
+ type_size = size_in_bytes (type);
+ va_size = round_up (type_size, UNITS_PER_WORD);
+ gimplify_expr (&va_size, pre_p, NULL, is_gimple_val, fb_rvalue);
+
+
+ /* First align __va_ndx if necessary for this arg:
+
+ orig_ndx = (AP).__va_ndx;
+ if (__alignof__ (TYPE) > 4 )
+ orig_ndx = ((orig_ndx + __alignof__ (TYPE) - 1)
+ & -__alignof__ (TYPE)); */
+
+ orig_ndx = get_initialized_tmp_var (ndx, pre_p, NULL);
+
+ if (TYPE_ALIGN (type) > BITS_PER_WORD)
+ {
+ int align = MIN (TYPE_ALIGN (type), STACK_BOUNDARY) / BITS_PER_UNIT;
+
+ t = build2 (PLUS_EXPR, integer_type_node, unshare_expr (orig_ndx),
+ build_int_cst (integer_type_node, align - 1));
+ t = build2 (BIT_AND_EXPR, integer_type_node, t,
+ build_int_cst (integer_type_node, -align));
+ gimplify_assign (unshare_expr (orig_ndx), t, pre_p);
+ }
+
+
+ /* Increment __va_ndx to point past the argument:
+
+ (AP).__va_ndx = orig_ndx + __va_size (TYPE); */
+
+ t = fold_convert (integer_type_node, va_size);
+ t = build2 (PLUS_EXPR, integer_type_node, orig_ndx, t);
+ gimplify_assign (unshare_expr (ndx), t, pre_p);
+
+
+ /* Check if the argument is in registers:
+
+ if ((AP).__va_ndx <= __MAX_ARGS_IN_REGISTERS * 4
+ && !must_pass_in_stack (type))
+ __array = (AP).__va_reg; */
+
+ array = create_tmp_var (ptr_type_node, NULL);
+
+ lab_over = NULL;
+ if (!targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
+ {
+ lab_false = create_artificial_label (UNKNOWN_LOCATION);
+ lab_over = create_artificial_label (UNKNOWN_LOCATION);
+
+ t = build2 (GT_EXPR, boolean_type_node, unshare_expr (ndx),
+ build_int_cst (integer_type_node,
+ MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD));
+ t = build3 (COND_EXPR, void_type_node, t,
+ build1 (GOTO_EXPR, void_type_node, lab_false),
+ NULL_TREE);
+ gimplify_and_add (t, pre_p);
+
+ gimplify_assign (unshare_expr (array), reg, pre_p);
+
+ t = build1 (GOTO_EXPR, void_type_node, lab_over);
+ gimplify_and_add (t, pre_p);
+
+ t = build1 (LABEL_EXPR, void_type_node, lab_false);
+ gimplify_and_add (t, pre_p);
+ }
+
+
+ /* ...otherwise, the argument is on the stack (never split between
+ registers and the stack -- change __va_ndx if necessary):
+
+ else
+ {
+ if (orig_ndx <= __MAX_ARGS_IN_REGISTERS * 4)
+ (AP).__va_ndx = 32 + __va_size (TYPE);
+ __array = (AP).__va_stk;
+ } */
+
+ lab_false2 = create_artificial_label (UNKNOWN_LOCATION);
+
+ t = build2 (GT_EXPR, boolean_type_node, unshare_expr (orig_ndx),
+ build_int_cst (integer_type_node,
+ MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD));
+ t = build3 (COND_EXPR, void_type_node, t,
+ build1 (GOTO_EXPR, void_type_node, lab_false2),
+ NULL_TREE);
+ gimplify_and_add (t, pre_p);
+
+ t = size_binop (PLUS_EXPR, unshare_expr (va_size), size_int (32));
+ t = fold_convert (integer_type_node, t);
+ gimplify_assign (unshare_expr (ndx), t, pre_p);
+
+ t = build1 (LABEL_EXPR, void_type_node, lab_false2);
+ gimplify_and_add (t, pre_p);
+
+ gimplify_assign (array, stk, pre_p);
+
+ if (lab_over)
+ {
+ t = build1 (LABEL_EXPR, void_type_node, lab_over);
+ gimplify_and_add (t, pre_p);
+ }
+
+
+ /* Given the base array pointer (__array) and index to the subsequent
+ argument (__va_ndx), find the address:
+
+ __array + (AP).__va_ndx - (BYTES_BIG_ENDIAN && sizeof (TYPE) < 4
+ ? sizeof (TYPE)
+ : __va_size (TYPE))
+
+ The results are endian-dependent because values smaller than one word
+ are aligned differently. */
+
+
+ if (BYTES_BIG_ENDIAN && TREE_CODE (type_size) == INTEGER_CST)
+ {
+ t = fold_build2 (GE_EXPR, boolean_type_node, unshare_expr (type_size),
+ size_int (PARM_BOUNDARY / BITS_PER_UNIT));
+ t = fold_build3 (COND_EXPR, sizetype, t, unshare_expr (va_size),
+ unshare_expr (type_size));
+ size = t;
+ }
+ else
+ size = unshare_expr (va_size);
+
+ t = fold_convert (sizetype, unshare_expr (ndx));
+ t = build2 (MINUS_EXPR, sizetype, t, size);
+ addr = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (array), t);
+
+ addr = fold_convert (build_pointer_type (type), addr);
+ if (indirect)
+ addr = build_va_arg_indirect_ref (addr);
+ return build_va_arg_indirect_ref (addr);
+}
+
+
+/* Builtins. */
+
+enum xtensa_builtin
+{
+ XTENSA_BUILTIN_UMULSIDI3,
+ XTENSA_BUILTIN_THREAD_POINTER,
+ XTENSA_BUILTIN_SET_THREAD_POINTER,
+ XTENSA_BUILTIN_max
+};
+
+
+static void
+xtensa_init_builtins (void)
+{
+ tree ftype, decl;
+
+ ftype = build_function_type_list (unsigned_intDI_type_node,
+ unsigned_intSI_type_node,
+ unsigned_intSI_type_node, NULL_TREE);
+
+ decl = add_builtin_function ("__builtin_umulsidi3", ftype,
+ XTENSA_BUILTIN_UMULSIDI3, BUILT_IN_MD,
+ "__umulsidi3", NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ TREE_READONLY (decl) = 1;
+
+ if (TARGET_THREADPTR)
+ {
+ ftype = build_function_type (ptr_type_node, void_list_node);
+ decl = add_builtin_function ("__builtin_thread_pointer", ftype,
+ XTENSA_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ TREE_READONLY (decl) = 1;
+ TREE_NOTHROW (decl) = 1;
+
+ ftype = build_function_type_list (void_type_node, ptr_type_node,
+ NULL_TREE);
+ decl = add_builtin_function ("__builtin_set_thread_pointer", ftype,
+ XTENSA_BUILTIN_SET_THREAD_POINTER,
+ BUILT_IN_MD, NULL, NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+}
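+
+/* From C, the builtins registered above can be used as, e.g.
+   (illustrative; the thread-pointer builtins exist only when
+   TARGET_THREADPTR is configured):
+
+     unsigned long long p = __builtin_umulsidi3 (x, y);
+     void *tp = __builtin_thread_pointer ();  */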
+
+
+static tree
+xtensa_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
+ bool ignore ATTRIBUTE_UNUSED)
+{
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ tree arg0, arg1;
+
+ switch (fcode)
+ {
+ case XTENSA_BUILTIN_UMULSIDI3:
+ arg0 = args[0];
+ arg1 = args[1];
+ if ((TREE_CODE (arg0) == INTEGER_CST && TREE_CODE (arg1) == INTEGER_CST)
+ || TARGET_MUL32_HIGH)
+ return fold_build2 (MULT_EXPR, unsigned_intDI_type_node,
+ fold_convert (unsigned_intDI_type_node, arg0),
+ fold_convert (unsigned_intDI_type_node, arg1));
+ break;
+
+ case XTENSA_BUILTIN_THREAD_POINTER:
+ case XTENSA_BUILTIN_SET_THREAD_POINTER:
+ break;
+
+ default:
+ internal_error ("bad builtin code");
+ break;
+ }
+
+ return NULL;
+}
+
+
+static rtx
+xtensa_expand_builtin (tree exp, rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ rtx arg;
+
+ switch (fcode)
+ {
+ case XTENSA_BUILTIN_UMULSIDI3:
+ /* The umulsidi3 builtin is just a mechanism to avoid calling the real
+ __umulsidi3 function when the Xtensa configuration can directly
+ implement it. If not, just call the function. */
+ return expand_call (exp, target, ignore);
+
+ case XTENSA_BUILTIN_THREAD_POINTER:
+ if (!target || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+ emit_insn (gen_load_tp (target));
+ return target;
+
+ case XTENSA_BUILTIN_SET_THREAD_POINTER:
+ arg = expand_normal (CALL_EXPR_ARG (exp, 0));
+ if (!register_operand (arg, Pmode))
+ arg = copy_to_mode_reg (Pmode, arg);
+ emit_insn (gen_set_tp (arg));
+ return const0_rtx;
+
+ default:
+ internal_error ("bad builtin code");
+ }
+ return NULL_RTX;
+}
+
+/* Worker function for TARGET_PREFERRED_RELOAD_CLASS. */
+
+static reg_class_t
+xtensa_preferred_reload_class (rtx x, reg_class_t rclass)
+{
+ if (CONSTANT_P (x) && CONST_DOUBLE_P (x))
+ return NO_REGS;
+
+ /* Don't use the stack pointer or hard frame pointer for reloads!
+ The hard frame pointer would normally be OK except that it may
+ briefly hold an incoming argument in the prologue, and reload
+ won't know that it is live because the hard frame pointer is
+ treated specially. */
+
+ if (rclass == AR_REGS || rclass == GR_REGS)
+ return RL_REGS;
+
+ return rclass;
+}
+
+/* Worker function for TARGET_PREFERRED_OUTPUT_RELOAD_CLASS. */
+
+static reg_class_t
+xtensa_preferred_output_reload_class (rtx x ATTRIBUTE_UNUSED,
+ reg_class_t rclass)
+{
+ /* Don't use the stack pointer or hard frame pointer for reloads!
+ The hard frame pointer would normally be OK except that it may
+ briefly hold an incoming argument in the prologue, and reload
+ won't know that it is live because the hard frame pointer is
+ treated specially. */
+
+ if (rclass == AR_REGS || rclass == GR_REGS)
+ return RL_REGS;
+
+ return rclass;
+}
+
+/* Worker function for TARGET_SECONDARY_RELOAD. */
+
+static reg_class_t
+xtensa_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
+ enum machine_mode mode, secondary_reload_info *sri)
+{
+ int regno;
+
+ if (in_p && constantpool_mem_p (x))
+ {
+ if (rclass == FP_REGS)
+ return RL_REGS;
+
+ if (mode == QImode)
+ sri->icode = CODE_FOR_reloadqi_literal;
+ else if (mode == HImode)
+ sri->icode = CODE_FOR_reloadhi_literal;
+ }
+
+ regno = xt_true_regnum (x);
+ if (ACC_REG_P (regno))
+ return ((rclass == GR_REGS || rclass == RL_REGS) ? NO_REGS : RL_REGS);
+ if (rclass == ACC_REG)
+ return (GP_REG_P (regno) ? NO_REGS : RL_REGS);
+
+ return NO_REGS;
+}
+
+
+void
+order_regs_for_local_alloc (void)
+{
+ if (!leaf_function_p ())
+ {
+ memcpy (reg_alloc_order, reg_nonleaf_alloc_order,
+ FIRST_PSEUDO_REGISTER * sizeof (int));
+ }
+ else
+ {
+ int i, num_arg_regs;
+ int nxt = 0;
+
+ /* Use the AR registers in increasing order (skipping a0 and a1)
+ but save the incoming argument registers for a last resort. */
+ num_arg_regs = crtl->args.info.arg_words;
+ if (num_arg_regs > MAX_ARGS_IN_REGISTERS)
+ num_arg_regs = MAX_ARGS_IN_REGISTERS;
+ for (i = GP_ARG_FIRST; i < 16 - num_arg_regs; i++)
+ reg_alloc_order[nxt++] = i + num_arg_regs;
+ for (i = 0; i < num_arg_regs; i++)
+ reg_alloc_order[nxt++] = GP_ARG_FIRST + i;
+
+ /* List the coprocessor registers in order. */
+ for (i = 0; i < BR_REG_NUM; i++)
+ reg_alloc_order[nxt++] = BR_REG_FIRST + i;
+
+ /* List the FP registers in order for now. */
+ for (i = 0; i < 16; i++)
+ reg_alloc_order[nxt++] = FP_REG_FIRST + i;
+
+ /* GCC requires that we list *all* the registers.... */
+ reg_alloc_order[nxt++] = 0; /* a0 = return address */
+ reg_alloc_order[nxt++] = 1; /* a1 = stack pointer */
+ reg_alloc_order[nxt++] = 16; /* pseudo frame pointer */
+ reg_alloc_order[nxt++] = 17; /* pseudo arg pointer */
+
+ reg_alloc_order[nxt++] = ACC_REG_FIRST; /* MAC16 accumulator */
+ }
+}
+
+
+/* Some Xtensa targets support multiple bss sections. If the section
+ name ends with ".bss", add SECTION_BSS to the flags. */
+
+static unsigned int
+xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc)
+{
+ unsigned int flags = default_section_type_flags (decl, name, reloc);
+ const char *suffix;
+
+ suffix = strrchr (name, '.');
+ if (suffix && strcmp (suffix, ".bss") == 0)
+ {
+ if (!decl || (TREE_CODE (decl) == VAR_DECL
+ && DECL_INITIAL (decl) == NULL_TREE))
+ flags |= SECTION_BSS; /* @nobits */
+ else
+ warning (0, "only uninitialized variables can be placed in a "
+ ".bss section");
+ }
+
+ return flags;
+}
+
+
+/* The literal pool stays with the function. */
+
+static section *
+xtensa_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x ATTRIBUTE_UNUSED,
+ unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+{
+ return function_section (current_function_decl);
+}
+
+/* Worker function for TARGET_REGISTER_MOVE_COST. */
+
+static int
+xtensa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t from, reg_class_t to)
+{
+ if (from == to && from != BR_REGS && to != BR_REGS)
+ return 2;
+ else if (reg_class_subset_p (from, AR_REGS)
+ && reg_class_subset_p (to, AR_REGS))
+ return 2;
+ else if (reg_class_subset_p (from, AR_REGS) && to == ACC_REG)
+ return 3;
+ else if (from == ACC_REG && reg_class_subset_p (to, AR_REGS))
+ return 3;
+ else
+ return 10;
+}
+
+/* Worker function for TARGET_MEMORY_MOVE_COST. */
+
+static int
+xtensa_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t rclass ATTRIBUTE_UNUSED,
+ bool in ATTRIBUTE_UNUSED)
+{
+ return 4;
+}
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+xtensa_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case CONST_INT:
+ switch (outer_code)
+ {
+ case SET:
+ if (xtensa_simm12b (INTVAL (x)))
+ {
+ *total = 4;
+ return true;
+ }
+ break;
+ case PLUS:
+ if (xtensa_simm8 (INTVAL (x))
+ || xtensa_simm8x256 (INTVAL (x)))
+ {
+ *total = 0;
+ return true;
+ }
+ break;
+ case AND:
+ if (xtensa_mask_immediate (INTVAL (x)))
+ {
+ *total = 0;
+ return true;
+ }
+ break;
+ case COMPARE:
+ if ((INTVAL (x) == 0) || xtensa_b4const (INTVAL (x)))
+ {
+ *total = 0;
+ return true;
+ }
+ break;
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATE:
+ case ROTATERT:
+ /* No way to tell if X is the 2nd operand so be conservative. */
+ default: break;
+ }
+ if (xtensa_simm12b (INTVAL (x)))
+ *total = 5;
+ else if (TARGET_CONST16)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = 6;
+ return true;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ if (TARGET_CONST16)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = 5;
+ return true;
+
+ case CONST_DOUBLE:
+ if (TARGET_CONST16)
+ *total = COSTS_N_INSNS (4);
+ else
+ *total = 7;
+ return true;
+
+ case MEM:
+ {
+ int num_words =
+ (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) ? 2 : 1;
+
+ if (memory_address_p (GET_MODE (x), XEXP ((x), 0)))
+ *total = COSTS_N_INSNS (num_words);
+ else
+ *total = COSTS_N_INSNS (2*num_words);
+ return true;
+ }
+
+ case FFS:
+ case CTZ:
+ *total = COSTS_N_INSNS (TARGET_NSA ? 5 : 50);
+ return true;
+
+ case CLZ:
+ *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50);
+ return true;
+
+ case NOT:
+ *total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 3 : 2);
+ return true;
+
+ case AND:
+ case IOR:
+ case XOR:
+ if (GET_MODE (x) == DImode)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ if (GET_MODE (x) == DImode)
+ *total = COSTS_N_INSNS (50);
+ else
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case ABS:
+ {
+ enum machine_mode xmode = GET_MODE (x);
+ if (xmode == SFmode)
+ *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50);
+ else if (xmode == DFmode)
+ *total = COSTS_N_INSNS (50);
+ else
+ *total = COSTS_N_INSNS (4);
+ return true;
+ }
+
+ case PLUS:
+ case MINUS:
+ {
+ enum machine_mode xmode = GET_MODE (x);
+ if (xmode == SFmode)
+ *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50);
+ else if (xmode == DFmode || xmode == DImode)
+ *total = COSTS_N_INSNS (50);
+ else
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
+ case NEG:
+ *total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 4 : 2);
+ return true;
+
+ case MULT:
+ {
+ enum machine_mode xmode = GET_MODE (x);
+ if (xmode == SFmode)
+ *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 4 : 50);
+ else if (xmode == DFmode)
+ *total = COSTS_N_INSNS (50);
+ else if (xmode == DImode)
+ *total = COSTS_N_INSNS (TARGET_MUL32_HIGH ? 10 : 50);
+ else if (TARGET_MUL32)
+ *total = COSTS_N_INSNS (4);
+ else if (TARGET_MAC16)
+ *total = COSTS_N_INSNS (16);
+ else if (TARGET_MUL16)
+ *total = COSTS_N_INSNS (12);
+ else
+ *total = COSTS_N_INSNS (50);
+ return true;
+ }
+
+ case DIV:
+ case MOD:
+ {
+ enum machine_mode xmode = GET_MODE (x);
+ if (xmode == SFmode)
+ {
+ *total = COSTS_N_INSNS (TARGET_HARD_FLOAT_DIV ? 8 : 50);
+ return true;
+ }
+ else if (xmode == DFmode)
+ {
+ *total = COSTS_N_INSNS (50);
+ return true;
+ }
+ }
+ /* Fall through. */
+
+ case UDIV:
+ case UMOD:
+ {
+ enum machine_mode xmode = GET_MODE (x);
+ if (xmode == DImode)
+ *total = COSTS_N_INSNS (50);
+ else if (TARGET_DIV32)
+ *total = COSTS_N_INSNS (32);
+ else
+ *total = COSTS_N_INSNS (50);
+ return true;
+ }
+
+ case SQRT:
+ if (GET_MODE (x) == SFmode)
+ *total = COSTS_N_INSNS (TARGET_HARD_FLOAT_SQRT ? 8 : 50);
+ else
+ *total = COSTS_N_INSNS (50);
+ return true;
+
+ case SMIN:
+ case UMIN:
+ case SMAX:
+ case UMAX:
+ *total = COSTS_N_INSNS (TARGET_MINMAX ? 1 : 50);
+ return true;
+
+ case SIGN_EXTRACT:
+ case SIGN_EXTEND:
+ *total = COSTS_N_INSNS (TARGET_SEXT ? 1 : 2);
+ return true;
+
+ case ZERO_EXTRACT:
+ case ZERO_EXTEND:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)
+ > 4 * UNITS_PER_WORD);
+}
+
+/* Worker function for TARGET_FUNCTION_VALUE. */
+
+rtx
+xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
+ bool outgoing)
+{
+ return gen_rtx_REG ((INTEGRAL_TYPE_P (valtype)
+ && TYPE_PRECISION (valtype) < BITS_PER_WORD)
+ ? SImode : TYPE_MODE (valtype),
+ outgoing ? GP_OUTGOING_RETURN : GP_RETURN);
+}
+
+/* Worker function for TARGET_LIBCALL_VALUE. */
+
+static rtx
+xtensa_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG ((GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
+ ? SImode : mode, GP_RETURN);
+}
+
+/* Worker function for TARGET_FUNCTION_VALUE_REGNO_P.  */
+
+static bool
+xtensa_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == GP_RETURN);
+}
+
+/* The static chain is passed in memory.  Provide an rtx giving the
+   'mem' expression that denotes where it is stored.  */
+
+static rtx
+xtensa_static_chain (const_tree ARG_UNUSED (fndecl), bool incoming_p)
+{
+ rtx base = incoming_p ? arg_pointer_rtx : stack_pointer_rtx;
+ return gen_frame_mem (Pmode, plus_constant (base, -5 * UNITS_PER_WORD));
+}
+
+
+/* TRAMPOLINE_TEMPLATE: For Xtensa, the trampoline must perform an ENTRY
+ instruction with a minimal stack frame in order to get some free
+ registers. Once the actual call target is known, the proper stack frame
+ size is extracted from the ENTRY instruction at the target and the
+ current frame is adjusted to match. The trampoline then transfers
+ control to the instruction following the ENTRY at the target. Note:
+ this assumes that the target begins with an ENTRY instruction. */
+
+static void
+xtensa_asm_trampoline_template (FILE *stream)
+{
+ bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS);
+
+ fprintf (stream, "\t.begin no-transform\n");
+ fprintf (stream, "\tentry\tsp, %d\n", MIN_FRAME_SIZE);
+
+ if (use_call0)
+ {
+ /* Save the return address. */
+ fprintf (stream, "\tmov\ta10, a0\n");
+
+ /* Use a CALL0 instruction to skip past the constants and in the
+ process get the PC into A0. This allows PC-relative access to
+ the constants without relying on L32R. */
+ fprintf (stream, "\tcall0\t.Lskipconsts\n");
+ }
+ else
+ fprintf (stream, "\tj\t.Lskipconsts\n");
+
+ fprintf (stream, "\t.align\t4\n");
+ fprintf (stream, ".Lchainval:%s0\n", integer_asm_op (4, TRUE));
+ fprintf (stream, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE));
+ fprintf (stream, ".Lskipconsts:\n");
+
+ /* Load the static chain and function address from the trampoline. */
+ if (use_call0)
+ {
+ fprintf (stream, "\taddi\ta0, a0, 3\n");
+ fprintf (stream, "\tl32i\ta9, a0, 0\n");
+ fprintf (stream, "\tl32i\ta8, a0, 4\n");
+ }
+ else
+ {
+ fprintf (stream, "\tl32r\ta9, .Lchainval\n");
+ fprintf (stream, "\tl32r\ta8, .Lfnaddr\n");
+ }
+
+ /* Store the static chain. */
+ fprintf (stream, "\ts32i\ta9, sp, %d\n", MIN_FRAME_SIZE - 20);
+
+ /* Set the proper stack pointer value. */
+ fprintf (stream, "\tl32i\ta9, a8, 0\n");
+ fprintf (stream, "\textui\ta9, a9, %d, 12\n",
+ TARGET_BIG_ENDIAN ? 8 : 12);
+ fprintf (stream, "\tslli\ta9, a9, 3\n");
+ fprintf (stream, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE);
+ fprintf (stream, "\tsub\ta9, sp, a9\n");
+ fprintf (stream, "\tmovsp\tsp, a9\n");
+
+ if (use_call0)
+ /* Restore the return address. */
+ fprintf (stream, "\tmov\ta0, a10\n");
+
+ /* Jump to the instruction following the ENTRY. */
+ fprintf (stream, "\taddi\ta8, a8, 3\n");
+ fprintf (stream, "\tjx\ta8\n");
+
+ /* Pad size to a multiple of TRAMPOLINE_ALIGNMENT. */
+ if (use_call0)
+ fprintf (stream, "\t.byte\t0\n");
+ else
+ fprintf (stream, "\tnop\n");
+
+ fprintf (stream, "\t.end no-transform\n");
+}
+
+static void
+xtensa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain)
+{
+ rtx func = XEXP (DECL_RTL (fndecl), 0);
+ bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS);
+ int chain_off = use_call0 ? 12 : 8;
+ int func_off = use_call0 ? 16 : 12;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ emit_move_insn (adjust_address (m_tramp, SImode, chain_off), chain);
+ emit_move_insn (adjust_address (m_tramp, SImode, func_off), func);
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"),
+ LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
+}
+
+
+#include "gt-xtensa.h"
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
new file mode 100644
index 000000000..0a096cdb5
--- /dev/null
+++ b/gcc/config/xtensa/xtensa.h
@@ -0,0 +1,847 @@
+/* Definitions of Tensilica's Xtensa target machine for GNU compiler.
+ Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Get Xtensa configuration settings */
+#include "xtensa-config.h"
+
+/* External variables defined in xtensa.c. */
+
+extern unsigned xtensa_current_frame_size;
+
+/* Macros used in the machine description to select various Xtensa
+ configuration options. */
+#ifndef XCHAL_HAVE_MUL32_HIGH
+#define XCHAL_HAVE_MUL32_HIGH 0
+#endif
+#ifndef XCHAL_HAVE_RELEASE_SYNC
+#define XCHAL_HAVE_RELEASE_SYNC 0
+#endif
+#ifndef XCHAL_HAVE_S32C1I
+#define XCHAL_HAVE_S32C1I 0
+#endif
+#ifndef XCHAL_HAVE_THREADPTR
+#define XCHAL_HAVE_THREADPTR 0
+#endif
+#define TARGET_BIG_ENDIAN XCHAL_HAVE_BE
+#define TARGET_DENSITY XCHAL_HAVE_DENSITY
+#define TARGET_MAC16 XCHAL_HAVE_MAC16
+#define TARGET_MUL16 XCHAL_HAVE_MUL16
+#define TARGET_MUL32 XCHAL_HAVE_MUL32
+#define TARGET_MUL32_HIGH XCHAL_HAVE_MUL32_HIGH
+#define TARGET_DIV32 XCHAL_HAVE_DIV32
+#define TARGET_NSA XCHAL_HAVE_NSA
+#define TARGET_MINMAX XCHAL_HAVE_MINMAX
+#define TARGET_SEXT XCHAL_HAVE_SEXT
+#define TARGET_BOOLEANS XCHAL_HAVE_BOOLEANS
+#define TARGET_HARD_FLOAT XCHAL_HAVE_FP
+#define TARGET_HARD_FLOAT_DIV XCHAL_HAVE_FP_DIV
+#define TARGET_HARD_FLOAT_RECIP XCHAL_HAVE_FP_RECIP
+#define TARGET_HARD_FLOAT_SQRT XCHAL_HAVE_FP_SQRT
+#define TARGET_HARD_FLOAT_RSQRT XCHAL_HAVE_FP_RSQRT
+#define TARGET_ABS XCHAL_HAVE_ABS
+#define TARGET_ADDX XCHAL_HAVE_ADDX
+#define TARGET_RELEASE_SYNC XCHAL_HAVE_RELEASE_SYNC
+#define TARGET_S32C1I XCHAL_HAVE_S32C1I
+#define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS
+#define TARGET_THREADPTR XCHAL_HAVE_THREADPTR
+
+#define TARGET_DEFAULT \
+ ((XCHAL_HAVE_L32R ? 0 : MASK_CONST16) | \
+ MASK_SERIALIZE_VOLATILE)
+
+#ifndef HAVE_AS_TLS
+#define HAVE_AS_TLS 0
+#endif
+
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do { \
+ builtin_assert ("cpu=xtensa"); \
+ builtin_assert ("machine=xtensa"); \
+ builtin_define ("__xtensa__"); \
+ builtin_define ("__XTENSA__"); \
+ builtin_define ("__XTENSA_WINDOWED_ABI__"); \
+ builtin_define (TARGET_BIG_ENDIAN ? "__XTENSA_EB__" : "__XTENSA_EL__"); \
+ if (!TARGET_HARD_FLOAT) \
+ builtin_define ("__XTENSA_SOFT_FLOAT__"); \
+ } while (0)
+
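+/* For example, target-dependent code can test these macros with
+   preprocessor checks such as "#if defined (__XTENSA_EB__)" or
+   "#ifdef __XTENSA_SOFT_FLOAT__".  */
+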
+#define CPP_SPEC " %(subtarget_cpp_spec) "
+
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC ""
+#endif
+
+#define EXTRA_SPECS \
+ { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC },
+
+/* Target machine storage layout */
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)
+
+/* Define this if most significant word of a multiword number is the lowest. */
+#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)
+
+#define MAX_BITS_PER_WORD 32
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+#define MIN_UNITS_PER_WORD 4
+
+/* Width of a floating point register. */
+#define UNITS_PER_FPREG 4
+
+/* Size in bits of various types on the target machine. */
+#define INT_TYPE_SIZE 32
+#define SHORT_TYPE_SIZE 16
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Allocation boundary (in *bits*) for storing pointers in memory. */
+#define POINTER_BOUNDARY 32
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 32
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 32
+
+/* Alignment of field after 'int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* Every structure's size must be a multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* There is no point aligning anything to a rounder boundary than this. */
+#define BIGGEST_ALIGNMENT 128
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* Promote integer modes smaller than a word to SImode. Set UNSIGNEDP
+ for QImode, because there is no 8-bit load from memory with sign
+ extension. Otherwise, leave UNSIGNEDP alone, since Xtensa has 16-bit
+ loads both with and without sign extension. */
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ do { \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ { \
+ if ((MODE) == QImode) \
+ (UNSIGNEDP) = 1; \
+ (MODE) = SImode; \
+ } \
+ } while (0)
+
+/* Imitate the way many other C compilers handle alignment of
+ bitfields and the structures that contain them. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Disable the use of word-sized or smaller complex modes for structures,
+ and for function arguments in particular, where they cause problems with
+ register a7. The xtensa_copy_incoming_a7 function assumes that there is
+ a single reference to an argument in a7, but with small complex modes the
+ real and imaginary components may be extracted separately, leading to two
+ uses of the register, only one of which would be replaced. */
+#define MEMBER_TYPE_FORCES_BLK(FIELD, MODE) \
+ ((MODE) == CQImode || (MODE) == CHImode)
+
+/* Align string constants and constructors to at least a word boundary.
+ The typical use of this macro is to increase alignment for string
+ constants to be word aligned so that 'strcpy' calls that copy
+ constants can be done inline. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST || TREE_CODE (EXP) == CONSTRUCTOR) \
+ && (ALIGN) < BITS_PER_WORD \
+ ? BITS_PER_WORD \
+ : (ALIGN))
+
+/* Align arrays, unions and records to at least a word boundary.
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. Another is to
+ cause character arrays to be word-aligned so that 'strcpy' calls
+ that copy constants to character arrays can be done inline. */
+#undef DATA_ALIGNMENT
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ ((((ALIGN) < BITS_PER_WORD) \
+ && (TREE_CODE (TYPE) == ARRAY_TYPE \
+ || TREE_CODE (TYPE) == UNION_TYPE \
+ || TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
+
+/* Operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Xtensa loads are zero-extended by default. */
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ The fake frame pointer and argument pointer will never appear in
+ the generated code, since they will always be eliminated and replaced
+ by either the stack pointer or the hard frame pointer.
+
+ 0 - 15 AR[0] - AR[15]
+ 16 FRAME_POINTER (fake = initial sp)
+ 17 ARG_POINTER (fake = initial sp + framesize)
+ 18 BR[0] for floating-point CC
+ 19 - 34 FR[0] - FR[15]
+ 35 MAC16 accumulator */
+
+#define FIRST_PSEUDO_REGISTER 36
+
+/* Return the stabs register number to use for REGNO. */
+#define DBX_REGISTER_NUMBER(REGNO) xtensa_dbx_register_number (REGNO)
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator. */
+#define FIXED_REGISTERS \
+{ \
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, \
+}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+#define CALL_USED_REGISTERS \
+{ \
+ 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, \
+}
+
+/* For non-leaf procedures on Xtensa processors, the allocation order
+ is as specified below by REG_ALLOC_ORDER. For leaf procedures, we
+ want to use the lowest numbered registers first to minimize
+ register window overflows. However, local-alloc is not smart
+ enough to consider conflicts with incoming arguments. If an
+ incoming argument in a2 is live throughout the function and
+ local-alloc decides to use a2, then the incoming argument must
+ either be spilled or copied to another register. To get around
+ this, we define ADJUST_REG_ALLOC_ORDER to redefine
+ reg_alloc_order for leaf functions such that lowest numbered
+ registers are used first with the exception that the incoming
+ argument registers are not used until after other register choices
+ have been exhausted. */
+
+#define REG_ALLOC_ORDER \
+{ 8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, \
+ 18, \
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \
+ 0, 1, 16, 17, \
+ 35, \
+}
+
+#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc ()
+
+/* For Xtensa, the only point of this is to prevent GCC from otherwise
+ giving preference to call-used registers. To minimize window
+ overflows for the AR registers, we want to give preference to the
+ lower-numbered AR registers. For other register files, which are
+ not windowed, we still prefer call-used registers, if there are any. */
+extern const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER];
+#define LEAF_REGISTERS xtensa_leaf_regs
+
+/* For Xtensa, no remapping is necessary, but this macro must be
+ defined if LEAF_REGISTERS is defined. */
+#define LEAF_REG_REMAP(REGNO) (REGNO)
+
+/* This must be declared if LEAF_REGISTERS is set. */
+extern int leaf_function;
+
+/* Internal macros to classify a register number. */
+
+/* 16 address registers + fake registers */
+#define GP_REG_FIRST 0
+#define GP_REG_LAST 17
+#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1)
+
+/* Coprocessor registers */
+#define BR_REG_FIRST 18
+#define BR_REG_LAST 18
+#define BR_REG_NUM (BR_REG_LAST - BR_REG_FIRST + 1)
+
+/* 16 floating-point registers */
+#define FP_REG_FIRST 19
+#define FP_REG_LAST 34
+#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1)
+
+/* MAC16 accumulator */
+#define ACC_REG_FIRST 35
+#define ACC_REG_LAST 35
+#define ACC_REG_NUM (ACC_REG_LAST - ACC_REG_FIRST + 1)
+
+#define GP_REG_P(REGNO) ((unsigned) ((REGNO) - GP_REG_FIRST) < GP_REG_NUM)
+#define BR_REG_P(REGNO) ((unsigned) ((REGNO) - BR_REG_FIRST) < BR_REG_NUM)
+#define FP_REG_P(REGNO) ((unsigned) ((REGNO) - FP_REG_FIRST) < FP_REG_NUM)
+#define ACC_REG_P(REGNO) ((unsigned) ((REGNO) - ACC_REG_FIRST) < ACC_REG_NUM)
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (FP_REG_P (REGNO) ? \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG) : \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+
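+/* For example, an SImode value needs one AR register, while a DImode
+   or DFmode value (8 bytes) needs two consecutive AR registers.  */
+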
+/* Value is 1 if hard register REGNO can hold a value of machine-mode
+ MODE. */
+extern char xtensa_hard_regno_mode_ok[][FIRST_PSEUDO_REGISTER];
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ xtensa_hard_regno_mode_ok[(int) (MODE)][(REGNO)]
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((GET_MODE_CLASS (MODE1) == MODE_FLOAT || \
+ GET_MODE_CLASS (MODE1) == MODE_COMPLEX_FLOAT) \
+ == (GET_MODE_CLASS (MODE2) == MODE_FLOAT || \
+ GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT))
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM (GP_REG_FIRST + 1)
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 7)
+
+/* The register number of the frame pointer register, which is used to
+ access automatic variables in the stack frame. For Xtensa, this
+ register never appears in the output. It is always eliminated to
+ either the stack pointer or the hard frame pointer. */
+#define FRAME_POINTER_REGNUM (GP_REG_FIRST + 16)
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM (GP_REG_FIRST + 17)
+
+/* For now we don't try to use the full set of boolean registers. Without
+ software pipelining of FP operations, there's not much to gain and it's
+ a real pain to get them reloaded. */
+#define FPCC_REGNUM (BR_REG_FIRST + 0)
+
+/* It is as good or better to call a constant function address than to
+ call an address kept in a register. */
+#define NO_FUNCTION_CSE 1
+
+/* Xtensa processors have "register windows". GCC does not currently
+ take advantage of the possibility for variable-sized windows; instead,
+ we use a fixed window size of 8. */
+
+#define INCOMING_REGNO(OUT) \
+ ((GP_REG_P (OUT) && \
+ ((unsigned) ((OUT) - GP_REG_FIRST) >= WINDOW_SIZE)) ? \
+ (OUT) - WINDOW_SIZE : (OUT))
+
+#define OUTGOING_REGNO(IN) \
+ ((GP_REG_P (IN) && \
+ ((unsigned) ((IN) - GP_REG_FIRST) < WINDOW_SIZE)) ? \
+ (IN) + WINDOW_SIZE : (IN))
+
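+/* For example, a CALL8 rotates the register window by WINDOW_SIZE (8):
+   the caller's outgoing a10 (OUTGOING_REGNO (2) == 10) becomes the
+   callee's incoming a2, and conversely INCOMING_REGNO (10) == 2.  */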
+
+/* Define the classes of registers for register constraints in the
+ machine description. */
+enum reg_class
+{
+ NO_REGS, /* no registers in set */
+ BR_REGS, /* coprocessor boolean registers */
+ FP_REGS, /* floating point registers */
+ ACC_REG, /* MAC16 accumulator */
+ SP_REG, /* sp register (aka a1) */
+ RL_REGS, /* preferred reload regs (not sp or fp) */
+ GR_REGS, /* integer registers except sp */
+ AR_REGS, /* all integer registers */
+ ALL_REGS, /* all registers */
+ LIM_REG_CLASSES /* max value + 1 */
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define GENERAL_REGS AR_REGS
+
+/* An initializer containing the names of the register classes as C
+ string constants. These names are used in writing some of the
+ debugging dumps. */
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "BR_REGS", \
+ "FP_REGS", \
+ "ACC_REG", \
+ "SP_REG", \
+ "RL_REGS", \
+ "GR_REGS", \
+ "AR_REGS", \
+ "ALL_REGS" \
+}
+
+/* Contents of the register classes. The Nth integer specifies the
+ contents of class N. The way the integer MASK is interpreted is
+ that register R is in the class if 'MASK & (1 << R)' is 1. */
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000, 0x00000000 }, /* no registers */ \
+ { 0x00040000, 0x00000000 }, /* coprocessor boolean registers */ \
+ { 0xfff80000, 0x00000007 }, /* floating-point registers */ \
+ { 0x00000000, 0x00000008 }, /* MAC16 accumulator */ \
+ { 0x00000002, 0x00000000 }, /* stack pointer register */ \
+ { 0x0000ff7d, 0x00000000 }, /* preferred reload registers */ \
+ { 0x0000fffd, 0x00000000 }, /* general-purpose registers */ \
+ { 0x0003ffff, 0x00000000 }, /* integer registers */ \
+ { 0xffffffff, 0x0000000f } /* all registers */ \
+}
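+
+/* For example, BR_REGS contains only hard register 18 (b0), so its
+   first mask word is 1 << 18 == 0x00040000; the MAC16 accumulator is
+   register 35, i.e., bit 3 of the second word (0x00000008).  */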
+
+#define IRA_COVER_CLASSES \
+{ \
+ BR_REGS, FP_REGS, ACC_REG, AR_REGS, LIM_REG_CLASSES \
+}
+
+/* A C expression whose value is a register class containing hard
+   register REGNO.  In general there is more than one such class;
+ choose a class which is "minimal", meaning that no smaller class
+ also contains the register. */
+extern const enum reg_class xtensa_regno_to_class[FIRST_PSEUDO_REGISTER];
+
+#define REGNO_REG_CLASS(REGNO) xtensa_regno_to_class[ (REGNO) ]
+
+/* Use the Xtensa AR register file for base registers.
+ No index registers. */
+#define BASE_REG_CLASS AR_REGS
+#define INDEX_REG_CLASS NO_REGS
+
+/* The small_register_classes_for_mode_p hook must always return true for
+   Xtensa, because all of the 16 AR registers may be explicitly used in
+ the RTL, as either incoming or outgoing arguments. */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+#define CLASS_UNITS(mode, size) \
+ ((GET_MODE_SIZE (mode) + (size) - 1) / (size))
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ (CLASS_UNITS (MODE, UNITS_PER_WORD))
+
+
+/* Stack layout; function entry, exit and calling. */
+
+#define STACK_GROWS_DOWNWARD
+
+/* Offset within stack frame to start allocating local variables at. */
+#define STARTING_FRAME_OFFSET \
+ crtl->outgoing_args_size
+
+/* The ARG_POINTER and FRAME_POINTER are not real Xtensa registers, so
+ they are eliminated to either the stack pointer or hard frame pointer. */
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}}
+
+/* Specify the initial difference between the specified pair of registers. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ do { \
+ compute_frame_size (get_frame_size ()); \
+ switch (FROM) \
+ { \
+ case FRAME_POINTER_REGNUM: \
+ (OFFSET) = 0; \
+ break; \
+ case ARG_POINTER_REGNUM: \
+ (OFFSET) = xtensa_current_frame_size; \
+ break; \
+ default: \
+ gcc_unreachable (); \
+ } \
+ } while (0)
+
+/* If defined, the maximum amount of space required for outgoing
+ arguments will be computed and placed into the variable
+ 'crtl->outgoing_args_size'. No space will be pushed
+ onto the stack for each call; instead, the function prologue
+ should increase the stack frame size by this amount. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Offset from the argument pointer register to the first argument's
+ address. On some machines it may depend on the data type of the
+ function. If 'ARGS_GROW_DOWNWARD', this is the offset to the
+ location above the first argument's address. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Align stack frames on 128 bits for Xtensa. This is necessary for
+ 128-bit datatypes defined in TIE (e.g., for Vectra). */
+#define STACK_BOUNDARY 128
+
+/* Use a fixed register window size of 8. */
+#define WINDOW_SIZE 8
+
+/* Symbolic macros for the registers used to return integer, floating
+ point, and values of coprocessor and user-defined modes. */
+#define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE)
+#define GP_OUTGOING_RETURN (GP_REG_FIRST + 2)
+
+/* Symbolic macros for the first/last argument registers. */
+#define GP_ARG_FIRST (GP_REG_FIRST + 2)
+#define GP_ARG_LAST (GP_REG_FIRST + 7)
+#define GP_OUTGOING_ARG_FIRST (GP_REG_FIRST + 2 + WINDOW_SIZE)
+#define GP_OUTGOING_ARG_LAST (GP_REG_FIRST + 7 + WINDOW_SIZE)
+
+#define MAX_ARGS_IN_REGISTERS 6
+
+/* Don't worry about compatibility with PCC. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* A C expression that is nonzero if REGNO is the number of a hard
+ register in which function arguments are sometimes passed. This
+ does *not* include implicit arguments such as the static chain and
+ the structure-value address. On many machines, no registers can be
+ used for this purpose since all function arguments are pushed on
+ the stack. */
+#define FUNCTION_ARG_REGNO_P(N) \
+ ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST)
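+
+/* Assuming GP_REG_FIRST is 0, this accepts hard registers 10 through 15,
+   i.e. the outgoing argument registers a10-a15, consistent with
+   MAX_ARGS_IN_REGISTERS above.  */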
+
+/* Record the number of argument words seen so far, along with a flag to
+ indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG
+ is used for both incoming and outgoing args, so a separate flag is
+   needed.)  */
+typedef struct xtensa_args
+{
+ int arg_words;
+ int incoming;
+} CUMULATIVE_ARGS;
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ init_cumulative_args (&CUM, 0)
+
+#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME) \
+ init_cumulative_args (&CUM, 1)
+
+/* Profiling Xtensa code is typically done with the built-in profiling
+ feature of Tensilica's instruction set simulator, which does not
+ require any compiler support. Profiling code on a real (i.e.,
+ non-simulated) Xtensa processor is currently only supported by
+ GNU/Linux with glibc. The glibc version of _mcount doesn't require
+ counter variables. The _mcount function needs the current PC and
+ the current return address to identify an arc in the call graph.
+ Pass the current return address as the first argument; the current
+ PC is available as a0 in _mcount's register window. Both of these
+ values contain window size information in the two most significant
+ bits; we assume that _mcount will mask off those bits. The call to
+ _mcount uses a window size of 8 to make sure that it doesn't clobber
+ any incoming argument values. */
+
+#define NO_PROFILE_COUNTERS 1
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ do { \
+ fprintf (FILE, "\t%s\ta10, a0\n", TARGET_DENSITY ? "mov.n" : "mov"); \
+ if (flag_pic) \
+ { \
+ fprintf (FILE, "\tmovi\ta8, _mcount@PLT\n"); \
+ fprintf (FILE, "\tcallx8\ta8\n"); \
+ } \
+ else \
+ fprintf (FILE, "\tcall8\t_mcount\n"); \
+ } while (0)
+
+/* Stack pointer value doesn't matter at exit. */
+#define EXIT_IGNORE_STACK 1
+
+/* Size in bytes of the trampoline, as an integer. Make sure this is
+ a multiple of TRAMPOLINE_ALIGNMENT to avoid -Wpadded warnings. */
+#define TRAMPOLINE_SIZE (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS ? 60 : 52)
+
+/* Alignment required for trampolines, in bits. */
+#define TRAMPOLINE_ALIGNMENT 32
+
+/* If defined, a C expression that produces the machine-specific code
+ to setup the stack so that arbitrary frames can be accessed.
+
+ On Xtensa, a stack back-trace must always begin from the stack pointer,
+ so that the register overflow save area can be located. However, the
+ stack-walking code in GCC always begins from the hard_frame_pointer
+ register, not the stack pointer. The frame pointer is usually equal
+ to the stack pointer, but the __builtin_return_address and
+ __builtin_frame_address functions will not work if count > 0 and
+ they are called from a routine that uses alloca. These functions
+   are not guaranteed to work at all if count > 0, so maybe that is OK.
+
+ A nicer solution would be to allow the architecture-specific files to
+ specify whether to start from the stack pointer or frame pointer. That
+ would also allow us to skip the machine->accesses_prev_frame stuff that
+ we currently need to ensure that there is a frame pointer when these
+ builtin functions are used. */
+
+#define SETUP_FRAME_ADDRESSES xtensa_setup_frame_addresses
+
+/* A C expression whose value is RTL representing the address in a
+ stack frame where the pointer to the caller's frame is stored.
+ Assume that FRAMEADDR is an RTL expression for the address of the
+ stack frame itself.
+
+ For Xtensa, there is no easy way to get the frame pointer if it is
+ not equivalent to the stack pointer. Moreover, the result of this
+ macro is used for continuing to walk back up the stack, so it must
+ return the stack pointer address. Thus, there is some inconsistency
+ here in that __builtin_frame_address will return the frame pointer
+ when count == 0 and the stack pointer when count > 0. */
+
+#define DYNAMIC_CHAIN_ADDRESS(frame) \
+ gen_rtx_PLUS (Pmode, frame, GEN_INT (-3 * UNITS_PER_WORD))
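+
+/* With 4-byte words this reads the word at FRAME - 12, which is where
+   the windowed ABI's register-overflow mechanism saves the caller's
+   a1 (sp).  */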
+
+/* Define this if the return address of a particular stack frame is
+ accessed from the frame pointer of the previous stack frame. */
+#define RETURN_ADDR_IN_PREVIOUS_FRAME
+
+/* A C expression whose value is RTL representing the value of the
+ return address for the frame COUNT steps up from the current
+ frame, after the prologue. */
+#define RETURN_ADDR_RTX xtensa_return_addr
+
+/* Addressing modes, and classification of registers for them. */
+
+/* C expressions which are nonzero if register number NUM is suitable
+ for use as a base or index register in operand addresses. */
+
+#define REGNO_OK_FOR_INDEX_P(NUM) 0
+#define REGNO_OK_FOR_BASE_P(NUM) \
+ (GP_REG_P (NUM) || GP_REG_P ((unsigned) reg_renumber[NUM]))
+
+/* C expressions that are nonzero if X (assumed to be a `reg' RTX) is
+ valid for use as a base or index register. */
+
+#ifdef REG_OK_STRICT
+#define REG_OK_STRICT_FLAG 1
+#else
+#define REG_OK_STRICT_FLAG 0
+#endif
+
+#define BASE_REG_P(X, STRICT) \
+ ((!(STRICT) && REGNO (X) >= FIRST_PSEUDO_REGISTER) \
+ || REGNO_OK_FOR_BASE_P (REGNO (X)))
+
+#define REG_OK_FOR_INDEX_P(X) 0
+#define REG_OK_FOR_BASE_P(X) BASE_REG_P (X, REG_OK_STRICT_FLAG)
+
+/* Maximum number of registers that can appear in a valid memory address. */
+#define MAX_REGS_PER_ADDRESS 1
+
+/* A C expression that is 1 if the RTX X is a constant which is a
+ valid address. This is defined to be the same as 'CONSTANT_P (X)',
+ but rejecting CONST_DOUBLE. */
+#define CONSTANT_ADDRESS_P(X) \
+ ((GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == CONST_INT || GET_CODE (X) == HIGH \
+ || (GET_CODE (X) == CONST)))
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+#define LEGITIMATE_CONSTANT_P(X) (! xtensa_tls_referenced_p (X))
+
+/* A C expression that is nonzero if X is a legitimate immediate
+ operand on the target machine when generating position independent
+ code. */
+#define LEGITIMATE_PIC_OPERAND_P(X) \
+ ((GET_CODE (X) != SYMBOL_REF \
+ || (SYMBOL_REF_LOCAL_P (X) && !SYMBOL_REF_EXTERNAL_P (X))) \
+ && GET_CODE (X) != LABEL_REF \
+ && GET_CODE (X) != CONST)
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE (SImode)
+
+/* Define this as 1 if 'char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 0
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 4
+#define MAX_MOVE_MAX 4
+
+/* Prefer word-sized loads. */
+#define SLOW_BYTE_ACCESS 1
+
+/* Shift instructions ignore all but the low-order few bits. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 1)
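+
+/* For example, clz is defined as 32 at zero to match what the NSAU
+   instruction returns for a zero operand; ctz at zero only gets the
+   conventional -1.  */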
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode SImode
+
+/* A function address in a call instruction is a word address (for
+   indexing purposes) so give the MEM rtx a word's mode.  */
+#define FUNCTION_MODE SImode
+
+#define BRANCH_COST(speed_p, predictable_p) 3
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+#define REGISTER_NAMES \
+{ \
+ "a0", "sp", "a2", "a3", "a4", "a5", "a6", "a7", \
+ "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", \
+ "fp", "argp", "b0", \
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \
+ "acc" \
+}
+
+/* If defined, a C initializer for an array of structures containing a
+ name and a register number. This macro defines additional names
+ for hard registers, thus allowing the 'asm' option in declarations
+ to refer to registers using alternate names. */
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ { "a1", 1 + GP_REG_FIRST } \
+}
+
+#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global\t"
+
+/* Declare an uninitialized external linkage data object. */
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* This is how to output an element of a case-vector that is absolute. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+ fprintf (STREAM, "%s%sL%u\n", integer_asm_op (4, TRUE), \
+ LOCAL_LABEL_PREFIX, VALUE)
+
+/* This is how to output an element of a case-vector that is relative.
+ This is used for pc-relative code. */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+ do { \
+ fprintf (STREAM, "%s%sL%u-%sL%u\n", integer_asm_op (4, TRUE), \
+ LOCAL_LABEL_PREFIX, (VALUE), \
+ LOCAL_LABEL_PREFIX, (REL)); \
+ } while (0)
+
+/* This is how to output an assembler line that says to advance the
+ location counter to a multiple of 2**LOG bytes. */
+#define ASM_OUTPUT_ALIGN(STREAM, LOG) \
+ do { \
+ if ((LOG) != 0) \
+ fprintf (STREAM, "\t.align\t%d\n", 1 << (LOG)); \
+ } while (0)
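+
+/* For example, a LOG of 2 emits "\t.align\t4", i.e. a 4-byte boundary.  */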
+
+/* Indicate that jump tables go in the text section. This is
+ necessary when compiling PIC code. */
+#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic)
+
+
+/* Define the strings to put out for each section in the object file. */
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+
+/* Define output to appear before the constant pool. */
+#define ASM_OUTPUT_POOL_PROLOGUE(FILE, FUNNAME, FUNDECL, SIZE) \
+ do { \
+ if ((SIZE) > 0) \
+ { \
+ resolve_unique_section ((FUNDECL), 0, flag_function_sections); \
+ switch_to_section (function_section (FUNDECL)); \
+ fprintf (FILE, "\t.literal_position\n"); \
+ } \
+ } while (0)
+
+
+/* A C statement (with or without semicolon) to output a constant in
+ the constant pool, if it needs special treatment. */
+#define ASM_OUTPUT_SPECIAL_POOL_ENTRY(FILE, X, MODE, ALIGN, LABELNO, JUMPTO) \
+ do { \
+ xtensa_output_literal (FILE, X, MODE, LABELNO); \
+ goto JUMPTO; \
+ } while (0)
+
+/* How to start an assembler comment. */
+#define ASM_COMMENT_START "#"
+
+/* Exception handling. Xtensa uses much of the standard DWARF2 unwinding
+ machinery, but the variable size register window save areas are too
+ complicated to efficiently describe with CFI entries. The CFA must
+ still be specified in DWARF so that DW_AT_frame_base is set correctly
+ for debugging. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 0)
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (0)
+#define DWARF_FRAME_REGISTERS 16
+#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) + 2 : INVALID_REGNUM)
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ (flag_pic \
+ ? (((GLOBAL) ? DW_EH_PE_indirect : 0) \
+ | DW_EH_PE_pcrel | DW_EH_PE_sdata4) \
+ : DW_EH_PE_absptr)
+
+/* Emit a PC-relative relocation. */
+#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \
+ do { \
+ fputs (integer_asm_op (SIZE, FALSE), FILE); \
+ assemble_name (FILE, LABEL); \
+ fputs ("@pcrel", FILE); \
+ } while (0)
+
+/* The Xtensa constant pool breaks the devices in crtstuff.c that control
+   the section in which code resides.  We have to write it as asm code.  Use
+ a MOVI and let the assembler relax it -- for the .init and .fini
+ sections, the assembler knows to put the literal in the right
+ place. */
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\
+ movi\ta8, " USER_LABEL_PREFIX #FUNC "\n\
+ callx8\ta8\n" \
+ TEXT_SECTION_ASM_OP);
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
new file mode 100644
index 000000000..d6eb54891
--- /dev/null
+++ b/gcc/config/xtensa/xtensa.md
@@ -0,0 +1,1914 @@
+;; GCC machine description for Tensilica's Xtensa architecture.
+;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+(define_constants [
+ (A0_REG 0)
+ (A1_REG 1)
+ (A7_REG 7)
+ (A8_REG 8)
+
+ (UNSPEC_NOP 2)
+ (UNSPEC_PLT 3)
+ (UNSPEC_RET_ADDR 4)
+ (UNSPEC_TPOFF 5)
+ (UNSPEC_DTPOFF 6)
+ (UNSPEC_TLS_FUNC 7)
+ (UNSPEC_TLS_ARG 8)
+ (UNSPEC_TLS_CALL 9)
+ (UNSPEC_TP 10)
+ (UNSPEC_MEMW 11)
+
+ (UNSPECV_SET_FP 1)
+ (UNSPECV_ENTRY 2)
+ (UNSPECV_S32RI 4)
+ (UNSPECV_S32C1I 5)
+ (UNSPECV_EH_RETURN 6)
+ (UNSPECV_SET_TP 7)
+])
+
+;; This code iterator allows signed and unsigned widening multiplications
+;; to use the same template.
+(define_code_iterator any_extend [sign_extend zero_extend])
+
+;; <u> expands to an empty string when doing a signed operation and
+;; "u" when doing an unsigned operation.
+(define_code_attr u [(sign_extend "") (zero_extend "u")])
+
+;; <su> is like <u>, but the signed form expands to "s" rather than "".
+(define_code_attr su [(sign_extend "s") (zero_extend "u")])
+
+;; This code iterator allows four integer min/max operations to be
+;; generated from one template.
+(define_code_iterator any_minmax [smin umin smax umax])
+
+;; <minmax> expands to the opcode name for any_minmax operations.
+(define_code_attr minmax [(smin "min") (umin "minu")
+ (smax "max") (umax "maxu")])
+
+;; This code iterator is for floating-point comparisons.
+(define_code_iterator any_scc_sf [eq lt le uneq unlt unle unordered])
+(define_code_attr scc_sf [(eq "oeq") (lt "olt") (le "ole")
+ (uneq "ueq") (unlt "ult") (unle "ule")
+ (unordered "un")])
+
+;; This iterator and attribute allow most of the atomic operations to
+;; be defined from a single template.
+(define_code_iterator ATOMIC [and ior xor plus minus mult])
+(define_code_attr atomic [(and "and") (ior "ior") (xor "xor")
+ (plus "add") (minus "sub") (mult "nand")])
+
+;; This mode iterator allows the HI and QI patterns to be defined from
+;; the same template.
+(define_mode_iterator HQI [HI QI])
+
+
+;; Attributes.
+
+(define_attr "type"
+ "unknown,jump,call,load,store,move,arith,multi,nop,farith,fmadd,fdiv,fsqrt,fconv,fload,fstore,mul16,mul32,div32,mac16,rsr,wsr,entry"
+ (const_string "unknown"))
+
+(define_attr "mode"
+ "unknown,none,QI,HI,SI,DI,SF,DF,BL"
+ (const_string "unknown"))
+
+(define_attr "length" "" (const_int 1))
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+ [(set_attr "type" "multi")])
+
+
+;; Pipeline model.
+
+;; The Xtensa basically has a simple 5-stage RISC pipeline.
+;; Most instructions complete in 1 cycle, and it is OK to assume that
+;; everything is fully pipelined. The exceptions have special insn
+;; reservations in the pipeline description below. The Xtensa can
+;; issue one instruction per cycle, so defining CPU units is unnecessary.
+
+(define_insn_reservation "xtensa_any_insn" 1
+ (eq_attr "type" "!load,fload,rsr,mul16,mul32,fmadd,fconv")
+ "nothing")
+
+(define_insn_reservation "xtensa_memory" 2
+ (eq_attr "type" "load,fload")
+ "nothing")
+
+(define_insn_reservation "xtensa_sreg" 2
+ (eq_attr "type" "rsr")
+ "nothing")
+
+(define_insn_reservation "xtensa_mul16" 2
+ (eq_attr "type" "mul16")
+ "nothing")
+
+(define_insn_reservation "xtensa_mul32" 2
+ (eq_attr "type" "mul32")
+ "nothing")
+
+(define_insn_reservation "xtensa_fmadd" 4
+ (eq_attr "type" "fmadd")
+ "nothing")
+
+(define_insn_reservation "xtensa_fconv" 2
+ (eq_attr "type" "fconv")
+ "nothing")
+
+;; Include predicates and constraints.
+
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; Addition.
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,a,a,a")
+ (plus:SI (match_operand:SI 1 "register_operand" "%d,d,r,r,r")
+ (match_operand:SI 2 "add_operand" "d,O,r,J,N")))]
+ ""
+ "@
+ add.n\t%0, %1, %2
+ addi.n\t%0, %1, %d2
+ add\t%0, %1, %2
+ addi\t%0, %1, %d2
+ addmi\t%0, %1, %x2"
+ [(set_attr "type" "arith,arith,arith,arith,arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "2,2,3,3,3")])
+
+(define_insn "*addx"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 3 "addsubx_operand" "i"))
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_ADDX"
+ "addx%3\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
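+
+;; ADDX only supports scale factors of 2, 4 and 8; for example,
+;; "addx4 a5, a3, a4" computes a5 = a3 * 4 + a4, matching the
+;; (plus (mult ...) ...) form above.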
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (plus:SF (match_operand:SF 1 "register_operand" "%f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "add.s\t%0, %1, %2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Subtraction.
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "sub\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "*subx"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (minus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 3 "addsubx_operand" "i"))
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_ADDX"
+ "subx%3\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (minus:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "sub.s\t%0, %1, %2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Multiplication.
+
+(define_expand "<u>mulsidi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand"))
+ (any_extend:DI (match_operand:SI 2 "register_operand"))))]
+ "TARGET_MUL32_HIGH"
+{
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
+ emit_insn (gen_<u>mulsi3_highpart (gen_highpart (SImode, operands[0]),
+ operands[1], operands[2]));
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp));
+ DONE;
+})
+
+(define_insn "<u>mulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "%r"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "r")))
+ (const_int 32))))]
+ "TARGET_MUL32_HIGH"
+ "mul<su>h\t%0, %1, %2"
+ [(set_attr "type" "mul32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (mult:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_MUL32"
+ "mull\t%0, %1, %2"
+ [(set_attr "type" "mul32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "mulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=C,A")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "register_operand" "%r,r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "register_operand" "r,r"))))]
+ "TARGET_MUL16 || TARGET_MAC16"
+ "@
+ mul16s\t%0, %1, %2
+ mul.aa.ll\t%1, %2"
+ [(set_attr "type" "mul16,mac16")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "umulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=C,A")
+ (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "register_operand" "%r,r"))
+ (zero_extend:SI
+ (match_operand:HI 2 "register_operand" "r,r"))))]
+ "TARGET_MUL16 || TARGET_MAC16"
+ "@
+ mul16u\t%0, %1, %2
+ umul.aa.ll\t%1, %2"
+ [(set_attr "type" "mul16,mac16")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "muladdhisi"
+ [(set (match_operand:SI 0 "register_operand" "=A")
+ (plus:SI (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "register_operand" "%r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "register_operand" "r")))
+ (match_operand:SI 3 "register_operand" "0")))]
+ "TARGET_MAC16"
+ "mula.aa.ll\t%1, %2"
+ [(set_attr "type" "mac16")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "mulsubhisi"
+ [(set (match_operand:SI 0 "register_operand" "=A")
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 2 "register_operand" "%r"))
+ (sign_extend:SI
+ (match_operand:HI 3 "register_operand" "r")))))]
+ "TARGET_MAC16"
+ "muls.aa.ll\t%2, %3"
+ [(set_attr "type" "mac16")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (mult:SF (match_operand:SF 1 "register_operand" "%f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "mul.s\t%0, %1, %2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "fmasf4"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (fma:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")
+ (match_operand:SF 3 "register_operand" "0")))]
+ "TARGET_HARD_FLOAT"
+ "madd.s\t%0, %1, %2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+;; Note that (C - A*B) = (-A*B + C)
+(define_insn "fnmasf4"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (fma:SF (neg:SF (match_operand:SF 1 "register_operand" "f"))
+ (match_operand:SF 2 "register_operand" "f")
+ (match_operand:SF 3 "register_operand" "0")))]
+ "TARGET_HARD_FLOAT"
+ "msub.s\t%0, %1, %2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Division.
+
+(define_insn "divsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (div:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_DIV32"
+ "quos\t%0, %1, %2"
+ [(set_attr "type" "div32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (udiv:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_DIV32"
+ "quou\t%0, %1, %2"
+ [(set_attr "type" "div32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT_DIV"
+ "div.s\t%0, %1, %2"
+ [(set_attr "type" "fdiv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "*recipsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "const_float_1_operand" "")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT_RECIP && flag_unsafe_math_optimizations"
+ "recip.s\t%0, %2"
+ [(set_attr "type" "fdiv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Remainders.
+
+(define_insn "modsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (mod:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_DIV32"
+ "rems\t%0, %1, %2"
+ [(set_attr "type" "div32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "umodsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (umod:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_DIV32"
+ "remu\t%0, %1, %2"
+ [(set_attr "type" "div32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+
+;; Square roots.
+
+(define_insn "sqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT_SQRT"
+ "sqrt.s\t%0, %1"
+ [(set_attr "type" "fsqrt")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "*rsqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "const_float_1_operand" "")
+ (sqrt:SF (match_operand:SF 2 "register_operand" "f"))))]
+ "TARGET_HARD_FLOAT_RSQRT && flag_unsafe_math_optimizations"
+ "rsqrt.s\t%0, %2"
+ [(set_attr "type" "fsqrt")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Absolute value.
+
+(define_insn "abssi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (abs:SI (match_operand:SI 1 "register_operand" "r")))]
+ "TARGET_ABS"
+ "abs\t%0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (abs:SF (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "abs.s\t%0, %1"
+ [(set_attr "type" "farith")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Min and max.
+
+(define_insn "<code>si3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (any_minmax:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_MINMAX"
+ "<minmax>\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+
+;; Count leading/trailing zeros and find first bit.
+
+(define_insn "clzsi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (clz:SI (match_operand:SI 1 "register_operand" "r")))]
+ "TARGET_NSA"
+ "nsau\t%0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_expand "ctzsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ctz:SI (match_operand:SI 1 "register_operand" "")))]
+ "TARGET_NSA"
+{
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_negsi2 (temp, operands[1]));
+ emit_insn (gen_andsi3 (temp, temp, operands[1]));
+ emit_insn (gen_clzsi2 (temp, temp));
+ emit_insn (gen_negsi2 (temp, temp));
+ emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (31)));
+ DONE;
+})
+
+(define_expand "ffssi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ffs:SI (match_operand:SI 1 "register_operand" "")))]
+ "TARGET_NSA"
+{
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_negsi2 (temp, operands[1]));
+ emit_insn (gen_andsi3 (temp, temp, operands[1]));
+ emit_insn (gen_clzsi2 (temp, temp));
+ emit_insn (gen_negsi2 (temp, temp));
+ emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (32)));
+ DONE;
+})
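+
+;; Both expansions above rely on x & -x isolating the lowest set bit:
+;; ctz (x) = 31 - clz (x & -x), and ffs (x) = 32 - clz (x & -x).  For
+;; x = 8, x & -x = 8 and clz = 28, giving ctz = 3 and ffs = 4; for
+;; x = 0, NSAU returns 32, so ffs (0) correctly comes out as 0.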
+
+
+;; Negation and one's complement.
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "neg\t%0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (not:SI (match_operand:SI 1 "register_operand" "")))]
+ ""
+{
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_movsi (temp, constm1_rtx));
+ emit_insn (gen_xorsi3 (operands[0], temp, operands[1]));
+ DONE;
+})
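+
+;; Xtensa has no one's-complement instruction, so NOT is synthesized
+;; above as an XOR with the all-ones constant.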
+
+(define_insn "negsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "neg.s\t%0, %1"
+ [(set_attr "type" "farith")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Logical instructions.
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (and:SI (match_operand:SI 1 "register_operand" "%r,r")
+ (match_operand:SI 2 "mask_operand" "P,r")))]
+ ""
+ "@
+ extui\t%0, %1, 0, %K2
+ and\t%0, %1, %2"
+ [(set_attr "type" "arith,arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (ior:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "or\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (xor:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "xor\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+
+;; Zero-extend instructions.
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (zero_extend:SI (match_operand:HI 1 "nonimmed_operand" "r,U")))]
+ ""
+ "@
+ extui\t%0, %1, 0, 16
+ l16ui\t%0, %1"
+ [(set_attr "type" "arith,load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (zero_extend:SI (match_operand:QI 1 "nonimmed_operand" "r,U")))]
+ ""
+ "@
+ extui\t%0, %1, 0, 8
+ l8ui\t%0, %1"
+ [(set_attr "type" "arith,load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+
+;; Sign-extend instructions.
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "")))]
+ ""
+{
+ if (sext_operand (operands[1], HImode))
+ emit_insn (gen_extendhisi2_internal (operands[0], operands[1]));
+ else
+ xtensa_extend_reg (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "extendhisi2_internal"
+ [(set (match_operand:SI 0 "register_operand" "=B,a")
+ (sign_extend:SI (match_operand:HI 1 "sext_operand" "r,U")))]
+ ""
+ "@
+ sext\t%0, %1, 15
+ l16si\t%0, %1"
+ [(set_attr "type" "arith,load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_expand "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "")))]
+ ""
+{
+ if (TARGET_SEXT)
+ emit_insn (gen_extendqisi2_internal (operands[0], operands[1]));
+ else
+ xtensa_extend_reg (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "extendqisi2_internal"
+ [(set (match_operand:SI 0 "register_operand" "=B")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "r")))]
+ "TARGET_SEXT"
+ "sext\t%0, %1, 7"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+
+;; Field extract instructions.
+
+(define_expand "extv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "TARGET_SEXT"
+{
+ if (!sext_fldsz_operand (operands[2], SImode))
+ FAIL;
+
+ /* We could expand to a right shift followed by SEXT but that's
+ no better than the standard left and right shift sequence. */
+ if (!lsbitnum_operand (operands[3], SImode))
+ FAIL;
+
+ emit_insn (gen_extv_internal (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+(define_insn "extv_internal"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "sext_fldsz_operand" "i")
+ (match_operand:SI 3 "lsbitnum_operand" "i")))]
+ "TARGET_SEXT"
+{
+ int fldsz = INTVAL (operands[2]);
+ operands[2] = GEN_INT (fldsz - 1);
+ return "sext\t%0, %1, %2";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))]
+ ""
+{
+ if (!extui_fldsz_operand (operands[2], SImode))
+ FAIL;
+ emit_insn (gen_extzv_internal (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+(define_insn "extzv_internal"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "extui_fldsz_operand" "i")
+ (match_operand:SI 3 "const_int_operand" "i")))]
+ ""
+{
+ int shift;
+ if (BITS_BIG_ENDIAN)
+ shift = (32 - (INTVAL (operands[2]) + INTVAL (operands[3]))) & 0x1f;
+ else
+ shift = INTVAL (operands[3]) & 0x1f;
+ operands[3] = GEN_INT (shift);
+ return "extui\t%0, %1, %3, %2";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+
+;; Conversions.
+
+(define_insn "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (fix:SI (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "trunc.s\t%0, %1, 0"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "fixuns_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unsigned_fix:SI (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "utrunc.s\t%0, %1, 0"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "floatsisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float:SF (match_operand:SI 1 "register_operand" "a")))]
+ "TARGET_HARD_FLOAT"
+ "float.s\t%0, %1, 0"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "floatunssisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unsigned_float:SF (match_operand:SI 1 "register_operand" "a")))]
+ "TARGET_HARD_FLOAT"
+ "ufloat.s\t%0, %1, 0"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Data movement instructions.
+
+;; 64-bit Integer moves
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "nonimmed_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+{
+ if (CONSTANT_P (operands[1]) && !TARGET_CONST16)
+ operands[1] = force_const_mem (DImode, operands[1]);
+
+ if (!register_operand (operands[0], DImode)
+ && !register_operand (operands[1], DImode))
+ operands[1] = force_reg (DImode, operands[1]);
+
+ operands[1] = xtensa_copy_incoming_a7 (operands[1]);
+})
+
+(define_insn_and_split "movdi_internal"
+ [(set (match_operand:DI 0 "nonimmed_operand" "=a,W,a,a,U")
+ (match_operand:DI 1 "move_operand" "r,i,T,U,r"))]
+ "register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode)"
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))]
+{
+ xtensa_split_operand_pair (operands, SImode);
+ if (reg_overlap_mentioned_p (operands[0], operands[3]))
+ {
+ rtx tmp;
+ tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
+ tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
+ }
+})
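+
+;; If the first destination word overlaps the second source word, the
+;; two word moves above are swapped so that the overlapping source is
+;; read before it is clobbered.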
+
+;; 32-bit Integer moves
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmed_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+{
+ if (xtensa_emit_move_sequence (operands, SImode))
+ DONE;
+})
+
+(define_insn "movsi_internal"
+ [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,D,R,R,a,q,a,W,a,a,U,*a,*A")
+ (match_operand:SI 1 "move_operand" "M,D,d,R,D,d,r,r,I,i,T,U,r,*A,*r"))]
+ "xtensa_valid_move (SImode, operands)"
+ "@
+ movi.n\t%0, %x1
+ mov.n\t%0, %1
+ mov.n\t%0, %1
+ %v1l32i.n\t%0, %1
+ %v0s32i.n\t%1, %0
+ %v0s32i.n\t%1, %0
+ mov\t%0, %1
+ movsp\t%0, %1
+ movi\t%0, %x1
+ const16\t%0, %t1\;const16\t%0, %b1
+ %v1l32r\t%0, %1
+ %v1l32i\t%0, %1
+ %v0s32i\t%1, %0
+ rsr\t%0, ACCLO
+ wsr\t%1, ACCLO"
+ [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,load,load,store,rsr,wsr")
+ (set_attr "mode" "SI")
+ (set_attr "length" "2,2,2,2,2,2,3,3,3,6,3,3,3,3,3")])
+
+;; 16-bit Integer moves
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmed_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+{
+ if (xtensa_emit_move_sequence (operands, HImode))
+ DONE;
+})
+
+(define_insn "movhi_internal"
+ [(set (match_operand:HI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A")
+ (match_operand:HI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))]
+ "xtensa_valid_move (HImode, operands)"
+ "@
+ movi.n\t%0, %x1
+ mov.n\t%0, %1
+ mov\t%0, %1
+ movi\t%0, %x1
+ %v1l16ui\t%0, %1
+ %v0s16i\t%1, %0
+ rsr\t%0, ACCLO
+ wsr\t%1, ACCLO"
+ [(set_attr "type" "move,move,move,move,load,store,rsr,wsr")
+ (set_attr "mode" "HI")
+ (set_attr "length" "2,2,3,3,3,3,3,3")])
+
+;; 8-bit Integer moves
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmed_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+{
+ if (xtensa_emit_move_sequence (operands, QImode))
+ DONE;
+})
+
+(define_insn "movqi_internal"
+ [(set (match_operand:QI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A")
+ (match_operand:QI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))]
+ "xtensa_valid_move (QImode, operands)"
+ "@
+ movi.n\t%0, %x1
+ mov.n\t%0, %1
+ mov\t%0, %1
+ movi\t%0, %x1
+ %v1l8ui\t%0, %1
+ %v0s8i\t%1, %0
+ rsr\t%0, ACCLO
+ wsr\t%1, ACCLO"
+ [(set_attr "type" "move,move,move,move,load,store,rsr,wsr")
+ (set_attr "mode" "QI")
+ (set_attr "length" "2,2,3,3,3,3,3,3")])
+
+;; Sub-word reloads from the constant pool.
+
+(define_expand "reload<mode>_literal"
+ [(parallel [(match_operand:HQI 0 "register_operand" "=r")
+ (match_operand:HQI 1 "constantpool_operand" "")
+ (match_operand:SI 2 "register_operand" "=&r")])]
+ ""
+{
+ rtx lit, scratch;
+ unsigned word_off, byte_off;
+
+ if (MEM_P (operands[1]))
+ {
+ lit = operands[1];
+ word_off = 0;
+ byte_off = 0;
+ }
+ else
+ {
+ gcc_assert (GET_CODE (operands[1]) == SUBREG);
+ lit = SUBREG_REG (operands[1]);
+ word_off = SUBREG_BYTE (operands[1]) & ~(UNITS_PER_WORD - 1);
+ byte_off = SUBREG_BYTE (operands[1]) - word_off;
+ }
+
+ lit = adjust_address (lit, SImode, word_off);
+ scratch = operands[2];
+ emit_insn (gen_movsi (scratch, lit));
+ emit_insn (gen_mov<mode> (operands[0],
+ gen_rtx_SUBREG (<MODE>mode, scratch, byte_off)));
+
+ DONE;
+})
+
+;; 32-bit floating point moves
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmed_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+{
+ if (!TARGET_CONST16 && CONSTANT_P (operands[1]))
+ operands[1] = force_const_mem (SFmode, operands[1]);
+
+ if ((!register_operand (operands[0], SFmode)
+ && !register_operand (operands[1], SFmode))
+ || (FP_REG_P (xt_true_regnum (operands[0]))
+ && !(reload_in_progress | reload_completed)
+ && (constantpool_mem_p (operands[1])
+ || CONSTANT_P (operands[1]))))
+ operands[1] = force_reg (SFmode, operands[1]);
+
+ operands[1] = xtensa_copy_incoming_a7 (operands[1]);
+})
+
+(define_insn "movsf_internal"
+ [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,U,D,D,R,a,f,a,W,a,a,U")
+ (match_operand:SF 1 "move_operand" "f,U,f,d,R,d,r,r,f,iF,T,U,r"))]
+ "((register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode))
+ && !(FP_REG_P (xt_true_regnum (operands[0]))
+ && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))"
+ "@
+ mov.s\t%0, %1
+ %v1lsi\t%0, %1
+ %v0ssi\t%1, %0
+ mov.n\t%0, %1
+ %v1l32i.n\t%0, %1
+ %v0s32i.n\t%1, %0
+ mov\t%0, %1
+ wfr\t%0, %1
+ rfr\t%0, %1
+ const16\t%0, %t1\;const16\t%0, %b1
+ %v1l32r\t%0, %1
+ %v1l32i\t%0, %1
+ %v0s32i\t%1, %0"
+ [(set_attr "type" "farith,fload,fstore,move,load,store,move,farith,farith,move,load,load,store")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3,3,3,2,2,2,3,3,3,6,3,3,3")])
+
+(define_insn "*lsiu"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (mem:SF (plus:SI (match_operand:SI 1 "register_operand" "+a")
+ (match_operand:SI 2 "fpmem_offset_operand" "i"))))
+ (set (match_dup 1)
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_HARD_FLOAT"
+{
+ if (TARGET_SERIALIZE_VOLATILE && volatile_refs_p (PATTERN (insn)))
+ output_asm_insn ("memw", operands);
+ return "lsiu\t%0, %1, %2";
+}
+ [(set_attr "type" "fload")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "*ssiu"
+ [(set (mem:SF (plus:SI (match_operand:SI 0 "register_operand" "+a")
+ (match_operand:SI 1 "fpmem_offset_operand" "i")))
+ (match_operand:SF 2 "register_operand" "f"))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (match_dup 1)))]
+ "TARGET_HARD_FLOAT"
+{
+ if (TARGET_SERIALIZE_VOLATILE && volatile_refs_p (PATTERN (insn)))
+ output_asm_insn ("memw", operands);
+ return "ssiu\t%2, %0, %1";
+}
+ [(set_attr "type" "fstore")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+;; 64-bit floating point moves
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmed_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+{
+ if (CONSTANT_P (operands[1]) && !TARGET_CONST16)
+ operands[1] = force_const_mem (DFmode, operands[1]);
+
+ if (!register_operand (operands[0], DFmode)
+ && !register_operand (operands[1], DFmode))
+ operands[1] = force_reg (DFmode, operands[1]);
+
+ operands[1] = xtensa_copy_incoming_a7 (operands[1]);
+})
+
+(define_insn_and_split "movdf_internal"
+ [(set (match_operand:DF 0 "nonimmed_operand" "=a,W,a,a,U")
+ (match_operand:DF 1 "move_operand" "r,iF,T,U,r"))]
+ "register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode)"
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))]
+{
+ xtensa_split_operand_pair (operands, SFmode);
+ if (reg_overlap_mentioned_p (operands[0], operands[3]))
+ {
+ rtx tmp;
+ tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
+ tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
+ }
+})
+
+;; Block moves
+
+(define_expand "movmemsi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand:BLK 1 "" ""))
+ (use (match_operand:SI 2 "arith_operand" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))])]
+ ""
+{
+ if (!xtensa_expand_block_move (operands))
+ FAIL;
+ DONE;
+})
+
+
+;; Shift instructions.
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "arith_operand" "")))]
+ ""
+{
+ operands[1] = xtensa_copy_incoming_a7 (operands[1]);
+})
+
+(define_insn "ashlsi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (ashift:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "J,r")))]
+ ""
+ "@
+ slli\t%0, %1, %R2
+ ssl\t%2\;sll\t%0, %1"
+ [(set_attr "type" "arith,arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,6")])
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "J,r")))]
+ ""
+ "@
+ srai\t%0, %1, %R2
+ ssr\t%2\;sra\t%0, %1"
+ [(set_attr "type" "arith,arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,6")])
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "J,r")))]
+ ""
+{
+ if (which_alternative == 0)
+ {
+ if ((INTVAL (operands[2]) & 0x1f) < 16)
+ return "srli\t%0, %1, %R2";
+ else
+ return "extui\t%0, %1, %R2, %L2";
+ }
+ return "ssr\t%2\;srl\t%0, %1";
+}
+ [(set_attr "type" "arith,arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,6")])
+
+(define_insn "rotlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (rotate:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "J,r")))]
+ ""
+ "@
+ ssai\t%L2\;src\t%0, %1, %1
+ ssl\t%2\;src\t%0, %1, %1"
+ [(set_attr "type" "multi,multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "6,6")])
+
+(define_insn "rotrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (rotatert:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "J,r")))]
+ ""
+ "@
+ ssai\t%R2\;src\t%0, %1, %1
+ ssr\t%2\;src\t%0, %1, %1"
+ [(set_attr "type" "multi,multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "6,6")])
+
+
+;; Comparisons.
+
+;; Conditional branches.
+
+(define_expand "cbranchsi4"
+ [(match_operator 0 "comparison_operator"
+ [(match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")])
+ (match_operand 3 "")]
+ ""
+{
+ xtensa_expand_conditional_branch (operands, SImode);
+ DONE;
+})
+
+(define_expand "cbranchsf4"
+ [(match_operator 0 "comparison_operator"
+ [(match_operand:SF 1 "register_operand")
+ (match_operand:SF 2 "register_operand")])
+ (match_operand 3 "")]
+ "TARGET_HARD_FLOAT"
+{
+ xtensa_expand_conditional_branch (operands, SFmode);
+ DONE;
+})
+
+;; Branch patterns for standard integer comparisons
+
+(define_insn "*btrue"
+ [(set (pc)
+ (if_then_else (match_operator 3 "branch_operator"
+ [(match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "branch_operand" "K,r")])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return xtensa_emit_branch (false, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump,jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3,3")])
+
+(define_insn "*bfalse"
+ [(set (pc)
+ (if_then_else (match_operator 3 "branch_operator"
+ [(match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "branch_operand" "K,r")])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+{
+ return xtensa_emit_branch (true, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump,jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3,3")])
+
+(define_insn "*ubtrue"
+ [(set (pc)
+ (if_then_else (match_operator 3 "ubranch_operator"
+ [(match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "ubranch_operand" "L,r")])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return xtensa_emit_branch (false, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump,jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3,3")])
+
+(define_insn "*ubfalse"
+ [(set (pc)
+ (if_then_else (match_operator 3 "ubranch_operator"
+ [(match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "ubranch_operand" "L,r")])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+{
+ return xtensa_emit_branch (true, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump,jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3,3")])
+
+;; Branch patterns for bit testing
+
+(define_insn "*bittrue"
+ [(set (pc)
+ (if_then_else (match_operator 3 "boolean_operator"
+ [(zero_extract:SI
+ (match_operand:SI 0 "register_operand" "r,r")
+ (const_int 1)
+ (match_operand:SI 1 "arith_operand" "J,r"))
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return xtensa_emit_bit_branch (false, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_insn "*bitfalse"
+ [(set (pc)
+ (if_then_else (match_operator 3 "boolean_operator"
+ [(zero_extract:SI
+ (match_operand:SI 0 "register_operand" "r,r")
+ (const_int 1)
+ (match_operand:SI 1 "arith_operand" "J,r"))
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+{
+ return xtensa_emit_bit_branch (true, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_insn "*masktrue"
+ [(set (pc)
+ (if_then_else (match_operator 3 "boolean_operator"
+ [(and:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r"))
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ switch (GET_CODE (operands[3]))
+ {
+ case EQ: return "bnone\t%0, %1, %2";
+ case NE: return "bany\t%0, %1, %2";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_insn "*maskfalse"
+ [(set (pc)
+ (if_then_else (match_operator 3 "boolean_operator"
+ [(and:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r"))
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+{
+ switch (GET_CODE (operands[3]))
+ {
+ case EQ: return "bany\t%0, %1, %2";
+ case NE: return "bnone\t%0, %1, %2";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+
+;; Define the loop insns used by bct optimization to represent the
+;; start and end of a zero-overhead loop (in loop.c). This start
+;; template generates the loop insn; the end template doesn't generate
+;; any instructions since loop end is handled in hardware.
+
+(define_insn "zero_cost_loop_start"
+ [(set (pc)
+ (if_then_else (eq (match_operand:SI 0 "register_operand" "a")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (reg:SI 19)
+ (plus:SI (match_dup 0) (const_int -1)))]
+ ""
+ "loopnez\t%0, %l1"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_insn "zero_cost_loop_end"
+ [(set (pc)
+ (if_then_else (ne (reg:SI 19) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (reg:SI 19)
+ (plus:SI (reg:SI 19) (const_int -1)))]
+ ""
+{
+ xtensa_emit_loop_end (insn, operands);
+ return "";
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "0")])
+
+
+;; Setting a register from a comparison.
+
+(define_expand "cstoresi4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operator 1 "xtensa_cstoresi_operator"
+ [(match_operand:SI 2 "register_operand")
+ (match_operand:SI 3 "nonmemory_operand")])]
+ ""
+{
+ if (!xtensa_expand_scc (operands, SImode))
+ FAIL;
+ DONE;
+})
+
+(define_expand "cstoresf4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "comparison_operator"
+ [(match_operand:SF 2 "register_operand")
+ (match_operand:SF 3 "register_operand")])]
+ "TARGET_HARD_FLOAT"
+{
+ if (!xtensa_expand_scc (operands, SFmode))
+ FAIL;
+ DONE;
+})
+
+
+
+;; Conditional moves.
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "register_operand" "")))]
+ ""
+{
+ if (!xtensa_expand_conditional_move (operands, 0))
+ FAIL;
+ DONE;
+})
+
+(define_expand "movsfcc"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (match_operand 1 "comparison_operator" "")
+ (match_operand:SF 2 "register_operand" "")
+ (match_operand:SF 3 "register_operand" "")))]
+ ""
+{
+ if (!xtensa_expand_conditional_move (operands, 1))
+ FAIL;
+ DONE;
+})
+
+(define_insn "movsicc_internal0"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (if_then_else:SI (match_operator 4 "branch_operator"
+ [(match_operand:SI 1 "register_operand" "r,r")
+ (const_int 0)])
+ (match_operand:SI 2 "register_operand" "r,0")
+ (match_operand:SI 3 "register_operand" "0,r")))]
+ ""
+{
+ return xtensa_emit_movcc (which_alternative == 1, false, false, operands);
+}
+ [(set_attr "type" "move,move")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "movsicc_internal1"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (if_then_else:SI (match_operator 4 "boolean_operator"
+ [(match_operand:CC 1 "register_operand" "b,b")
+ (const_int 0)])
+ (match_operand:SI 2 "register_operand" "r,0")
+ (match_operand:SI 3 "register_operand" "0,r")))]
+ "TARGET_BOOLEANS"
+{
+ return xtensa_emit_movcc (which_alternative == 1, false, true, operands);
+}
+ [(set_attr "type" "move,move")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "movsfcc_internal0"
+ [(set (match_operand:SF 0 "register_operand" "=a,a,f,f")
+ (if_then_else:SF (match_operator 4 "branch_operator"
+ [(match_operand:SI 1 "register_operand" "r,r,r,r")
+ (const_int 0)])
+ (match_operand:SF 2 "register_operand" "r,0,f,0")
+ (match_operand:SF 3 "register_operand" "0,r,0,f")))]
+ ""
+{
+ return xtensa_emit_movcc ((which_alternative & 1) == 1,
+ which_alternative >= 2, false, operands);
+}
+ [(set_attr "type" "move,move,move,move")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3,3,3,3")])
+
+(define_insn "movsfcc_internal1"
+ [(set (match_operand:SF 0 "register_operand" "=a,a,f,f")
+ (if_then_else:SF (match_operator 4 "boolean_operator"
+ [(match_operand:CC 1 "register_operand" "b,b,b,b")
+ (const_int 0)])
+ (match_operand:SF 2 "register_operand" "r,0,f,0")
+ (match_operand:SF 3 "register_operand" "0,r,0,f")))]
+ "TARGET_BOOLEANS"
+{
+ return xtensa_emit_movcc ((which_alternative & 1) == 1,
+ which_alternative >= 2, true, operands);
+}
+ [(set_attr "type" "move,move,move,move")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3,3,3,3")])
+
+
+;; Floating-point comparisons.
+
+(define_insn "s<code>_sf"
+ [(set (match_operand:CC 0 "register_operand" "=b")
+ (any_scc_sf:CC (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "<scc_sf>.s\t%0, %1, %2"
+ [(set_attr "type" "farith")
+ (set_attr "mode" "BL")
+ (set_attr "length" "3")])
+
+
+;; Unconditional branches.
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "j\t%l0"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_expand "indirect_jump"
+ [(set (pc)
+ (match_operand 0 "register_operand" ""))]
+ ""
+{
+ rtx dest = operands[0];
+ if (GET_CODE (dest) != REG || GET_MODE (dest) != Pmode)
+ operands[0] = copy_to_mode_reg (Pmode, dest);
+
+ emit_jump_insn (gen_indirect_jump_internal (dest));
+ DONE;
+})
+
+(define_insn "indirect_jump_internal"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "jx\t%0"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+
+(define_expand "tablejump"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+{
+ rtx target = operands[0];
+ if (flag_pic)
+ {
+ /* For PIC, the table entry is relative to the start of the table. */
+ rtx label = gen_reg_rtx (SImode);
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (label, gen_rtx_LABEL_REF (SImode, operands[1]));
+ emit_insn (gen_addsi3 (target, operands[0], label));
+ }
+ emit_jump_insn (gen_tablejump_internal (target, operands[1]));
+ DONE;
+})
+
+(define_insn "tablejump_internal"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jx\t%0"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+
+;; Function calls.
+
+(define_expand "sym_PLT"
+ [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_PLT))]
+ ""
+ "")
+
+(define_expand "call"
+ [(call (match_operand 0 "memory_operand" "")
+ (match_operand 1 "" ""))]
+ ""
+{
+ rtx addr = XEXP (operands[0], 0);
+ if (flag_pic && GET_CODE (addr) == SYMBOL_REF
+ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
+ addr = gen_sym_PLT (addr);
+ if (!call_insn_operand (addr, VOIDmode))
+ XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr);
+})
+
+(define_insn "call_internal"
+ [(call (mem (match_operand:SI 0 "call_insn_operand" "nir"))
+ (match_operand 1 "" "i"))]
+ ""
+{
+ return xtensa_emit_call (0, operands);
+}
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_expand "call_value"
+ [(set (match_operand 0 "register_operand" "")
+ (call (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")))]
+ ""
+{
+ rtx addr = XEXP (operands[1], 0);
+ if (flag_pic && GET_CODE (addr) == SYMBOL_REF
+ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
+ addr = gen_sym_PLT (addr);
+ if (!call_insn_operand (addr, VOIDmode))
+ XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr);
+})
+
+(define_insn "call_value_internal"
+ [(set (match_operand 0 "register_operand" "=a")
+ (call (mem (match_operand:SI 1 "call_insn_operand" "nir"))
+ (match_operand 2 "" "i")))]
+ ""
+{
+ return xtensa_emit_call (1, operands);
+}
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
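+;; ENTRY rotates the register window by the amount encoded in the
+;; caller's CALLn instruction and allocates the new frame by
+;; subtracting the constant frame size from the stack pointer.
+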
+(define_insn "entry"
+ [(set (reg:SI A1_REG)
+ (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")]
+ UNSPECV_ENTRY))]
+ ""
+ "entry\tsp, %0"
+ [(set_attr "type" "entry")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "return"
+ [(return)
+ (use (reg:SI A0_REG))]
+ "reload_completed"
+{
+ return (TARGET_DENSITY ? "retw.n" : "retw");
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "2")])
+
+
+;; Miscellaneous instructions.
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+{
+ xtensa_expand_prologue ();
+ DONE;
+})
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+{
+ emit_jump_insn (gen_return ());
+ DONE;
+})
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+{
+ return (TARGET_DENSITY ? "nop.n" : "nop");
+}
+ [(set_attr "type" "nop")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
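+;; A nonlocal goto has to spill the caller's register windows and
+;; locate the stack frame of the target function; the expander emits a
+;; library call to the __xtensa_nonlocal_goto helper in libgcc
+;; (lib2funcs.S) to do that work.
+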
+(define_expand "nonlocal_goto"
+ [(match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")
+ (match_operand:SI 3 "" "")]
+ ""
+{
+ xtensa_expand_nonlocal_goto (operands);
+ DONE;
+})
+
+;; Stuff an address into the return address register along with the window
+;; size in the high bits. Because we don't have the window size of the
+;; previous frame, assume the function called out with a CALL8 since that
+;; is what compilers always use. Note: __builtin_frob_return_addr has
+;; already been applied to the handler, but the generic version doesn't
+;; allow us to frob it quite enough, so we just frob here.
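+;;
+;; Concretely, the split below computes
+;;   a0 = rotate_right ((handler << 2) + 2, 2)
+;;      = (2 << 30) | (handler & 0x3fffffff),
+;; which places the CALL8 window increment (2) in the top two bits
+;; above the low 30 bits of the handler address.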
+
+(define_insn_and_split "eh_return"
+ [(set (reg:SI A0_REG)
+ (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPECV_EH_RETURN))
+ (clobber (match_scratch:SI 1 "=r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 1) (ashift:SI (match_dup 0) (const_int 2)))
+ (set (match_dup 1) (plus:SI (match_dup 1) (const_int 2)))
+ (set (reg:SI A0_REG) (rotatert:SI (match_dup 1) (const_int 2)))]
+ "")
+
+;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't
+;; know if a frame pointer is required until the reload pass, and
+;; because there may be an incoming argument value in the hard frame
+;; pointer register (a7). If there is an incoming argument in that
+;; register, the "set_frame_ptr" insn gets inserted immediately after
+;; the insn that copies the incoming argument to a pseudo or to the
+;; stack. This serves several purposes here: (1) it keeps the
+;; optimizer from copy-propagating or scheduling the use of a7 as an
+;; incoming argument away from the beginning of the function; (2) we
+;; can use a post-reload splitter to expand away the insn if a frame
+;; pointer is not required, so that the post-reload scheduler can do
+;; the right thing; and (3) it makes it easy for the prologue expander
+;; to search for this insn to determine whether it should add a new insn
+;; to set up the frame pointer.
+
+(define_insn "set_frame_ptr"
+ [(set (reg:SI A7_REG) (unspec_volatile:SI [(const_int 0)] UNSPECV_SET_FP))]
+ ""
+{
+ if (frame_pointer_needed)
+ return "mov\ta7, sp";
+ return "";
+}
+ [(set_attr "type" "move")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+;; Post-reload splitter to remove fp assignment when it's not needed.
+(define_split
+ [(set (reg:SI A7_REG) (unspec_volatile:SI [(const_int 0)] UNSPECV_SET_FP))]
+ "reload_completed && !frame_pointer_needed"
+ [(unspec [(const_int 0)] UNSPEC_NOP)]
+ "")
+
+;; The preceding splitter needs something to split the insn into;
+;; things start breaking if the result is just a "use", so instead we
+;; generate the following insn.
+(define_insn "*unspec_nop"
+ [(unspec [(const_int 0)] UNSPEC_NOP)]
+ ""
+ ""
+ [(set_attr "type" "nop")
+ (set_attr "mode" "none")
+ (set_attr "length" "0")])
+
+
+;; TLS support
+
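+;; tls_func and tls_arg load a resolver function and its argument from
+;; the literal pool via @TLSFUNC and @TLSARG relocations, and tls_call
+;; invokes the resolver with CALLX8.TLS; the thread pointer itself
+;; lives in the THREADPTR user register, read and written with RUR and
+;; WUR by the load_tp and set_tp patterns below.
+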
+(define_expand "sym_TPOFF"
+ [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_TPOFF))]
+ ""
+ "")
+
+(define_expand "sym_DTPOFF"
+ [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_DTPOFF))]
+ ""
+ "")
+
+(define_insn "load_tp"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(const_int 0)] UNSPEC_TP))]
+ "TARGET_THREADPTR"
+ "rur\t%0, THREADPTR"
+ [(set_attr "type" "rsr")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "set_tp"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")]
+ UNSPECV_SET_TP)]
+ "TARGET_THREADPTR"
+ "wur\t%0, THREADPTR"
+ [(set_attr "type" "wsr")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "tls_func"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "tls_symbol_operand" "")]
+ UNSPEC_TLS_FUNC))]
+ "TARGET_THREADPTR && HAVE_AS_TLS"
+ "movi\t%0, %1@TLSFUNC"
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "tls_arg"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "tls_symbol_operand" "")]
+ UNSPEC_TLS_ARG))]
+ "TARGET_THREADPTR && HAVE_AS_TLS"
+ "movi\t%0, %1@TLSARG"
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "tls_call"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (call (mem:SI (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "tls_symbol_operand" "")]
+ UNSPEC_TLS_CALL))
+ (match_operand 3 "" "i")))]
+ "TARGET_THREADPTR && HAVE_AS_TLS"
+ "callx8.tls %1, %2@TLSCALL"
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+
+;; Instructions for the Xtensa "boolean" option.
+
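+;; BT branches when the boolean register is set and BF when it is
+;; clear, so a branch taken on (eq %0 0) assembles to BF and one taken
+;; on (ne %0 0) to BT; *boolfalse has the if_then_else arms swapped
+;; and therefore uses the opposite opcodes.
+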
+(define_insn "*booltrue"
+ [(set (pc)
+ (if_then_else (match_operator 2 "boolean_operator"
+ [(match_operand:CC 0 "register_operand" "b")
+ (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "TARGET_BOOLEANS"
+{
+ if (GET_CODE (operands[2]) == EQ)
+ return "bf\t%0, %1";
+ else
+ return "bt\t%0, %1";
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_insn "*boolfalse"
+ [(set (pc)
+ (if_then_else (match_operator 2 "boolean_operator"
+ [(match_operand:CC 0 "register_operand" "b")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 1 "" ""))))]
+ "TARGET_BOOLEANS"
+{
+ if (GET_CODE (operands[2]) == EQ)
+ return "bt\t%0, %1";
+ else
+ return "bf\t%0, %1";
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+
+;; Atomic operations
+
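+;; The MEMW instruction is the Xtensa memory barrier: it forces all
+;; preceding loads and stores to be performed before any that follow.
+;; The expander wraps a volatile scratch BLKmode MEM in UNSPEC_MEMW so
+;; that no memory reference can be moved across the barrier.
+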
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMW))]
+ ""
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*memory_barrier"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMW))]
+ ""
+ "memw"
+ [(set_attr "type" "unknown")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+;; sync_lock_release is only implemented for SImode.
+;; For other modes, just use the default of a store with a memory_barrier.
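+;; S32RI is a store with release semantics: the store is not performed
+;; until all preceding memory accesses have completed, which is exactly
+;; what releasing a lock requires.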
+(define_insn "sync_lock_releasesi"
+ [(set (match_operand:SI 0 "mem_operand" "=U")
+ (unspec_volatile:SI
+ [(match_operand:SI 1 "register_operand" "r")]
+ UNSPECV_S32RI))]
+ "TARGET_RELEASE_SYNC"
+ "s32ri\t%1, %0"
+ [(set_attr "type" "store")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
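+;; S32C1I stores %3 to the addressed word only if the word currently
+;; equals the SCOMPARE1 special register, and in either case loads the
+;; word's old value into %3; hence the expected value %2 is written to
+;; SCOMPARE1 with WSR first, and operand 3 is tied to the result
+;; operand 0 by the "0" constraint.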
+(define_insn "sync_compare_and_swapsi"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (match_operand:SI 1 "mem_operand" "+U"))
+ (set (match_dup 1)
+ (unspec_volatile:SI
+ [(match_dup 1)
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "0")]
+ UNSPECV_S32C1I))])]
+ "TARGET_S32C1I"
+ "wsr\t%2, SCOMPARE1\;s32c1i\t%3, %1"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "6")])
+
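+;; S32C1I only operates on aligned 32-bit words, so the HImode and
+;; QImode patterns below are expanded by C code into a compare-and-swap
+;; loop on the containing word; see xtensa_expand_compare_and_swap and
+;; xtensa_expand_atomic in xtensa.c.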
+(define_expand "sync_compare_and_swap<mode>"
+ [(parallel
+ [(set (match_operand:HQI 0 "register_operand" "")
+ (match_operand:HQI 1 "mem_operand" ""))
+ (set (match_dup 1)
+ (unspec_volatile:HQI
+ [(match_dup 1)
+ (match_operand:HQI 2 "register_operand" "")
+ (match_operand:HQI 3 "register_operand" "")]
+ UNSPECV_S32C1I))])]
+ "TARGET_S32C1I"
+{
+ xtensa_expand_compare_and_swap (operands[0], operands[1],
+ operands[2], operands[3]);
+ DONE;
+})
+
+(define_expand "sync_lock_test_and_set<mode>"
+ [(match_operand:HQI 0 "register_operand")
+ (match_operand:HQI 1 "memory_operand")
+ (match_operand:HQI 2 "register_operand")]
+ "TARGET_S32C1I"
+{
+ xtensa_expand_atomic (SET, operands[0], operands[1], operands[2], false);
+ DONE;
+})
+
+(define_expand "sync_<atomic><mode>"
+ [(set (match_operand:HQI 0 "memory_operand")
+ (ATOMIC:HQI (match_dup 0)
+ (match_operand:HQI 1 "register_operand")))]
+ "TARGET_S32C1I"
+{
+ xtensa_expand_atomic (<CODE>, NULL_RTX, operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "sync_old_<atomic><mode>"
+ [(set (match_operand:HQI 0 "register_operand")
+ (match_operand:HQI 1 "memory_operand"))
+ (set (match_dup 1)
+ (ATOMIC:HQI (match_dup 1)
+ (match_operand:HQI 2 "register_operand")))]
+ "TARGET_S32C1I"
+{
+ xtensa_expand_atomic (<CODE>, operands[0], operands[1], operands[2], false);
+ DONE;
+})
+
+(define_expand "sync_new_<atomic><mode>"
+ [(set (match_operand:HQI 0 "register_operand")
+ (ATOMIC:HQI (match_operand:HQI 1 "memory_operand")
+ (match_operand:HQI 2 "register_operand")))
+ (set (match_dup 1) (ATOMIC:HQI (match_dup 1) (match_dup 2)))]
+ "TARGET_S32C1I"
+{
+ xtensa_expand_atomic (<CODE>, operands[0], operands[1], operands[2], true);
+ DONE;
+})
diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt
new file mode 100644
index 000000000..e78104f1c
--- /dev/null
+++ b/gcc/config/xtensa/xtensa.opt
@@ -0,0 +1,43 @@
+; Options for the Tensilica Xtensa port of the compiler.
+
+; Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mconst16
+Target Report Mask(CONST16)
+Use CONST16 instruction to load constants
+
+mforce-no-pic
+Target Report Mask(FORCE_NO_PIC)
+Disable position-independent code (PIC) for use in OS kernel code
+
+mlongcalls
+Target
+Use indirect CALLXn instructions for large programs
+
+mtarget-align
+Target
+Automatically align branch targets to reduce branch penalties
+
+mtext-section-literals
+Target
+Intersperse literal pools with code in the text section
+
+mserialize-volatile
+Target Report Mask(SERIALIZE_VOLATILE)
+-mno-serialize-volatile Do not serialize volatile memory references with MEMW instructions