From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001 From: upstream source tree Date: Sun, 15 Mar 2015 20:14:05 -0400 Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository. --- gcc/config/xtensa/constraints.md | 139 ++ gcc/config/xtensa/crti.asm | 51 + gcc/config/xtensa/crtn.asm | 46 + gcc/config/xtensa/elf.h | 104 + gcc/config/xtensa/elf.opt | 30 + gcc/config/xtensa/ieee754-df.S | 2388 +++++++++++++++++++++ gcc/config/xtensa/ieee754-sf.S | 1757 ++++++++++++++++ gcc/config/xtensa/lib1funcs.asm | 845 ++++++++ gcc/config/xtensa/lib2funcs.S | 186 ++ gcc/config/xtensa/libgcc-xtensa.ver | 3 + gcc/config/xtensa/linux-unwind.h | 97 + gcc/config/xtensa/linux.h | 71 + gcc/config/xtensa/predicates.md | 175 ++ gcc/config/xtensa/t-elf | 6 + gcc/config/xtensa/t-linux | 3 + gcc/config/xtensa/t-xtensa | 42 + gcc/config/xtensa/unwind-dw2-xtensa.c | 546 +++++ gcc/config/xtensa/unwind-dw2-xtensa.h | 50 + gcc/config/xtensa/xtensa-protos.h | 74 + gcc/config/xtensa/xtensa.c | 3715 +++++++++++++++++++++++++++++++++ gcc/config/xtensa/xtensa.h | 847 ++++++++ gcc/config/xtensa/xtensa.md | 1914 +++++++++++++++++ gcc/config/xtensa/xtensa.opt | 43 + 23 files changed, 13132 insertions(+) create mode 100644 gcc/config/xtensa/constraints.md create mode 100644 gcc/config/xtensa/crti.asm create mode 100644 gcc/config/xtensa/crtn.asm create mode 100644 gcc/config/xtensa/elf.h create mode 100644 gcc/config/xtensa/elf.opt create mode 100644 gcc/config/xtensa/ieee754-df.S create mode 100644 gcc/config/xtensa/ieee754-sf.S create mode 100644 gcc/config/xtensa/lib1funcs.asm create mode 100644 gcc/config/xtensa/lib2funcs.S create mode 100644 gcc/config/xtensa/libgcc-xtensa.ver create mode 100644 gcc/config/xtensa/linux-unwind.h create mode 100644 gcc/config/xtensa/linux.h create mode 100644 gcc/config/xtensa/predicates.md create mode 100644 gcc/config/xtensa/t-elf create mode 100644 gcc/config/xtensa/t-linux create mode 100644 gcc/config/xtensa/t-xtensa create mode 100644 gcc/config/xtensa/unwind-dw2-xtensa.c create mode 100644 gcc/config/xtensa/unwind-dw2-xtensa.h create mode 100644 gcc/config/xtensa/xtensa-protos.h create mode 100644 gcc/config/xtensa/xtensa.c create mode 100644 gcc/config/xtensa/xtensa.h create mode 100644 gcc/config/xtensa/xtensa.md create mode 100644 gcc/config/xtensa/xtensa.opt (limited to 'gcc/config/xtensa') diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md new file mode 100644 index 000000000..bde1ba31a --- /dev/null +++ b/gcc/config/xtensa/constraints.md @@ -0,0 +1,139 @@ +;; Constraint definitions for Xtensa. +;; Copyright (C) 2006, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register constraints. + +(define_register_constraint "a" "GR_REGS" + "General-purpose AR registers @code{a0}-@code{a15}, + except @code{a1} (@code{sp}).") + +(define_register_constraint "b" "TARGET_BOOLEANS ? BR_REGS : NO_REGS" + "Boolean registers @code{b0}-@code{b15}; only available if the Xtensa + Boolean Option is configured.") + +(define_register_constraint "d" "TARGET_DENSITY ? AR_REGS: NO_REGS" + "@internal + All AR registers, including sp, but only if the Xtensa Code Density + Option is configured.") + +(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS" + "Floating-point registers @code{f0}-@code{f15}; only available if the + Xtensa Floating-Pointer Coprocessor is configured.") + +(define_register_constraint "q" "SP_REG" + "@internal + The stack pointer (register @code{a1}).") + +(define_register_constraint "A" "TARGET_MAC16 ? ACC_REG : NO_REGS" + "The low 32 bits of the accumulator from the Xtensa MAC16 Option.") + +(define_register_constraint "B" "TARGET_SEXT ? GR_REGS : NO_REGS" + "@internal + General-purpose AR registers, but only if the Xtensa Sign Extend + Option is configured.") + +(define_register_constraint "C" "TARGET_MUL16 ? GR_REGS: NO_REGS" + "@internal + General-purpose AR registers, but only if the Xtensa 16-Bit Integer + Multiply Option is configured.") + +(define_register_constraint "D" "TARGET_DENSITY ? GR_REGS: NO_REGS" + "@internal + General-purpose AR registers, but only if the Xtensa Code Density + Option is configured.") + +(define_register_constraint "W" "TARGET_CONST16 ? GR_REGS: NO_REGS" + "@internal + General-purpose AR registers, but only if the Xtensa Const16 + Option is configured.") + +;; Integer constant constraints. + +(define_constraint "I" + "A signed 12-bit integer constant for use with MOVI instructions." + (and (match_code "const_int") + (match_test "xtensa_simm12b (ival)"))) + +(define_constraint "J" + "A signed 8-bit integer constant for use with ADDI instructions." + (and (match_code "const_int") + (match_test "xtensa_simm8 (ival)"))) + +(define_constraint "K" + "A constant integer that can be an immediate operand of an Xtensa + conditional branch instruction that performs a signed comparison or + a comparison against zero." + (and (match_code "const_int") + (match_test "xtensa_b4const_or_zero (ival)"))) + +(define_constraint "L" + "A constant integer that can be an immediate operand of an Xtensa + conditional branch instruction that performs an unsigned comparison." + (and (match_code "const_int") + (match_test "xtensa_b4constu (ival)"))) + +(define_constraint "M" + "An integer constant in the range @minus{}32-95 for use with MOVI.N + instructions." + (and (match_code "const_int") + (match_test "ival >= -32 && ival <= 95"))) + +(define_constraint "N" + "An unsigned 8-bit integer constant shifted left by 8 bits for use + with ADDMI instructions." + (and (match_code "const_int") + (match_test "xtensa_simm8x256 (ival)"))) + +(define_constraint "O" + "An integer constant that can be used in ADDI.N instructions." 
+ (and (match_code "const_int") + (match_test "ival == -1 || (ival >= 1 && ival <= 15)"))) + +(define_constraint "P" + "An integer constant that can be used as a mask value in an EXTUI + instruction." + (and (match_code "const_int") + (match_test "xtensa_mask_immediate (ival)"))) + +;; Memory constraints. Do not use define_memory_constraint here. Doing so +;; causes reload to force some constants into the constant pool, but since +;; the Xtensa constant pool can only be accessed with L32R instructions, it +;; is always better to just copy a constant into a register. Instead, use +;; regular constraints but add a check to allow pseudos during reload. + +(define_constraint "R" + "Memory that can be accessed with a 4-bit unsigned offset from a register." + (ior (and (match_code "mem") + (match_test "smalloffset_mem_p (op)")) + (and (match_code "reg") + (match_test "reload_in_progress + && REGNO (op) >= FIRST_PSEUDO_REGISTER")))) + +(define_constraint "T" + "Memory in a literal pool (addressable with an L32R instruction)." + (and (match_code "mem") + (match_test "!TARGET_CONST16 && constantpool_mem_p (op)"))) + +(define_constraint "U" + "Memory that is not in a literal pool." + (ior (and (match_code "mem") + (match_test "! constantpool_mem_p (op)")) + (and (match_code "reg") + (match_test "reload_in_progress + && REGNO (op) >= FIRST_PSEUDO_REGISTER")))) diff --git a/gcc/config/xtensa/crti.asm b/gcc/config/xtensa/crti.asm new file mode 100644 index 000000000..cbe91b0e7 --- /dev/null +++ b/gcc/config/xtensa/crti.asm @@ -0,0 +1,51 @@ +# Start .init and .fini sections. +# Copyright (C) 2003, 2009 Free Software Foundation, Inc. +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# . + +# This file just makes a stack frame for the contents of the .fini and +# .init sections. Users may put any desired instructions in those +# sections. + +#include "xtensa-config.h" + + .section .init + .globl _init + .type _init,@function + .align 4 +_init: +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + entry sp, 64 +#else + addi sp, sp, -32 + s32i a0, sp, 0 +#endif + + .section .fini + .globl _fini + .type _fini,@function + .align 4 +_fini: +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + entry sp, 64 +#else + addi sp, sp, -32 + s32i a0, sp, 0 +#endif diff --git a/gcc/config/xtensa/crtn.asm b/gcc/config/xtensa/crtn.asm new file mode 100644 index 000000000..413cfa0ac --- /dev/null +++ b/gcc/config/xtensa/crtn.asm @@ -0,0 +1,46 @@ +# End of .init and .fini sections. +# Copyright (C) 2003, 2009 Free Software Foundation, Inc. 
+# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# . + + +# This file just makes sure that the .fini and .init sections do in +# fact return. Users may put any desired instructions in those sections. +# This file is the last thing linked into any executable. + +#include "xtensa-config.h" + + .section .init +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + retw +#else + l32i a0, sp, 0 + addi sp, sp, 32 + ret +#endif + + .section .fini +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + retw +#else + l32i a0, sp, 0 + addi sp, sp, 32 + ret +#endif diff --git a/gcc/config/xtensa/elf.h b/gcc/config/xtensa/elf.h new file mode 100644 index 000000000..54a9c8f19 --- /dev/null +++ b/gcc/config/xtensa/elf.h @@ -0,0 +1,104 @@ +/* Xtensa/Elf configuration. + Derived from the configuration for GCC for Intel i386 running Linux. + Copyright (C) 2001, 2003, 2006, 2007, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define TARGET_SECTION_TYPE_FLAGS xtensa_multibss_section_type_flags + +/* Don't assume anything about the header files. 
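+   Defining NO_IMPLICIT_EXTERN_C below tells GCC that this target's system headers are safe for C++, so they are not wrapped in an implicit extern "C" block (an editorial clarification; the macro's meaning is documented in the GCC internals manual).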
*/ +#define NO_IMPLICIT_EXTERN_C + +#undef ASM_APP_ON +#define ASM_APP_ON "#APP\n" + +#undef ASM_APP_OFF +#define ASM_APP_OFF "#NO_APP\n" + +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (Xtensa/ELF)", stderr); + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "short unsigned int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 16 + +#undef ASM_SPEC +#define ASM_SPEC \ + "%{mtext-section-literals:--text-section-literals} \ + %{mno-text-section-literals:--no-text-section-literals} \ + %{mtarget-align:--target-align} \ + %{mno-target-align:--no-target-align} \ + %{mlongcalls:--longcalls} \ + %{mno-longcalls:--no-longcalls}" + +#undef LIB_SPEC +#define LIB_SPEC "-lc -lsim -lc -lhandlers-sim -lhal" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "crt1-sim%O%s crt0%O%s crti%O%s crtbegin%O%s _vectors%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + %{static:-static}}}" + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* Avoid dots for compatibility with VxWorks. */ +#undef NO_DOLLAR_IN_LABEL +#define NO_DOT_IN_LABEL + +/* Do not force "-fpic" for this target. */ +#define XTENSA_ALWAYS_PIC 0 + +#undef DBX_REGISTER_NUMBER + +/* Search for headers in $tooldir/arch/include and for libraries and + startfiles in $tooldir/arch/lib. */ +#define GCC_DRIVER_HOST_INITIALIZATION \ +do \ +{ \ + char *tooldir, *archdir; \ + tooldir = concat (tooldir_base_prefix, spec_machine, \ + dir_separator_str, NULL); \ + if (!IS_ABSOLUTE_PATH (tooldir)) \ + tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \ + spec_version, dir_separator_str, tooldir, NULL); \ + archdir = concat (tooldir, "arch", dir_separator_str, NULL); \ + add_prefix (&startfile_prefixes, \ + concat (archdir, "lib", dir_separator_str, NULL), \ + "GCC", PREFIX_PRIORITY_LAST, 0, 1); \ + add_prefix (&include_prefixes, archdir, \ + "GCC", PREFIX_PRIORITY_LAST, 0, 0); \ + } \ +while (0) diff --git a/gcc/config/xtensa/elf.opt b/gcc/config/xtensa/elf.opt new file mode 100644 index 000000000..bdeac15b2 --- /dev/null +++ b/gcc/config/xtensa/elf.opt @@ -0,0 +1,30 @@ +; Xtensa ELF (bare metal) options. + +; Copyright (C) 2011 +; Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. + +rdynamic +Driver + +; This comment is to ensure we retain the blank line above. 
diff --git a/gcc/config/xtensa/ieee754-df.S b/gcc/config/xtensa/ieee754-df.S new file mode 100644 index 000000000..9b46889bd --- /dev/null +++ b/gcc/config/xtensa/ieee754-df.S @@ -0,0 +1,2388 @@ +/* IEEE-754 double-precision functions for Xtensa + Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __XTENSA_EB__ +#define xh a2 +#define xl a3 +#define yh a4 +#define yl a5 +#else +#define xh a3 +#define xl a2 +#define yh a5 +#define yl a4 +#endif + +/* Warning! The branch displacements for some Xtensa branch instructions + are quite small, and this code has been carefully laid out to keep + branch targets in range. If you change anything, be sure to check that + the assembler is not relaxing anything to branch over a jump. */ + +#ifdef L_negdf2 + + .align 4 + .global __negdf2 + .type __negdf2, @function +__negdf2: + leaf_entry sp, 16 + movi a4, 0x80000000 + xor xh, xh, a4 + leaf_return + +#endif /* L_negdf2 */ + +#ifdef L_addsubdf3 + + /* Addition */ +__adddf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Ladd_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall yh, a6, 1f + /* If x is a NaN, return it. Otherwise, return y. */ + slli a7, xh, 12 + or a7, a7, xl + beqz a7, .Ladd_ynan_or_inf +1: leaf_return + +.Ladd_ynan_or_inf: + /* Return y. */ + mov xh, yh + mov xl, yl + leaf_return + +.Ladd_opposite_signs: + /* Operand signs differ. Do a subtraction. */ + slli a7, a6, 11 + xor yh, yh, a7 + j .Lsub_same_sign + + .align 4 + .global __adddf3 + .type __adddf3, @function +__adddf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Check if the two operands have the same sign. */ + xor a7, xh, yh + bltz a7, .Ladd_opposite_signs + +.Ladd_same_sign: + /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ + ball xh, a6, .Ladd_xnan_or_inf + ball yh, a6, .Ladd_ynan_or_inf + + /* Compare the exponents. The smaller operand will be shifted + right by the exponent difference and added to the larger + one. */ + extui a7, xh, 20, 12 + extui a8, yh, 20, 12 + bltu a7, a8, .Ladd_shiftx + +.Ladd_shifty: + /* Check if the smaller (or equal) exponent is zero. */ + bnone yh, a6, .Ladd_yexpzero + + /* Replace yh sign/exponent with 0x001. */ + or yh, yh, a6 + slli yh, yh, 11 + srli yh, yh, 11 + +.Ladd_yexpdiff: + /* Compute the exponent difference. Optimize for difference < 32. */ + sub a10, a7, a8 + bgeui a10, 32, .Ladd_bigshifty + + /* Shift yh/yl right by the exponent difference. 
Any bits that are + shifted out of yl are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, yl, a9 + src yl, yh, yl + srl yh, yh + +.Ladd_addy: + /* Do the 64-bit addition. */ + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: + /* Check if the add overflowed into the exponent. */ + extui a10, xh, 20, 12 + beq a10, a7, .Ladd_round + mov a8, a7 + j .Ladd_carry + +.Ladd_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0", and increment the apparent exponent + because subnormals behave as if they had the minimum (nonzero) + exponent. Test for the case when both exponents are zero. */ + slli yh, yh, 12 + srli yh, yh, 12 + bnone xh, a6, .Ladd_bothexpzero + addi a8, a8, 1 + j .Ladd_yexpdiff + +.Ladd_bothexpzero: + /* Both exponents are zero. Handle this as a special case. There + is no need to shift or round, and the normal code for handling + a carry into the exponent field will not work because it + assumes there is an implicit "1.0" that needs to be added. */ + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: leaf_return + +.Ladd_bigshifty: + /* Exponent difference > 64 -- just return the bigger value. */ + bgeui a10, 64, 1b + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out are saved in a9 for rounding the result. */ + ssr a10 + sll a11, yl /* lost bits shifted out of yl */ + src a9, yh, yl + srl yl, yh + movi yh, 0 + beqz a11, .Ladd_addy + or a9, a9, a10 /* any positive, nonzero value will work */ + j .Ladd_addy + +.Ladd_xexpzero: + /* Same as "yexpzero" except skip handling the case when both + exponents are zero. */ + slli xh, xh, 12 + srli xh, xh, 12 + addi a7, a7, 1 + j .Ladd_xexpdiff + +.Ladd_shiftx: + /* Same thing as the "shifty" code, but with x and y swapped. Also, + because the exponent difference is always nonzero in this version, + the shift sequence can use SLL and skip loading a constant zero. */ + bnone xh, a6, .Ladd_xexpzero + + or xh, xh, a6 + slli xh, xh, 11 + srli xh, xh, 11 + +.Ladd_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Ladd_bigshiftx + + ssr a10 + sll a9, xl + src xl, xh, xl + srl xh, xh + +.Ladd_addx: + add xl, xl, yl + add xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, 1 +1: + /* Check if the add overflowed into the exponent. */ + extui a10, xh, 20, 12 + bne a10, a8, .Ladd_carry + +.Ladd_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi xl, xl, 1 + beqz xl, .Ladd_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_bigshiftx: + /* Mostly the same thing as "bigshifty".... */ + bgeui a10, 64, .Ladd_returny + + ssr a10 + sll a11, xl + src a9, xh, xl + srl xl, xh + movi xh, 0 + beqz a11, .Ladd_addx + or a9, a9, a10 + j .Ladd_addx + +.Ladd_returny: + mov xh, yh + mov xl, yl + leaf_return + +.Ladd_carry: + /* The addition has overflowed into the exponent field, so the + value needs to be renormalized. The mantissa of the result + can be recovered by subtracting the original exponent and + adding 0x100000 (which is the explicit "1.0" for the + mantissa of the non-shifted operand -- the "1.0" for the + shifted operand was already added). The mantissa can then + be shifted right by one bit. The explicit "1.0" of the + shifted mantissa then needs to be replaced by the exponent, + incremented by one to account for the normalizing shift. 
+ It is faster to combine these operations: do the shift first + and combine the additions and subtractions. If x is the + original exponent, the result is: + shifted mantissa - (x << 19) + (1 << 19) + (x << 20) + or: + shifted mantissa + ((x + 1) << 19) + Note that the exponent is incremented here by leaving the + explicit "1.0" of the mantissa in the exponent field. */ + + /* Shift xh/xl right by one bit. Save the lsb of xl. */ + mov a10, xl + ssai 1 + src xl, xh, xl + srl xh, xh + + /* See explanation above. The original exponent is in a8. */ + addi a8, a8, 1 + slli a8, a8, 19 + add xh, xh, a8 + + /* Return an Infinity if the exponent overflowed. */ + ball xh, a6, .Ladd_infinity + + /* Same thing as the "round" code except the msb of the leftover + fraction is bit 0 of a10, with the rest of the fraction in a9. */ + bbci.l a10, 0, 1f + addi xl, xl, 1 + beqz xl, .Ladd_roundcarry + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_infinity: + /* Clear the mantissa. */ + movi xl, 0 + srli xh, xh, 20 + slli xh, xh, 20 + + /* The sign bit may have been lost in a carry-out. Put it back. */ + slli a8, a8, 1 + or xh, xh, a8 + leaf_return + +.Ladd_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Ladd_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. */ + leaf_return + + + /* Subtraction */ +__subdf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Lsub_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall yh, a6, 1f + /* Both x and y are either NaN or Inf, so the result is NaN. */ + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: leaf_return + +.Lsub_ynan_or_inf: + /* Negate y and return it. */ + slli a7, a6, 11 + xor xh, yh, a7 + mov xl, yl + leaf_return + +.Lsub_opposite_signs: + /* Operand signs differ. Do an addition. */ + slli a7, a6, 11 + xor yh, yh, a7 + j .Ladd_same_sign + + .align 4 + .global __subdf3 + .type __subdf3, @function +__subdf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Check if the two operands have the same sign. */ + xor a7, xh, yh + bltz a7, .Lsub_opposite_signs + +.Lsub_same_sign: + /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ + ball xh, a6, .Lsub_xnan_or_inf + ball yh, a6, .Lsub_ynan_or_inf + + /* Compare the operands. In contrast to addition, the entire + value matters here. */ + extui a7, xh, 20, 11 + extui a8, yh, 20, 11 + bltu xh, yh, .Lsub_xsmaller + beq xh, yh, .Lsub_compare_low + +.Lsub_ysmaller: + /* Check if the smaller (or equal) exponent is zero. */ + bnone yh, a6, .Lsub_yexpzero + + /* Replace yh sign/exponent with 0x001. */ + or yh, yh, a6 + slli yh, yh, 11 + srli yh, yh, 11 + +.Lsub_yexpdiff: + /* Compute the exponent difference. Optimize for difference < 32. */ + sub a10, a7, a8 + bgeui a10, 32, .Lsub_bigshifty + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out of yl are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, yl, a9 + src yl, yh, yl + srl yh, yh + +.Lsub_suby: + /* Do the 64-bit subtraction. */ + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from xh/xl. 
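+   (Negating a9 performs the subtraction from zero; when a9 is nonzero, a borrow must be taken by decrementing xl, and xh as well if xl was zero, which is what the MOVEQZ below arranges.)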
*/ + neg a9, a9 + beqz a9, 1f + addi a5, xh, -1 + moveqz xh, a5, xl + addi xl, xl, -1 +1: + /* Check if the subtract underflowed into the exponent. */ + extui a10, xh, 20, 11 + beq a10, a7, .Lsub_round + j .Lsub_borrow + +.Lsub_compare_low: + /* The high words are equal. Compare the low words. */ + bltu xl, yl, .Lsub_xsmaller + bltu yl, xl, .Lsub_ysmaller + /* The operands are equal. Return 0.0. */ + movi xh, 0 + movi xl, 0 +1: leaf_return + +.Lsub_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0". Unless x is also a subnormal, increment + y's apparent exponent because subnormals behave as if they had + the minimum (nonzero) exponent. */ + slli yh, yh, 12 + srli yh, yh, 12 + bnone xh, a6, .Lsub_yexpdiff + addi a8, a8, 1 + j .Lsub_yexpdiff + +.Lsub_bigshifty: + /* Exponent difference > 64 -- just return the bigger value. */ + bgeui a10, 64, 1b + + /* Shift yh/yl right by the exponent difference. Any bits that are + shifted out are saved in a9 for rounding the result. */ + ssr a10 + sll a11, yl /* lost bits shifted out of yl */ + src a9, yh, yl + srl yl, yh + movi yh, 0 + beqz a11, .Lsub_suby + or a9, a9, a10 /* any positive, nonzero value will work */ + j .Lsub_suby + +.Lsub_xsmaller: + /* Same thing as the "ysmaller" code, but with x and y swapped and + with y negated. */ + bnone xh, a6, .Lsub_xexpzero + + or xh, xh, a6 + slli xh, xh, 11 + srli xh, xh, 11 + +.Lsub_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Lsub_bigshiftx + + ssr a10 + movi a9, 0 + src a9, xl, a9 + src xl, xh, xl + srl xh, xh + + /* Negate y. */ + slli a11, a6, 11 + xor yh, yh, a11 + +.Lsub_subx: + sub xl, yl, xl + sub xh, yh, xh + bgeu yl, xl, 1f + addi xh, xh, -1 +1: + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from xh/xl. */ + neg a9, a9 + beqz a9, 1f + addi a5, xh, -1 + moveqz xh, a5, xl + addi xl, xl, -1 +1: + /* Check if the subtract underflowed into the exponent. */ + extui a10, xh, 20, 11 + bne a10, a8, .Lsub_borrow + +.Lsub_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi xl, xl, 1 + beqz xl, .Lsub_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Lsub_exactlyhalf +1: leaf_return + +.Lsub_xexpzero: + /* Same as "yexpzero". */ + slli xh, xh, 12 + srli xh, xh, 12 + bnone yh, a6, .Lsub_xexpdiff + addi a7, a7, 1 + j .Lsub_xexpdiff + +.Lsub_bigshiftx: + /* Mostly the same thing as "bigshifty", but with the sign bit of the + shifted value set so that the subsequent subtraction flips the + sign of y. */ + bgeui a10, 64, .Lsub_returny + + ssr a10 + sll a11, xl + src a9, xh, xl + srl xl, xh + slli xh, a6, 11 /* set sign bit of xh */ + beqz a11, .Lsub_subx + or a9, a9, a10 + j .Lsub_subx + +.Lsub_returny: + /* Negate and return y. */ + slli a7, a6, 11 + xor xh, yh, a7 + mov xl, yl + leaf_return + +.Lsub_borrow: + /* The subtraction has underflowed into the exponent field, so the + value needs to be renormalized. Shift the mantissa left as + needed to remove any leading zeros and adjust the exponent + accordingly. If the exponent is not large enough to remove + all the leading zeros, the result will be a subnormal value. */ + + slli a8, xh, 12 + beqz a8, .Lsub_xhzero + do_nsau a6, a8, a7, a11 + srli a8, a8, 12 + bge a6, a10, .Lsub_subnormal + addi a6, a6, 1 + +.Lsub_shift_lt32: + /* Shift the mantissa (a8/xl/a9) left by a6. 
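+   (Here a8 holds the high mantissa bits with the sign and exponent stripped away, and a9 holds the rounding bits shifted out earlier; the three words are shifted left as a single quantity.)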
*/ + ssl a6 + src a8, a8, xl + src xl, xl, a9 + sll a9, a9 + + /* Combine the shifted mantissa with the sign and exponent, + decrementing the exponent by a6. (The exponent has already + been decremented by one due to the borrow from the subtraction, + but adding the mantissa will increment the exponent by one.) */ + srli xh, xh, 20 + sub xh, xh, a6 + slli xh, xh, 20 + add xh, xh, a8 + j .Lsub_round + +.Lsub_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Lsub_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. */ + leaf_return + +.Lsub_xhzero: + /* When normalizing the result, all the mantissa bits in the high + word are zero. Shift by "20 + (leading zero count of xl) + 1". */ + do_nsau a6, xl, a7, a11 + addi a6, a6, 21 + blt a10, a6, .Lsub_subnormal + +.Lsub_normalize_shift: + bltui a6, 32, .Lsub_shift_lt32 + + ssl a6 + src a8, xl, a9 + sll xl, a9 + movi a9, 0 + + srli xh, xh, 20 + sub xh, xh, a6 + slli xh, xh, 20 + add xh, xh, a8 + j .Lsub_round + +.Lsub_subnormal: + /* The exponent is too small to shift away all the leading zeros. + Set a6 to the current exponent (which has already been + decremented by the borrow) so that the exponent of the result + will be zero. Do not add 1 to a6 in this case, because: (1) + adding the mantissa will not increment the exponent, so there is + no need to subtract anything extra from the exponent to + compensate, and (2) the effective exponent of a subnormal is 1 + not 0 so the shift amount must be 1 smaller than normal. */ + mov a6, a10 + j .Lsub_normalize_shift + +#endif /* L_addsubdf3 */ + +#ifdef L_muldf3 + + /* Multiplication */ +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + +__muldf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Lmul_xexpzero: + /* Clear the sign bit of x. */ + slli xh, xh, 1 + srli xh, xh, 1 + + /* If x is zero, return zero. */ + or a10, xh, xl + beqz a10, .Lmul_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + beqz xh, .Lmul_xh_zero + do_nsau a10, xh, a11, a12 + addi a10, a10, -11 + ssl a10 + src xh, xh, xl + sll xl, xl + movi a8, 1 + sub a8, a8, a10 + j .Lmul_xnormalized +.Lmul_xh_zero: + do_nsau a10, xl, a11, a12 + addi a10, a10, -11 + movi a8, -31 + sub a8, a8, a10 + ssl a10 + bltz a10, .Lmul_xl_srl + sll xh, xl + movi xl, 0 + j .Lmul_xnormalized +.Lmul_xl_srl: + srl xh, xl + sll xl, xl + j .Lmul_xnormalized + +.Lmul_yexpzero: + /* Clear the sign bit of y. */ + slli yh, yh, 1 + srli yh, yh, 1 + + /* If y is zero, return zero. */ + or a10, yh, yl + beqz a10, .Lmul_return_zero + + /* Normalize y. Adjust the exponent in a9. */ + beqz yh, .Lmul_yh_zero + do_nsau a10, yh, a11, a12 + addi a10, a10, -11 + ssl a10 + src yh, yh, yl + sll yl, yl + movi a9, 1 + sub a9, a9, a10 + j .Lmul_ynormalized +.Lmul_yh_zero: + do_nsau a10, yl, a11, a12 + addi a10, a10, -11 + movi a9, -31 + sub a9, a9, a10 + ssl a10 + bltz a10, .Lmul_yl_srl + sll yh, yl + movi yl, 0 + j .Lmul_ynormalized +.Lmul_yl_srl: + srl yh, yl + sll yl, yl + j .Lmul_ynormalized + +.Lmul_return_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + j .Lmul_done + +.Lmul_xnan_or_inf: + /* If y is zero, return NaN. 
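+   (x is already known to be NaN or Infinity at this point, and Infinity times zero has no meaningful value, so the result is a quiet NaN.)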
*/ + bnez yl, 1f + slli a8, yh, 1 + bnez a8, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 + j .Lmul_done +1: + /* If y is NaN, return y. */ + bnall yh, a6, .Lmul_returnx + slli a8, yh, 12 + or a8, a8, yl + beqz a8, .Lmul_returnx + +.Lmul_returny: + mov xh, yh + mov xl, yl + +.Lmul_returnx: + /* Set the sign bit and return. */ + extui a7, a7, 31, 1 + slli xh, xh, 1 + ssai 1 + src xh, a7, xh + j .Lmul_done + +.Lmul_ynan_or_inf: + /* If x is zero, return NaN. */ + bnez xl, .Lmul_returny + slli a8, xh, 1 + bnez a8, .Lmul_returny + movi a7, 0x80000 /* make it a quiet NaN */ + or xh, yh, a7 + j .Lmul_done + + .align 4 + .global __muldf3 + .type __muldf3, @function +__muldf3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 64 +#else + leaf_entry sp, 32 +#endif + movi a6, 0x7ff00000 + + /* Get the sign of the result. */ + xor a7, xh, yh + + /* Check for NaN and infinity. */ + ball xh, a6, .Lmul_xnan_or_inf + ball yh, a6, .Lmul_ynan_or_inf + + /* Extract the exponents. */ + extui a8, xh, 20, 11 + extui a9, yh, 20, 11 + + beqz a8, .Lmul_xexpzero +.Lmul_xnormalized: + beqz a9, .Lmul_yexpzero +.Lmul_ynormalized: + + /* Add the exponents. */ + add a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0x1fffff + or xh, xh, a6 + and xh, xh, a10 + or yh, yh, a6 + and yh, yh, a10 + + /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. + The least-significant word of the result is thrown away except + that if it is nonzero, the lsb of a6 is set to 1. */ +#if XCHAL_HAVE_MUL32_HIGH + + /* Compute a6 with any carry-outs in a10. */ + movi a10, 0 + mull a6, xl, yh + mull a11, xh, yl + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + muluh a11, xl, yl + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + /* If the low word of the result is nonzero, set the lsb of a6. */ + mull a11, xl, yl + beqz a11, 1f + movi a9, 1 + or a6, a6, a9 +1: + /* Compute xl with any carry-outs in a9. */ + movi a9, 0 + mull a11, xh, yh + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + muluh a11, xh, yl + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + muluh xl, xl, yh + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + /* Compute xh. */ + muluh xh, xh, yh + add xh, xh, a9 + +#else /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Break the inputs into 16-bit chunks and compute 16 32-bit partial + products. These partial products are: + + 0 xll * yll + + 1 xll * ylh + 2 xlh * yll + + 3 xll * yhl + 4 xlh * ylh + 5 xhl * yll + + 6 xll * yhh + 7 xlh * yhl + 8 xhl * ylh + 9 xhh * yll + + 10 xlh * yhh + 11 xhl * yhl + 12 xhh * ylh + + 13 xhl * yhh + 14 xhh * yhl + + 15 xhh * yhh + + where the input chunks are (hh, hl, lh, ll). If using the Mul16 + or Mul32 multiplier options, these input chunks must be stored in + separate registers. For Mac16, the UMUL.AA.* opcodes can specify + that the inputs come from either half of the registers, so there + is no need to shift them out ahead of time. If there is no + multiply hardware, the 16-bit chunks can be extracted when setting + up the arguments to the separate multiply function. */ + + /* Save a7 since it is needed to hold a temporary value. 
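+   (a7 holds the XOR of the operand signs, which is needed again when the sign bit is attached to the result, but the partial-product code below reuses a7 as a scratch register.)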
*/ + s32i a7, sp, 4 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Calling a separate multiply function will clobber a0 and requires + use of a8 as a temporary, so save those values now. (The function + uses a custom ABI so nothing else needs to be saved.) */ + s32i a0, sp, 0 + s32i a8, sp, 8 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define xlh a12 +#define ylh a13 +#define xhh a14 +#define yhh a15 + + /* Get the high halves of the inputs into registers. */ + srli xlh, xl, 16 + srli ylh, yl, 16 + srli xhh, xh, 16 + srli yhh, yh, 16 + +#define xll xl +#define yll yl +#define xhl xh +#define yhl yh + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. */ + extui xl, xl, 0, 16 + extui xh, xh, 0, 16 + extui yl, yl, 0, 16 + extui yh, yh, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a10 with carry-out in a9. */ + do_mul(a10, xl, l, yl, h) /* pp 1 */ + do_mul(a11, xl, h, yl, l) /* pp 2 */ + movi a9, 0 + add a10, a10, a11 + bgeu a10, a11, 1f + addi a9, a9, 1 +1: + /* Initialize a6 with a9/a10 shifted into position. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a6, a9, a10 + + /* Compute the low word into a10. */ + do_mul(a11, xl, l, yl, l) /* pp 0 */ + sll a10, a10 + add a10, a10, a11 + bgeu a10, a11, 1f + addi a6, a6, 1 +1: + /* Compute the contributions of pp0-5 to a6, with carry-outs in a9. + This is good enough to determine the low half of a6, so that any + nonzero bits from the low word of the result can be collapsed + into a6, freeing up a register. */ + movi a9, 0 + do_mul(a11, xl, l, yh, l) /* pp 3 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + do_mul(a11, xl, h, yl, h) /* pp 4 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + do_mul(a11, xh, l, yl, l) /* pp 5 */ + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Collapse any nonzero bits from the low word into a6. */ + beqz a10, 1f + movi a11, 1 + or a6, a6, a11 +1: + /* Add pp6-9 into a11 with carry-outs in a10. 
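+   (pp6 through pp9 all have weight 2^48, since the positions of their 16-bit chunks sum to three; once shifted into place their sum lines up with the upper half of a6 and the lower half of xl, with the carry-outs collected in a10.)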
*/ + do_mul(a7, xl, l, yh, h) /* pp 6 */ + do_mul(a11, xh, h, yl, l) /* pp 9 */ + movi a10, 0 + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + do_mul(a7, xl, h, yh, l) /* pp 7 */ + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + do_mul(a7, xh, l, yl, h) /* pp 8 */ + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + /* Shift a10/a11 into position, and add low half of a11 to a6. */ + src a10, a10, a11 + add a10, a10, a9 + sll a11, a11 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a10, a10, 1 +1: + /* Add pp10-12 into xl with carry-outs in a9. */ + movi a9, 0 + do_mul(xl, xl, h, yh, h) /* pp 10 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + do_mul(a10, xh, l, yh, l) /* pp 11 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + do_mul(a10, xh, h, yl, h) /* pp 12 */ + add xl, xl, a10 + bgeu xl, a10, 1f + addi a9, a9, 1 +1: + /* Add pp13-14 into a11 with carry-outs in a10. */ + do_mul(a11, xh, l, yh, h) /* pp 13 */ + do_mul(a7, xh, h, yh, l) /* pp 14 */ + movi a10, 0 + add a11, a11, a7 + bgeu a11, a7, 1f + addi a10, a10, 1 +1: + /* Shift a10/a11 into position, and add low half of a11 to a6. */ + src a10, a10, a11 + add a10, a10, a9 + sll a11, a11 + add xl, xl, a11 + bgeu xl, a11, 1f + addi a10, a10, 1 +1: + /* Compute xh. */ + do_mul(xh, xh, h, yh, h) /* pp 15 */ + add xh, xh, a10 + + /* Restore values saved on the stack during the multiplication. */ + l32i a7, sp, 4 +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + l32i a0, sp, 0 + l32i a8, sp, 8 +#endif +#endif /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Shift left by 12 bits, unless there was a carry-out from the + multiply, in which case, shift by 11 bits and increment the + exponent. Note: It is convenient to use the constant 0x3ff + instead of 0x400 when removing the extra exponent bias (so that + it is easy to construct 0x7fe for the overflow check). Reverse + the logic here to decrement the exponent sum by one unless there + was a carry-out. */ + movi a4, 11 + srli a5, xh, 21 - 12 + bnez a5, 1f + addi a4, a4, 1 + addi a8, a8, -1 +1: ssl a4 + src xh, xh, xl + src xl, xl, a6 + sll a6, a6 + + /* Subtract the extra bias from the exponent sum (plus one to account + for the explicit "1.0" of the mantissa that will be added to the + exponent in the final result). */ + movi a4, 0x3ff + sub a8, a8, a4 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..7fd are OK here. */ + slli a4, a4, 1 /* 0x7fe */ + bgeu a8, a4, .Lmul_overflow + +.Lmul_round: + /* Round. */ + bgez a6, .Lmul_rounded + addi xl, xl, 1 + beqz xl, .Lmul_roundcarry + slli a6, a6, 1 + beqz a6, .Lmul_exactlyhalf + +.Lmul_rounded: + /* Add the exponent to the mantissa. */ + slli a8, a8, 20 + add xh, xh, a8 + +.Lmul_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or xh, xh, a7 + +.Lmul_done: +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +.Lmul_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + j .Lmul_rounded + +.Lmul_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow is OK -- it will be added to the exponent. */ + j .Lmul_rounded + +.Lmul_overflow: + bltz a8, .Lmul_underflow + /* Return +/- Infinity. 
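+   (a4 still holds 0x7fe from the overflow check above, so adding one gives the maximum exponent 0x7ff; the mantissa is cleared so the result is an Infinity rather than a NaN.)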
*/ + addi a8, a4, 1 /* 0x7ff */ + slli xh, a8, 20 + movi xl, 0 + j .Lmul_addsign + +.Lmul_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + mov a9, a6 + ssr a8 + bgeui a8, 32, .Lmul_bigshift + + /* Shift xh/xl right. Any bits that are shifted out of xl are saved + in a6 (combined with the shifted-out bits currently in a6) for + rounding the result. */ + sll a6, xl + src xl, xh, xl + srl xh, xh + j 1f + +.Lmul_bigshift: + bgeui a8, 64, .Lmul_flush_to_zero + sll a10, xl /* lost bits shifted out of xl */ + src a6, xh, xl + srl xl, xh + movi xh, 0 + or a9, a9, a10 + + /* Set the exponent to zero. */ +1: movi a8, 0 + + /* Pack any nonzero bits shifted out into a6. */ + beqz a9, .Lmul_round + movi a9, 1 + or a6, a6, a9 + j .Lmul_round + +.Lmul_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + j .Lmul_done + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ +#endif /* L_muldf3 */ + +#ifdef L_divdf3 + + /* Division */ +__divdf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Ldiv_yexpzero: + /* Clear the sign bit of y. */ + slli yh, yh, 1 + srli yh, yh, 1 + + /* Check for division by zero. */ + or a10, yh, yl + beqz a10, .Ldiv_yzero + + /* Normalize y. Adjust the exponent in a9. */ + beqz yh, .Ldiv_yh_zero + do_nsau a10, yh, a11, a9 + addi a10, a10, -11 + ssl a10 + src yh, yh, yl + sll yl, yl + movi a9, 1 + sub a9, a9, a10 + j .Ldiv_ynormalized +.Ldiv_yh_zero: + do_nsau a10, yl, a11, a9 + addi a10, a10, -11 + movi a9, -31 + sub a9, a9, a10 + ssl a10 + bltz a10, .Ldiv_yl_srl + sll yh, yl + movi yl, 0 + j .Ldiv_ynormalized +.Ldiv_yl_srl: + srl yh, yl + sll yl, yl + j .Ldiv_ynormalized + +.Ldiv_yzero: + /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ + slli xh, xh, 1 + srli xh, xh, 1 + or xl, xl, xh + srli xh, a7, 31 + slli xh, xh, 31 + or xh, xh, a6 + bnez xl, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: movi xl, 0 + leaf_return + +.Ldiv_xexpzero: + /* Clear the sign bit of x. */ + slli xh, xh, 1 + srli xh, xh, 1 + + /* If x is zero, return zero. 
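+   (y is already known to be nonzero and finite here, so zero divided by it is just a zero whose sign comes from a7.)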
*/ + or a10, xh, xl + beqz a10, .Ldiv_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + beqz xh, .Ldiv_xh_zero + do_nsau a10, xh, a11, a8 + addi a10, a10, -11 + ssl a10 + src xh, xh, xl + sll xl, xl + movi a8, 1 + sub a8, a8, a10 + j .Ldiv_xnormalized +.Ldiv_xh_zero: + do_nsau a10, xl, a11, a8 + addi a10, a10, -11 + movi a8, -31 + sub a8, a8, a10 + ssl a10 + bltz a10, .Ldiv_xl_srl + sll xh, xl + movi xl, 0 + j .Ldiv_xnormalized +.Ldiv_xl_srl: + srl xh, xl + sll xl, xl + j .Ldiv_xnormalized + +.Ldiv_return_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + leaf_return + +.Ldiv_xnan_or_inf: + /* Set the sign bit of the result. */ + srli a7, yh, 31 + slli a7, a7, 31 + xor xh, xh, a7 + /* If y is NaN or Inf, return NaN. */ + bnall yh, a6, 1f + movi a4, 0x80000 /* make it a quiet NaN */ + or xh, xh, a4 +1: leaf_return + +.Ldiv_ynan_or_inf: + /* If y is Infinity, return zero. */ + slli a8, yh, 12 + or a8, a8, yl + beqz a8, .Ldiv_return_zero + /* y is NaN; return it. */ + mov xh, yh + mov xl, yl + leaf_return + +.Ldiv_highequal1: + bltu xl, yl, 2f + j 3f + + .align 4 + .global __divdf3 + .type __divdf3, @function +__divdf3: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + + /* Get the sign of the result. */ + xor a7, xh, yh + + /* Check for NaN and infinity. */ + ball xh, a6, .Ldiv_xnan_or_inf + ball yh, a6, .Ldiv_ynan_or_inf + + /* Extract the exponents. */ + extui a8, xh, 20, 11 + extui a9, yh, 20, 11 + + beqz a9, .Ldiv_yexpzero +.Ldiv_ynormalized: + beqz a8, .Ldiv_xexpzero +.Ldiv_xnormalized: + + /* Subtract the exponents. */ + sub a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0x1fffff + or xh, xh, a6 + and xh, xh, a10 + or yh, yh, a6 + and yh, yh, a10 + + /* Set SAR for left shift by one. */ + ssai (32 - 1) + + /* The first digit of the mantissa division must be a one. + Shift x (and adjust the exponent) as needed to make this true. */ + bltu yh, xh, 3f + beq yh, xh, .Ldiv_highequal1 +2: src xh, xh, xl + sll xl, xl + addi a8, a8, -1 +3: + /* Do the first subtraction and shift. */ + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + src xh, xh, xl + sll xl, xl + + /* Put the quotient into a10/a11. */ + movi a10, 0 + movi a11, 1 + + /* Divide one bit at a time for 52 bits. */ + movi a9, 52 +#if XCHAL_HAVE_LOOPS + loop a9, .Ldiv_loopend +#endif +.Ldiv_loop: + /* Shift the quotient << 1. */ + src a10, a10, a11 + sll a11, a11 + + /* Is this digit a 0 or 1? */ + bltu xh, yh, 3f + beq xh, yh, .Ldiv_highequal2 + + /* Output a 1 and subtract. */ +2: addi a11, a11, 1 + sub xh, xh, yh + bgeu xl, yl, 1f + addi xh, xh, -1 +1: sub xl, xl, yl + + /* Shift the dividend << 1. */ +3: src xh, xh, xl + sll xl, xl + +#if !XCHAL_HAVE_LOOPS + addi a9, a9, -1 + bnez a9, .Ldiv_loop +#endif +.Ldiv_loopend: + + /* Add the exponent bias (less one to account for the explicit "1.0" + of the mantissa that will be added to the exponent in the final + result). */ + movi a9, 0x3fe + add a8, a8, a9 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..7fd are OK here. */ + addmi a9, a9, 0x400 /* 0x7fe */ + bgeu a8, a9, .Ldiv_overflow + +.Ldiv_round: + /* Round. The remainder (<< 1) is in xh/xl. */ + bltu xh, yh, .Ldiv_rounded + beq xh, yh, .Ldiv_highequal3 +.Ldiv_roundup: + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + +.Ldiv_rounded: + mov xl, a11 + /* Add the exponent to the mantissa. 
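+   (a10 still contains the quotient's leading 1 bit, so this addition also bumps the exponent field by one; the bias added earlier was 0x3fe rather than 0x3ff to account for that.)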
*/ + slli a8, a8, 20 + add xh, a10, a8 + +.Ldiv_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or xh, xh, a7 + leaf_return + +.Ldiv_highequal2: + bgeu xl, yl, 2b + j 3b + +.Ldiv_highequal3: + bltu xl, yl, .Ldiv_rounded + bne xl, yl, .Ldiv_roundup + + /* Remainder is exactly half the divisor. Round even. */ + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + srli a11, a11, 1 + slli a11, a11, 1 + j .Ldiv_rounded + +.Ldiv_overflow: + bltz a8, .Ldiv_underflow + /* Return +/- Infinity. */ + addi a8, a9, 1 /* 0x7ff */ + slli xh, a8, 20 + movi xl, 0 + j .Ldiv_addsign + +.Ldiv_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + ssr a8 + bgeui a8, 32, .Ldiv_bigshift + + /* Shift a10/a11 right. Any bits that are shifted out of a11 are + saved in a6 for rounding the result. */ + sll a6, a11 + src a11, a10, a11 + srl a10, a10 + j 1f + +.Ldiv_bigshift: + bgeui a8, 64, .Ldiv_flush_to_zero + sll a9, a11 /* lost bits shifted out of a11 */ + src a6, a10, a11 + srl a11, a10 + movi a10, 0 + or xl, xl, a9 + + /* Set the exponent to zero. */ +1: movi a8, 0 + + /* Pack any nonzero remainder (in xh/xl) into a6. */ + or xh, xh, xl + beqz xh, 1f + movi a9, 1 + or a6, a6, a9 + + /* Round a10/a11 based on the bits shifted out into a6. */ +1: bgez a6, .Ldiv_rounded + addi a11, a11, 1 + beqz a11, .Ldiv_roundcarry + slli a6, a6, 1 + bnez a6, .Ldiv_rounded + srli a11, a11, 1 + slli a11, a11, 1 + j .Ldiv_rounded + +.Ldiv_roundcarry: + /* a11 is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi a10, a10, 1 + /* Overflow to the exponent field is OK. */ + j .Ldiv_rounded + +.Ldiv_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli xh, a7, 31 + slli xh, xh, 31 + movi xl, 0 + leaf_return + +#endif /* L_divdf3 */ + +#ifdef L_cmpdf2 + + /* Equal and Not Equal */ + + .align 4 + .global __eqdf2 + .global __nedf2 + .set __nedf2, __eqdf2 + .type __eqdf2, @function +__eqdf2: + leaf_entry sp, 16 + bne xl, yl, 2f + bne xh, yh, 4f + + /* The values are equal but NaN != NaN. Check the exponent. */ + movi a6, 0x7ff00000 + ball xh, a6, 3f + + /* Equal. */ + movi a2, 0 + leaf_return + + /* Not equal. */ +2: movi a2, 1 + leaf_return + + /* Check if the mantissas are nonzero. */ +3: slli a7, xh, 12 + or a7, a7, xl + j 5f + + /* Check if x and y are zero with different signs. */ +4: or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl /* xl == yl here */ + + /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa + or x when exponent(x) = 0x7ff and x == y. */ +5: movi a2, 0 + movi a3, 1 + movnez a2, a3, a7 + leaf_return + + + /* Greater Than */ + + .align 4 + .global __gtdf2 + .type __gtdf2, @function +__gtdf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Lle_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 0 + leaf_return + + + /* Less Than or Equal */ + + .align 4 + .global __ledf2 + .type __ledf2, @function +__ledf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Lle_cmp + movi a2, 1 + leaf_return + + /* Check if x is a NaN. 
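+   (The exponent of x is already known to be all ones here; shifting out the sign and exponent leaves only the mantissa, which is nonzero exactly when x is a NaN.)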
*/ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 1 + leaf_return + +.Lle_cmp: + /* Check if x and y have different signs. */ + xor a7, xh, yh + bltz a7, .Lle_diff_signs + + /* Check if x is negative. */ + bltz xh, .Lle_xneg + + /* Check if x <= y. */ + bltu xh, yh, 4f + bne xh, yh, 5f + bltu yl, xl, 5f +4: movi a2, 0 + leaf_return + +.Lle_xneg: + /* Check if y <= x. */ + bltu yh, xh, 4b + bne yh, xh, 5f + bgeu xl, yl, 4b +5: movi a2, 1 + leaf_return + +.Lle_diff_signs: + bltz xh, 4b + + /* Check if both x and y are zero. */ + or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl + or a7, a7, yl + movi a2, 1 + movi a3, 0 + moveqz a2, a3, a7 + leaf_return + + + /* Greater Than or Equal */ + + .align 4 + .global __gedf2 + .type __gedf2, @function +__gedf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Llt_cmp + movi a2, -1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, -1 + leaf_return + + + /* Less Than */ + + .align 4 + .global __ltdf2 + .type __ltdf2, @function +__ltdf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 2f +1: bnall yh, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, yh, 12 + or a7, a7, yl + beqz a7, .Llt_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 0 + leaf_return + +.Llt_cmp: + /* Check if x and y have different signs. */ + xor a7, xh, yh + bltz a7, .Llt_diff_signs + + /* Check if x is negative. */ + bltz xh, .Llt_xneg + + /* Check if x < y. */ + bltu xh, yh, 4f + bne xh, yh, 5f + bgeu xl, yl, 5f +4: movi a2, -1 + leaf_return + +.Llt_xneg: + /* Check if y < x. */ + bltu yh, xh, 4b + bne yh, xh, 5f + bltu yl, xl, 4b +5: movi a2, 0 + leaf_return + +.Llt_diff_signs: + bgez xh, 5b + + /* Check if both x and y are nonzero. */ + or a7, xh, yh + slli a7, a7, 1 + or a7, a7, xl + or a7, a7, yl + movi a2, 0 + movi a3, -1 + movnez a2, a3, a7 + leaf_return + + + /* Unordered */ + + .align 4 + .global __unorddf2 + .type __unorddf2, @function +__unorddf2: + leaf_entry sp, 16 + movi a6, 0x7ff00000 + ball xh, a6, 3f +1: ball yh, a6, 4f +2: movi a2, 0 + leaf_return + +3: slli a7, xh, 12 + or a7, a7, xl + beqz a7, 1b + movi a2, 1 + leaf_return + +4: slli a7, yh, 12 + or a7, a7, yl + beqz a7, 2b + movi a2, 1 + leaf_return + +#endif /* L_cmpdf2 */ + +#ifdef L_fixdfsi + + .align 4 + .global __fixdfsi + .type __fixdfsi, @function +__fixdfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixdfsi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */ + extui a4, xh, 20, 11 + extui a5, a6, 19, 10 /* 0x3fe */ + sub a4, a4, a5 + bgei a4, 32, .Lfixdfsi_maxint + blti a4, 1, .Lfixdfsi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src a5, a7, xl + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixdfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixdfsi_maxint + + /* Translate NaN to +maxint. 
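+   (Clearing xh makes the sign test in the saturation code below treat the NaN as positive, so the result is 0x7fffffff.)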
*/ + movi xh, 0 + +.Lfixdfsi_maxint: + slli a4, a6, 11 /* 0x80000000 */ + addi a5, a4, -1 /* 0x7fffffff */ + movgez a4, a5, xh + mov a2, a4 + leaf_return + +.Lfixdfsi_zero: + movi a2, 0 + leaf_return + +#endif /* L_fixdfsi */ + +#ifdef L_fixdfdi + + .align 4 + .global __fixdfdi + .type __fixdfdi, @function +__fixdfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixdfdi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */ + extui a4, xh, 20, 11 + extui a5, a6, 19, 10 /* 0x3fe */ + sub a4, a4, a5 + bgei a4, 64, .Lfixdfdi_maxint + blti a4, 1, .Lfixdfdi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src xh, a7, xl + sll xl, xl + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixdfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixdfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixdfdi_smallshift: + src xl, xh, xl + srl xh, xh + j .Lfixdfdi_shifted + +.Lfixdfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixdfdi_maxint + + /* Translate NaN to +maxint. */ + movi xh, 0 + +.Lfixdfdi_maxint: + slli a7, a6, 11 /* 0x80000000 */ + bgez xh, 1f + mov xh, a7 + movi xl, 0 + leaf_return + +1: addi xh, a7, -1 /* 0x7fffffff */ + movi xl, -1 + leaf_return + +.Lfixdfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +#endif /* L_fixdfdi */ + +#ifdef L_fixunsdfsi + + .align 4 + .global __fixunsdfsi + .type __fixunsdfsi, @function +__fixunsdfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixunsdfsi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */ + extui a4, xh, 20, 11 + extui a5, a6, 20, 10 /* 0x3ff */ + sub a4, a4, a5 + bgei a4, 32, .Lfixunsdfsi_maxint + bltz a4, .Lfixunsdfsi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src a5, a7, xl + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 32, .Lfixunsdfsi_bigexp + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixunsdfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixunsdfsi_maxint + + /* Translate NaN to 0xffffffff. */ + movi a2, -1 + leaf_return + +.Lfixunsdfsi_maxint: + slli a4, a6, 11 /* 0x80000000 */ + movi a5, -1 /* 0xffffffff */ + movgez a4, a5, xh + mov a2, a4 + leaf_return + +.Lfixunsdfsi_zero: + movi a2, 0 + leaf_return + +.Lfixunsdfsi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz xh, 1f + mov a2, a5 /* no shift needed */ + leaf_return + + /* Return 0x80000000 if negative. */ +1: slli a2, a6, 11 + leaf_return + +#endif /* L_fixunsdfsi */ + +#ifdef L_fixunsdfdi + + .align 4 + .global __fixunsdfdi + .type __fixunsdfdi, @function +__fixunsdfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7ff00000 + ball xh, a6, .Lfixunsdfdi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */ + extui a4, xh, 20, 11 + extui a5, a6, 20, 10 /* 0x3ff */ + sub a4, a4, a5 + bgei a4, 64, .Lfixunsdfdi_maxint + bltz a4, .Lfixunsdfdi_zero + + /* Add explicit "1.0" and shift << 11. */ + or a7, xh, a6 + ssai (32 - 11) + src xh, a7, xl + sll xl, xl + + /* Shift back to the right, based on the exponent. 
*/ + addi a4, a4, 1 + beqi a4, 64, .Lfixunsdfdi_bigexp + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixunsdfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixunsdfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixunsdfdi_smallshift: + src xl, xh, xl + srl xh, xh + j .Lfixunsdfdi_shifted + +.Lfixunsdfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, xh, 12 + or a4, a4, xl + beqz a4, .Lfixunsdfdi_maxint + + /* Translate NaN to 0xffffffff.... */ +1: movi xh, -1 + movi xl, -1 + leaf_return + +.Lfixunsdfdi_maxint: + bgez xh, 1b +2: slli xh, a6, 11 /* 0x80000000 */ + movi xl, 0 + leaf_return + +.Lfixunsdfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +.Lfixunsdfdi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a7, 2b + leaf_return /* no shift needed */ + +#endif /* L_fixunsdfdi */ + +#ifdef L_floatsidf + + .align 4 + .global __floatunsidf + .type __floatunsidf, @function +__floatunsidf: + leaf_entry sp, 16 + beqz a2, .Lfloatsidf_return_zero + + /* Set the sign to zero and jump to the floatsidf code. */ + movi a7, 0 + j .Lfloatsidf_normalize + + .align 4 + .global __floatsidf + .type __floatsidf, @function +__floatsidf: + leaf_entry sp, 16 + + /* Check for zero. */ + beqz a2, .Lfloatsidf_return_zero + + /* Save the sign. */ + extui a7, a2, 31, 1 + + /* Get the absolute value. */ +#if XCHAL_HAVE_ABS + abs a2, a2 +#else + neg a4, a2 + movltz a2, a4, a2 +#endif + +.Lfloatsidf_normalize: + /* Normalize with the first 1 bit in the msb. */ + do_nsau a4, a2, a5, a6 + ssl a4 + sll a5, a2 + + /* Shift the mantissa into position. */ + srli xh, a5, 11 + slli xl, a5, (32 - 11) + + /* Set the exponent. */ + movi a5, 0x41d /* 0x3fe + 31 */ + sub a5, a5, a4 + slli a5, a5, 20 + add xh, xh, a5 + + /* Add the sign and return. */ + slli a7, a7, 31 + or xh, xh, a7 + leaf_return + +.Lfloatsidf_return_zero: + movi a3, 0 + leaf_return + +#endif /* L_floatsidf */ + +#ifdef L_floatdidf + + .align 4 + .global __floatundidf + .type __floatundidf, @function +__floatundidf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Set the sign to zero and jump to the floatdidf code. */ + movi a7, 0 + j .Lfloatdidf_normalize + + .align 4 + .global __floatdidf + .type __floatdidf, @function +__floatdidf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Save the sign. */ + extui a7, xh, 31, 1 + + /* Get the absolute value. */ + bgez xh, .Lfloatdidf_normalize + neg xl, xl + neg xh, xh + beqz xl, .Lfloatdidf_normalize + addi xh, xh, -1 + +.Lfloatdidf_normalize: + /* Normalize with the first 1 bit in the msb of xh. */ + beqz xh, .Lfloatdidf_bigshift + do_nsau a4, xh, a5, a6 + ssl a4 + src xh, xh, xl + sll xl, xl + +.Lfloatdidf_shifted: + /* Shift the mantissa into position, with rounding bits in a6. */ + ssai 11 + sll a6, xl + src xl, xh, xl + srl xh, xh + + /* Set the exponent. */ + movi a5, 0x43d /* 0x3fe + 63 */ + sub a5, a5, a4 + slli a5, a5, 20 + add xh, xh, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or xh, xh, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, 2f + addi xl, xl, 1 + beqz xl, .Lfloatdidf_roundcarry + + /* Check if the leftover fraction is exactly 1/2. */ + slli a6, a6, 1 + beqz a6, .Lfloatdidf_exactlyhalf +2: leaf_return + +.Lfloatdidf_bigshift: + /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. 
*/ + do_nsau a4, xl, a5, a6 + ssl a4 + sll xh, xl + movi xl, 0 + addi a4, a4, 32 + j .Lfloatdidf_shifted + +.Lfloatdidf_exactlyhalf: + /* Round down to the nearest even value. */ + srli xl, xl, 1 + slli xl, xl, 1 + leaf_return + +.Lfloatdidf_roundcarry: + /* xl is always zero when the rounding increment overflows, so + there's no need to round it to an even value. */ + addi xh, xh, 1 + /* Overflow to the exponent is OK. */ + leaf_return + +#endif /* L_floatdidf */ + +#ifdef L_truncdfsf2 + + .align 4 + .global __truncdfsf2 + .type __truncdfsf2, @function +__truncdfsf2: + leaf_entry sp, 16 + + /* Adjust the exponent bias. */ + movi a4, (0x3ff - 0x7f) << 20 + sub a5, xh, a4 + + /* Check for underflow. */ + xor a6, xh, a5 + bltz a6, .Ltrunc_underflow + extui a6, a5, 20, 11 + beqz a6, .Ltrunc_underflow + + /* Check for overflow. */ + movi a4, 255 + bge a6, a4, .Ltrunc_overflow + + /* Shift a5/xl << 3 into a5/a4. */ + ssai (32 - 3) + src a5, a5, xl + sll a4, xl + +.Ltrunc_addsign: + /* Add the sign bit. */ + extui a6, xh, 31, 1 + slli a6, a6, 31 + or a2, a6, a5 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a4, 1f + addi a2, a2, 1 + /* Overflow to the exponent is OK. The answer will be correct. */ + + /* Check if the leftover fraction is exactly 1/2. */ + slli a4, a4, 1 + beqz a4, .Ltrunc_exactlyhalf +1: leaf_return + +.Ltrunc_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +.Ltrunc_overflow: + /* Check if exponent == 0x7ff. */ + movi a4, 0x7ff00000 + bnall xh, a4, 1f + + /* Check if mantissa is nonzero. */ + slli a5, xh, 12 + or a5, a5, xl + beqz a5, 1f + + /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */ + srli a4, a4, 1 + +1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */ + /* Add the sign bit. */ + extui a6, xh, 31, 1 + ssai 1 + src a2, a6, a4 + leaf_return + +.Ltrunc_underflow: + /* Find shift count for a subnormal. Flush to zero if >= 32. */ + extui a6, xh, 20, 11 + movi a5, 0x3ff - 0x7f + sub a6, a5, a6 + addi a6, a6, 1 + bgeui a6, 32, 1f + + /* Replace the exponent with an explicit "1.0". */ + slli a5, a5, 13 /* 0x700000 */ + or a5, a5, xh + slli a5, a5, 11 + srli a5, a5, 11 + + /* Shift the mantissa left by 3 bits (into a5/a4). */ + ssai (32 - 3) + src a5, a5, xl + sll a4, xl + + /* Shift right by a6. */ + ssr a6 + sll a7, a4 + src a4, a5, a4 + srl a5, a5 + beqz a7, .Ltrunc_addsign + or a4, a4, a6 /* any positive, nonzero value will work */ + j .Ltrunc_addsign + + /* Return +/- zero. */ +1: extui a2, xh, 31, 1 + slli a2, a2, 31 + leaf_return + +#endif /* L_truncdfsf2 */ + +#ifdef L_extendsfdf2 + + .align 4 + .global __extendsfdf2 + .type __extendsfdf2, @function +__extendsfdf2: + leaf_entry sp, 16 + + /* Save the sign bit and then shift it off. */ + extui a5, a2, 31, 1 + slli a5, a5, 31 + slli a4, a2, 1 + + /* Extract and check the exponent. */ + extui a6, a2, 23, 8 + beqz a6, .Lextend_expzero + addi a6, a6, 1 + beqi a6, 256, .Lextend_nan_or_inf + + /* Shift >> 3 into a4/xl. */ + srli a4, a4, 4 + slli xl, a2, (32 - 3) + + /* Adjust the exponent bias. */ + movi a6, (0x3ff - 0x7f) << 20 + add a4, a4, a6 + + /* Add the sign bit. */ + or xh, a4, a5 + leaf_return + +.Lextend_nan_or_inf: + movi a4, 0x7ff00000 + + /* Check for NaN. */ + slli a7, a2, 9 + beqz a7, 1f + + slli a6, a6, 11 /* 0x80000 */ + or a4, a4, a6 + + /* Add the sign and return. */ +1: or xh, a4, a5 + movi xl, 0 + leaf_return + +.Lextend_expzero: + beqz a4, 1b + + /* Normalize it to have 8 zero bits before the first 1 bit. 
*/ + do_nsau a7, a4, a2, a3 + addi a7, a7, -8 + ssl a7 + sll a4, a4 + + /* Shift >> 3 into a4/xl. */ + slli xl, a4, (32 - 3) + srli a4, a4, 3 + + /* Set the exponent. */ + movi a6, 0x3fe - 0x7f + sub a6, a6, a7 + slli a6, a6, 20 + add a4, a4, a6 + + /* Add the sign and return. */ + or xh, a4, a5 + leaf_return + +#endif /* L_extendsfdf2 */ + + diff --git a/gcc/config/xtensa/ieee754-sf.S b/gcc/config/xtensa/ieee754-sf.S new file mode 100644 index 000000000..d75be0e5a --- /dev/null +++ b/gcc/config/xtensa/ieee754-sf.S @@ -0,0 +1,1757 @@ +/* IEEE-754 single-precision functions for Xtensa + Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __XTENSA_EB__ +#define xh a2 +#define xl a3 +#define yh a4 +#define yl a5 +#else +#define xh a3 +#define xl a2 +#define yh a5 +#define yl a4 +#endif + +/* Warning! The branch displacements for some Xtensa branch instructions + are quite small, and this code has been carefully laid out to keep + branch targets in range. If you change anything, be sure to check that + the assembler is not relaxing anything to branch over a jump. */ + +#ifdef L_negsf2 + + .align 4 + .global __negsf2 + .type __negsf2, @function +__negsf2: + leaf_entry sp, 16 + movi a4, 0x80000000 + xor a2, a2, a4 + leaf_return + +#endif /* L_negsf2 */ + +#ifdef L_addsubsf3 + + /* Addition */ +__addsf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Ladd_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall a3, a6, 1f + /* If x is a NaN, return it. Otherwise, return y. */ + slli a7, a2, 9 + beqz a7, .Ladd_ynan_or_inf +1: leaf_return + +.Ladd_ynan_or_inf: + /* Return y. */ + mov a2, a3 + leaf_return + +.Ladd_opposite_signs: + /* Operand signs differ. Do a subtraction. */ + slli a7, a6, 8 + xor a3, a3, a7 + j .Lsub_same_sign + + .align 4 + .global __addsf3 + .type __addsf3, @function +__addsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Check if the two operands have the same sign. */ + xor a7, a2, a3 + bltz a7, .Ladd_opposite_signs + +.Ladd_same_sign: + /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ + ball a2, a6, .Ladd_xnan_or_inf + ball a3, a6, .Ladd_ynan_or_inf + + /* Compare the exponents. The smaller operand will be shifted + right by the exponent difference and added to the larger + one. */ + extui a7, a2, 23, 9 + extui a8, a3, 23, 9 + bltu a7, a8, .Ladd_shiftx + +.Ladd_shifty: + /* Check if the smaller (or equal) exponent is zero. 
*/ + bnone a3, a6, .Ladd_yexpzero + + /* Replace y sign/exponent with 0x008. */ + or a3, a3, a6 + slli a3, a3, 8 + srli a3, a3, 8 + +.Ladd_yexpdiff: + /* Compute the exponent difference. */ + sub a10, a7, a8 + + /* Exponent difference > 32 -- just return the bigger value. */ + bgeui a10, 32, 1f + + /* Shift y right by the exponent difference. Any bits that are + shifted out of y are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, a3, a9 + srl a3, a3 + + /* Do the addition. */ + add a2, a2, a3 + + /* Check if the add overflowed into the exponent. */ + extui a10, a2, 23, 9 + beq a10, a7, .Ladd_round + mov a8, a7 + j .Ladd_carry + +.Ladd_yexpzero: + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0", and increment the apparent exponent + because subnormals behave as if they had the minimum (nonzero) + exponent. Test for the case when both exponents are zero. */ + slli a3, a3, 9 + srli a3, a3, 9 + bnone a2, a6, .Ladd_bothexpzero + addi a8, a8, 1 + j .Ladd_yexpdiff + +.Ladd_bothexpzero: + /* Both exponents are zero. Handle this as a special case. There + is no need to shift or round, and the normal code for handling + a carry into the exponent field will not work because it + assumes there is an implicit "1.0" that needs to be added. */ + add a2, a2, a3 +1: leaf_return + +.Ladd_xexpzero: + /* Same as "yexpzero" except skip handling the case when both + exponents are zero. */ + slli a2, a2, 9 + srli a2, a2, 9 + addi a7, a7, 1 + j .Ladd_xexpdiff + +.Ladd_shiftx: + /* Same thing as the "shifty" code, but with x and y swapped. Also, + because the exponent difference is always nonzero in this version, + the shift sequence can use SLL and skip loading a constant zero. */ + bnone a2, a6, .Ladd_xexpzero + + or a2, a2, a6 + slli a2, a2, 8 + srli a2, a2, 8 + +.Ladd_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Ladd_returny + + ssr a10 + sll a9, a2 + srl a2, a2 + + add a2, a2, a3 + + /* Check if the add overflowed into the exponent. */ + extui a10, a2, 23, 9 + bne a10, a8, .Ladd_carry + +.Ladd_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi a2, a2, 1 + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_returny: + mov a2, a3 + leaf_return + +.Ladd_carry: + /* The addition has overflowed into the exponent field, so the + value needs to be renormalized. The mantissa of the result + can be recovered by subtracting the original exponent and + adding 0x800000 (which is the explicit "1.0" for the + mantissa of the non-shifted operand -- the "1.0" for the + shifted operand was already added). The mantissa can then + be shifted right by one bit. The explicit "1.0" of the + shifted mantissa then needs to be replaced by the exponent, + incremented by one to account for the normalizing shift. + It is faster to combine these operations: do the shift first + and combine the additions and subtractions. If x is the + original exponent, the result is: + shifted mantissa - (x << 22) + (1 << 22) + (x << 23) + or: + shifted mantissa + ((x + 1) << 22) + Note that the exponent is incremented here by leaving the + explicit "1.0" of the mantissa in the exponent field. */ + + /* Shift x right by one bit. Save the lsb. */ + mov a10, a2 + srli a2, a2, 1 + + /* See explanation above. The original exponent is in a8. */ + addi a8, a8, 1 + slli a8, a8, 22 + add a2, a2, a8 + + /* Return an Infinity if the exponent overflowed. 
*/ + ball a2, a6, .Ladd_infinity + + /* Same thing as the "round" code except the msb of the leftover + fraction is bit 0 of a10, with the rest of the fraction in a9. */ + bbci.l a10, 0, 1f + addi a2, a2, 1 + beqz a9, .Ladd_exactlyhalf +1: leaf_return + +.Ladd_infinity: + /* Clear the mantissa. */ + srli a2, a2, 23 + slli a2, a2, 23 + + /* The sign bit may have been lost in a carry-out. Put it back. */ + slli a8, a8, 1 + or a2, a2, a8 + leaf_return + +.Ladd_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + + + /* Subtraction */ +__subsf3_aux: + + /* Handle NaNs and Infinities. (This code is placed before the + start of the function just to keep it in range of the limited + branch displacements.) */ + +.Lsub_xnan_or_inf: + /* If y is neither Infinity nor NaN, return x. */ + bnall a3, a6, 1f + /* Both x and y are either NaN or Inf, so the result is NaN. */ + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Lsub_ynan_or_inf: + /* Negate y and return it. */ + slli a7, a6, 8 + xor a2, a3, a7 + leaf_return + +.Lsub_opposite_signs: + /* Operand signs differ. Do an addition. */ + slli a7, a6, 8 + xor a3, a3, a7 + j .Ladd_same_sign + + .align 4 + .global __subsf3 + .type __subsf3, @function +__subsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Check if the two operands have the same sign. */ + xor a7, a2, a3 + bltz a7, .Lsub_opposite_signs + +.Lsub_same_sign: + /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ + ball a2, a6, .Lsub_xnan_or_inf + ball a3, a6, .Lsub_ynan_or_inf + + /* Compare the operands. In contrast to addition, the entire + value matters here. */ + extui a7, a2, 23, 8 + extui a8, a3, 23, 8 + bltu a2, a3, .Lsub_xsmaller + +.Lsub_ysmaller: + /* Check if the smaller (or equal) exponent is zero. */ + bnone a3, a6, .Lsub_yexpzero + + /* Replace y sign/exponent with 0x008. */ + or a3, a3, a6 + slli a3, a3, 8 + srli a3, a3, 8 + +.Lsub_yexpdiff: + /* Compute the exponent difference. */ + sub a10, a7, a8 + + /* Exponent difference > 32 -- just return the bigger value. */ + bgeui a10, 32, 1f + + /* Shift y right by the exponent difference. Any bits that are + shifted out of y are saved in a9 for rounding the result. */ + ssr a10 + movi a9, 0 + src a9, a3, a9 + srl a3, a3 + + sub a2, a2, a3 + + /* Subtract the leftover bits in a9 from zero and propagate any + borrow from a2. */ + neg a9, a9 + addi a10, a2, -1 + movnez a2, a10, a9 + + /* Check if the subtract underflowed into the exponent. */ + extui a10, a2, 23, 8 + beq a10, a7, .Lsub_round + j .Lsub_borrow + +.Lsub_yexpzero: + /* Return zero if the inputs are equal. (For the non-subnormal + case, subtracting the "1.0" will cause a borrow from the exponent + and this case can be detected when handling the borrow.) */ + beq a2, a3, .Lsub_return_zero + + /* y is a subnormal value. Replace its sign/exponent with zero, + i.e., no implicit "1.0". Unless x is also a subnormal, increment + y's apparent exponent because subnormals behave as if they had + the minimum (nonzero) exponent. */ + slli a3, a3, 9 + srli a3, a3, 9 + bnone a2, a6, .Lsub_yexpdiff + addi a8, a8, 1 + j .Lsub_yexpdiff + +.Lsub_returny: + /* Negate and return y. */ + slli a7, a6, 8 + xor a2, a3, a7 +1: leaf_return + +.Lsub_xsmaller: + /* Same thing as the "ysmaller" code, but with x and y swapped and + with y negated. 
*/ + bnone a2, a6, .Lsub_xexpzero + + or a2, a2, a6 + slli a2, a2, 8 + srli a2, a2, 8 + +.Lsub_xexpdiff: + sub a10, a8, a7 + bgeui a10, 32, .Lsub_returny + + ssr a10 + movi a9, 0 + src a9, a2, a9 + srl a2, a2 + + /* Negate y. */ + slli a11, a6, 8 + xor a3, a3, a11 + + sub a2, a3, a2 + + neg a9, a9 + addi a10, a2, -1 + movnez a2, a10, a9 + + /* Check if the subtract underflowed into the exponent. */ + extui a10, a2, 23, 8 + bne a10, a8, .Lsub_borrow + +.Lsub_round: + /* Round up if the leftover fraction is >= 1/2. */ + bgez a9, 1f + addi a2, a2, 1 + + /* Check if the leftover fraction is exactly 1/2. */ + slli a9, a9, 1 + beqz a9, .Lsub_exactlyhalf +1: leaf_return + +.Lsub_xexpzero: + /* Same as "yexpzero". */ + beq a2, a3, .Lsub_return_zero + slli a2, a2, 9 + srli a2, a2, 9 + bnone a3, a6, .Lsub_xexpdiff + addi a7, a7, 1 + j .Lsub_xexpdiff + +.Lsub_return_zero: + movi a2, 0 + leaf_return + +.Lsub_borrow: + /* The subtraction has underflowed into the exponent field, so the + value needs to be renormalized. Shift the mantissa left as + needed to remove any leading zeros and adjust the exponent + accordingly. If the exponent is not large enough to remove + all the leading zeros, the result will be a subnormal value. */ + + slli a8, a2, 9 + beqz a8, .Lsub_xzero + do_nsau a6, a8, a7, a11 + srli a8, a8, 9 + bge a6, a10, .Lsub_subnormal + addi a6, a6, 1 + +.Lsub_normalize_shift: + /* Shift the mantissa (a8/a9) left by a6. */ + ssl a6 + src a8, a8, a9 + sll a9, a9 + + /* Combine the shifted mantissa with the sign and exponent, + decrementing the exponent by a6. (The exponent has already + been decremented by one due to the borrow from the subtraction, + but adding the mantissa will increment the exponent by one.) */ + srli a2, a2, 23 + sub a2, a2, a6 + slli a2, a2, 23 + add a2, a2, a8 + j .Lsub_round + +.Lsub_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +.Lsub_xzero: + /* If there was a borrow from the exponent, and the mantissa and + guard digits are all zero, then the inputs were equal and the + result should be zero. */ + beqz a9, .Lsub_return_zero + + /* Only the guard digit is nonzero. Shift by min(24, a10). */ + addi a11, a10, -24 + movi a6, 24 + movltz a6, a10, a11 + j .Lsub_normalize_shift + +.Lsub_subnormal: + /* The exponent is too small to shift away all the leading zeros. + Set a6 to the current exponent (which has already been + decremented by the borrow) so that the exponent of the result + will be zero. Do not add 1 to a6 in this case, because: (1) + adding the mantissa will not increment the exponent, so there is + no need to subtract anything extra from the exponent to + compensate, and (2) the effective exponent of a subnormal is 1 + not 0 so the shift amount must be 1 smaller than normal. */ + mov a6, a10 + j .Lsub_normalize_shift + +#endif /* L_addsubsf3 */ + +#ifdef L_mulsf3 + + /* Multiplication */ +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + +__mulsf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Lmul_xexpzero: + /* Clear the sign bit of x. */ + slli a2, a2, 1 + srli a2, a2, 1 + + /* If x is zero, return zero. */ + beqz a2, .Lmul_return_zero + + /* Normalize x. Adjust the exponent in a8. 
*/ + do_nsau a10, a2, a11, a12 + addi a10, a10, -8 + ssl a10 + sll a2, a2 + movi a8, 1 + sub a8, a8, a10 + j .Lmul_xnormalized + +.Lmul_yexpzero: + /* Clear the sign bit of y. */ + slli a3, a3, 1 + srli a3, a3, 1 + + /* If y is zero, return zero. */ + beqz a3, .Lmul_return_zero + + /* Normalize y. Adjust the exponent in a9. */ + do_nsau a10, a3, a11, a12 + addi a10, a10, -8 + ssl a10 + sll a3, a3 + movi a9, 1 + sub a9, a9, a10 + j .Lmul_ynormalized + +.Lmul_return_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + j .Lmul_done + +.Lmul_xnan_or_inf: + /* If y is zero, return NaN. */ + slli a8, a3, 1 + bnez a8, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 + j .Lmul_done +1: + /* If y is NaN, return y. */ + bnall a3, a6, .Lmul_returnx + slli a8, a3, 9 + beqz a8, .Lmul_returnx + +.Lmul_returny: + mov a2, a3 + +.Lmul_returnx: + /* Set the sign bit and return. */ + extui a7, a7, 31, 1 + slli a2, a2, 1 + ssai 1 + src a2, a7, a2 + j .Lmul_done + +.Lmul_ynan_or_inf: + /* If x is zero, return NaN. */ + slli a8, a2, 1 + bnez a8, .Lmul_returny + movi a7, 0x400000 /* make it a quiet NaN */ + or a2, a3, a7 + j .Lmul_done + + .align 4 + .global __mulsf3 + .type __mulsf3, @function +__mulsf3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 64 +#else + leaf_entry sp, 32 +#endif + movi a6, 0x7f800000 + + /* Get the sign of the result. */ + xor a7, a2, a3 + + /* Check for NaN and infinity. */ + ball a2, a6, .Lmul_xnan_or_inf + ball a3, a6, .Lmul_ynan_or_inf + + /* Extract the exponents. */ + extui a8, a2, 23, 8 + extui a9, a3, 23, 8 + + beqz a8, .Lmul_xexpzero +.Lmul_xnormalized: + beqz a9, .Lmul_yexpzero +.Lmul_ynormalized: + + /* Add the exponents. */ + add a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0xffffff + or a2, a2, a6 + and a2, a2, a10 + or a3, a3, a6 + and a3, a3, a10 + + /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */ + +#if XCHAL_HAVE_MUL32_HIGH + + mull a6, a2, a3 + muluh a2, a2, a3 + +#else + + /* Break the inputs into 16-bit chunks and compute 4 32-bit partial + products. These partial products are: + + 0 xl * yl + + 1 xl * yh + 2 xh * yl + + 3 xh * yh + + If using the Mul16 or Mul32 multiplier options, these input + chunks must be stored in separate registers. For Mac16, the + UMUL.AA.* opcodes can specify that the inputs come from either + half of the registers, so there is no need to shift them out + ahead of time. If there is no multiply hardware, the 16-bit + chunks can be extracted when setting up the arguments to the + separate multiply function. */ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Calling a separate multiply function will clobber a0 and requires + use of a8 as a temporary, so save those values now. (The function + uses a custom ABI so nothing else needs to be saved.) */ + s32i a0, sp, 0 + s32i a8, sp, 4 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define a2h a4 +#define a3h a5 + + /* Get the high halves of the inputs into registers. */ + srli a2h, a2, 16 + srli a3h, a3, 16 + +#define a2l a2 +#define a3l a3 + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. 
*/ + extui a2, a2, 0, 16 + extui a3, a3, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a6 with carry-out in a9. */ + do_mul(a6, a2, l, a3, h) /* pp 1 */ + do_mul(a11, a2, h, a3, l) /* pp 2 */ + movi a9, 0 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Shift the high half of a9/a6 into position in a9. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a9, a9, a6 + + /* Compute the low word into a6. */ + do_mul(a11, a2, l, a3, l) /* pp 0 */ + sll a6, a6 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Compute the high word into a2. */ + do_mul(a2, a2, h, a3, h) /* pp 3 */ + add a2, a2, a9 + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Restore values saved on the stack during the multiplication. */ + l32i a0, sp, 0 + l32i a8, sp, 4 +#endif +#endif /* ! XCHAL_HAVE_MUL32_HIGH */ + + /* Shift left by 9 bits, unless there was a carry-out from the + multiply, in which case, shift by 8 bits and increment the + exponent. */ + movi a4, 9 + srli a5, a2, 24 - 9 + beqz a5, 1f + addi a4, a4, -1 + addi a8, a8, 1 +1: ssl a4 + src a2, a2, a6 + sll a6, a6 + + /* Subtract the extra bias from the exponent sum (plus one to account + for the explicit "1.0" of the mantissa that will be added to the + exponent in the final result). */ + movi a4, 0x80 + sub a8, a8, a4 + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..fd are OK here. */ + movi a4, 0xfe + bgeu a8, a4, .Lmul_overflow + +.Lmul_round: + /* Round. */ + bgez a6, .Lmul_rounded + addi a2, a2, 1 + slli a6, a6, 1 + beqz a6, .Lmul_exactlyhalf + +.Lmul_rounded: + /* Add the exponent to the mantissa. */ + slli a8, a8, 23 + add a2, a2, a8 + +.Lmul_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or a2, a2, a7 + +.Lmul_done: +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +.Lmul_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + j .Lmul_rounded + +.Lmul_overflow: + bltz a8, .Lmul_underflow + /* Return +/- Infinity. 
*/ + movi a8, 0xff + slli a2, a8, 23 + j .Lmul_addsign + +.Lmul_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + mov a9, a6 + ssr a8 + bgeui a8, 32, .Lmul_flush_to_zero + + /* Shift a2 right. Any bits that are shifted out of a2 are saved + in a6 (combined with the shifted-out bits currently in a6) for + rounding the result. */ + sll a6, a2 + srl a2, a2 + + /* Set the exponent to zero. */ + movi a8, 0 + + /* Pack any nonzero bits shifted out into a6. */ + beqz a9, .Lmul_round + movi a9, 1 + or a6, a6, a9 + j .Lmul_round + +.Lmul_flush_to_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + j .Lmul_done + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ +#endif /* L_mulsf3 */ + +#ifdef L_divsf3 + + /* Division */ +__divsf3_aux: + + /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). + (This code is placed before the start of the function just to + keep it in range of the limited branch displacements.) */ + +.Ldiv_yexpzero: + /* Clear the sign bit of y. */ + slli a3, a3, 1 + srli a3, a3, 1 + + /* Check for division by zero. */ + beqz a3, .Ldiv_yzero + + /* Normalize y. Adjust the exponent in a9. */ + do_nsau a10, a3, a4, a5 + addi a10, a10, -8 + ssl a10 + sll a3, a3 + movi a9, 1 + sub a9, a9, a10 + j .Ldiv_ynormalized + +.Ldiv_yzero: + /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ + slli a4, a2, 1 + srli a4, a4, 1 + srli a2, a7, 31 + slli a2, a2, 31 + or a2, a2, a6 + bnez a4, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Ldiv_xexpzero: + /* Clear the sign bit of x. */ + slli a2, a2, 1 + srli a2, a2, 1 + + /* If x is zero, return zero. */ + beqz a2, .Ldiv_return_zero + + /* Normalize x. Adjust the exponent in a8. */ + do_nsau a10, a2, a4, a5 + addi a10, a10, -8 + ssl a10 + sll a2, a2 + movi a8, 1 + sub a8, a8, a10 + j .Ldiv_xnormalized + +.Ldiv_return_zero: + /* Return zero with the appropriate sign bit. */ + srli a2, a7, 31 + slli a2, a2, 31 + leaf_return + +.Ldiv_xnan_or_inf: + /* Set the sign bit of the result. */ + srli a7, a3, 31 + slli a7, a7, 31 + xor a2, a2, a7 + /* If y is NaN or Inf, return NaN. 
*/ + bnall a3, a6, 1f + movi a4, 0x400000 /* make it a quiet NaN */ + or a2, a2, a4 +1: leaf_return + +.Ldiv_ynan_or_inf: + /* If y is Infinity, return zero. */ + slli a8, a3, 9 + beqz a8, .Ldiv_return_zero + /* y is NaN; return it. */ + mov a2, a3 + leaf_return + + .align 4 + .global __divsf3 + .type __divsf3, @function +__divsf3: + leaf_entry sp, 16 + movi a6, 0x7f800000 + + /* Get the sign of the result. */ + xor a7, a2, a3 + + /* Check for NaN and infinity. */ + ball a2, a6, .Ldiv_xnan_or_inf + ball a3, a6, .Ldiv_ynan_or_inf + + /* Extract the exponents. */ + extui a8, a2, 23, 8 + extui a9, a3, 23, 8 + + beqz a9, .Ldiv_yexpzero +.Ldiv_ynormalized: + beqz a8, .Ldiv_xexpzero +.Ldiv_xnormalized: + + /* Subtract the exponents. */ + sub a8, a8, a9 + + /* Replace sign/exponent fields with explicit "1.0". */ + movi a10, 0xffffff + or a2, a2, a6 + and a2, a2, a10 + or a3, a3, a6 + and a3, a3, a10 + + /* The first digit of the mantissa division must be a one. + Shift x (and adjust the exponent) as needed to make this true. */ + bltu a3, a2, 1f + slli a2, a2, 1 + addi a8, a8, -1 +1: + /* Do the first subtraction and shift. */ + sub a2, a2, a3 + slli a2, a2, 1 + + /* Put the quotient into a10. */ + movi a10, 1 + + /* Divide one bit at a time for 23 bits. */ + movi a9, 23 +#if XCHAL_HAVE_LOOPS + loop a9, .Ldiv_loopend +#endif +.Ldiv_loop: + /* Shift the quotient << 1. */ + slli a10, a10, 1 + + /* Is this digit a 0 or 1? */ + bltu a2, a3, 1f + + /* Output a 1 and subtract. */ + addi a10, a10, 1 + sub a2, a2, a3 + + /* Shift the dividend << 1. */ +1: slli a2, a2, 1 + +#if !XCHAL_HAVE_LOOPS + addi a9, a9, -1 + bnez a9, .Ldiv_loop +#endif +.Ldiv_loopend: + + /* Add the exponent bias (less one to account for the explicit "1.0" + of the mantissa that will be added to the exponent in the final + result). */ + addi a8, a8, 0x7e + + /* Check for over/underflow. The value in a8 is one less than the + final exponent, so values in the range 0..fd are OK here. */ + movi a4, 0xfe + bgeu a8, a4, .Ldiv_overflow + +.Ldiv_round: + /* Round. The remainder (<< 1) is in a2. */ + bltu a2, a3, .Ldiv_rounded + addi a10, a10, 1 + beq a2, a3, .Ldiv_exactlyhalf + +.Ldiv_rounded: + /* Add the exponent to the mantissa. */ + slli a8, a8, 23 + add a2, a10, a8 + +.Ldiv_addsign: + /* Add the sign bit. */ + srli a7, a7, 31 + slli a7, a7, 31 + or a2, a2, a7 + leaf_return + +.Ldiv_overflow: + bltz a8, .Ldiv_underflow + /* Return +/- Infinity. */ + addi a8, a4, 1 /* 0xff */ + slli a2, a8, 23 + j .Ldiv_addsign + +.Ldiv_exactlyhalf: + /* Remainder is exactly half the divisor. Round even. */ + srli a10, a10, 1 + slli a10, a10, 1 + j .Ldiv_rounded + +.Ldiv_underflow: + /* Create a subnormal value, where the exponent field contains zero, + but the effective exponent is 1. The value of a8 is one less than + the actual exponent, so just negate it to get the shift amount. */ + neg a8, a8 + ssr a8 + bgeui a8, 32, .Ldiv_flush_to_zero + + /* Shift a10 right. Any bits that are shifted out of a10 are + saved in a6 for rounding the result. */ + sll a6, a10 + srl a10, a10 + + /* Set the exponent to zero. */ + movi a8, 0 + + /* Pack any nonzero remainder (in a2) into a6. */ + beqz a2, 1f + movi a9, 1 + or a6, a6, a9 + + /* Round a10 based on the bits shifted out into a6. */ +1: bgez a6, .Ldiv_rounded + addi a10, a10, 1 + slli a6, a6, 1 + bnez a6, .Ldiv_rounded + srli a10, a10, 1 + slli a10, a10, 1 + j .Ldiv_rounded + +.Ldiv_flush_to_zero: + /* Return zero with the appropriate sign bit. 
*/ + srli a2, a7, 31 + slli a2, a2, 31 + leaf_return + +#endif /* L_divsf3 */ + +#ifdef L_cmpsf2 + + /* Equal and Not Equal */ + + .align 4 + .global __eqsf2 + .global __nesf2 + .set __nesf2, __eqsf2 + .type __eqsf2, @function +__eqsf2: + leaf_entry sp, 16 + bne a2, a3, 4f + + /* The values are equal but NaN != NaN. Check the exponent. */ + movi a6, 0x7f800000 + ball a2, a6, 3f + + /* Equal. */ + movi a2, 0 + leaf_return + + /* Not equal. */ +2: movi a2, 1 + leaf_return + + /* Check if the mantissas are nonzero. */ +3: slli a7, a2, 9 + j 5f + + /* Check if x and y are zero with different signs. */ +4: or a7, a2, a3 + slli a7, a7, 1 + + /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa + or x when exponent(x) = 0x7f8 and x == y. */ +5: movi a2, 0 + movi a3, 1 + movnez a2, a3, a7 + leaf_return + + + /* Greater Than */ + + .align 4 + .global __gtsf2 + .type __gtsf2, @function +__gtsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Lle_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 0 + leaf_return + + + /* Less Than or Equal */ + + .align 4 + .global __lesf2 + .type __lesf2, @function +__lesf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Lle_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Lle_cmp + movi a2, 1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 1 + leaf_return + +.Lle_cmp: + /* Check if x and y have different signs. */ + xor a7, a2, a3 + bltz a7, .Lle_diff_signs + + /* Check if x is negative. */ + bltz a2, .Lle_xneg + + /* Check if x <= y. */ + bltu a3, a2, 5f +4: movi a2, 0 + leaf_return + +.Lle_xneg: + /* Check if y <= x. */ + bgeu a2, a3, 4b +5: movi a2, 1 + leaf_return + +.Lle_diff_signs: + bltz a2, 4b + + /* Check if both x and y are zero. */ + or a7, a2, a3 + slli a7, a7, 1 + movi a2, 1 + movi a3, 0 + moveqz a2, a3, a7 + leaf_return + + + /* Greater Than or Equal */ + + .align 4 + .global __gesf2 + .type __gesf2, @function +__gesf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Llt_cmp + movi a2, -1 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, -1 + leaf_return + + + /* Less Than */ + + .align 4 + .global __ltsf2 + .type __ltsf2, @function +__ltsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 2f +1: bnall a3, a6, .Llt_cmp + + /* Check if y is a NaN. */ + slli a7, a3, 9 + beqz a7, .Llt_cmp + movi a2, 0 + leaf_return + + /* Check if x is a NaN. */ +2: slli a7, a2, 9 + beqz a7, 1b + movi a2, 0 + leaf_return + +.Llt_cmp: + /* Check if x and y have different signs. */ + xor a7, a2, a3 + bltz a7, .Llt_diff_signs + + /* Check if x is negative. */ + bltz a2, .Llt_xneg + + /* Check if x < y. */ + bgeu a2, a3, 5f +4: movi a2, -1 + leaf_return + +.Llt_xneg: + /* Check if y < x. */ + bltu a3, a2, 4b +5: movi a2, 0 + leaf_return + +.Llt_diff_signs: + bgez a2, 5b + + /* Check if both x and y are nonzero. 
*/ + or a7, a2, a3 + slli a7, a7, 1 + movi a2, 0 + movi a3, -1 + movnez a2, a3, a7 + leaf_return + + + /* Unordered */ + + .align 4 + .global __unordsf2 + .type __unordsf2, @function +__unordsf2: + leaf_entry sp, 16 + movi a6, 0x7f800000 + ball a2, a6, 3f +1: ball a3, a6, 4f +2: movi a2, 0 + leaf_return + +3: slli a7, a2, 9 + beqz a7, 1b + movi a2, 1 + leaf_return + +4: slli a7, a3, 9 + beqz a7, 2b + movi a2, 1 + leaf_return + +#endif /* L_cmpsf2 */ + +#ifdef L_fixsfsi + + .align 4 + .global __fixsfsi + .type __fixsfsi, @function +__fixsfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixsfsi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7e + bgei a4, 32, .Lfixsfsi_maxint + blti a4, 1, .Lfixsfsi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli a5, a7, 8 + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixsfsi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixsfsi_maxint + + /* Translate NaN to +maxint. */ + movi a2, 0 + +.Lfixsfsi_maxint: + slli a4, a6, 8 /* 0x80000000 */ + addi a5, a4, -1 /* 0x7fffffff */ + movgez a4, a5, a2 + mov a2, a4 + leaf_return + +.Lfixsfsi_zero: + movi a2, 0 + leaf_return + +#endif /* L_fixsfsi */ + +#ifdef L_fixsfdi + + .align 4 + .global __fixsfdi + .type __fixsfdi, @function +__fixsfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixsfdi_nan_or_inf + + /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7e + bgei a4, 64, .Lfixsfdi_maxint + blti a4, 1, .Lfixsfdi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli xh, a7, 8 + + /* Shift back to the right, based on the exponent. */ + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixsfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixsfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixsfdi_smallshift: + movi xl, 0 + sll xl, xh + srl xh, xh + j .Lfixsfdi_shifted + +.Lfixsfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixsfdi_maxint + + /* Translate NaN to +maxint. */ + movi a2, 0 + +.Lfixsfdi_maxint: + slli a7, a6, 8 /* 0x80000000 */ + bgez a2, 1f + mov xh, a7 + movi xl, 0 + leaf_return + +1: addi xh, a7, -1 /* 0x7fffffff */ + movi xl, -1 + leaf_return + +.Lfixsfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +#endif /* L_fixsfdi */ + +#ifdef L_fixunssfsi + + .align 4 + .global __fixunssfsi + .type __fixunssfsi, @function +__fixunssfsi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixunssfsi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7f + bgei a4, 32, .Lfixunssfsi_maxint + bltz a4, .Lfixunssfsi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli a5, a7, 8 + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 32, .Lfixunssfsi_bigexp + ssl a4 /* shift by 32 - a4 */ + srl a5, a5 + + /* Negate the result if sign != 0. */ + neg a2, a5 + movgez a2, a5, a7 + leaf_return + +.Lfixunssfsi_nan_or_inf: + /* Handle Infinity and NaN. 
*/ + slli a4, a2, 9 + beqz a4, .Lfixunssfsi_maxint + + /* Translate NaN to 0xffffffff. */ + movi a2, -1 + leaf_return + +.Lfixunssfsi_maxint: + slli a4, a6, 8 /* 0x80000000 */ + movi a5, -1 /* 0xffffffff */ + movgez a4, a5, a2 + mov a2, a4 + leaf_return + +.Lfixunssfsi_zero: + movi a2, 0 + leaf_return + +.Lfixunssfsi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a2, 1f + mov a2, a5 /* no shift needed */ + leaf_return + + /* Return 0x80000000 if negative. */ +1: slli a2, a6, 8 + leaf_return + +#endif /* L_fixunssfsi */ + +#ifdef L_fixunssfdi + + .align 4 + .global __fixunssfdi + .type __fixunssfdi, @function +__fixunssfdi: + leaf_entry sp, 16 + + /* Check for NaN and Infinity. */ + movi a6, 0x7f800000 + ball a2, a6, .Lfixunssfdi_nan_or_inf + + /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */ + extui a4, a2, 23, 8 + addi a4, a4, -0x7f + bgei a4, 64, .Lfixunssfdi_maxint + bltz a4, .Lfixunssfdi_zero + + /* Add explicit "1.0" and shift << 8. */ + or a7, a2, a6 + slli xh, a7, 8 + + /* Shift back to the right, based on the exponent. */ + addi a4, a4, 1 + beqi a4, 64, .Lfixunssfdi_bigexp + ssl a4 /* shift by 64 - a4 */ + bgei a4, 32, .Lfixunssfdi_smallshift + srl xl, xh + movi xh, 0 + +.Lfixunssfdi_shifted: + /* Negate the result if sign != 0. */ + bgez a7, 1f + neg xl, xl + neg xh, xh + beqz xl, 1f + addi xh, xh, -1 +1: leaf_return + +.Lfixunssfdi_smallshift: + movi xl, 0 + src xl, xh, xl + srl xh, xh + j .Lfixunssfdi_shifted + +.Lfixunssfdi_nan_or_inf: + /* Handle Infinity and NaN. */ + slli a4, a2, 9 + beqz a4, .Lfixunssfdi_maxint + + /* Translate NaN to 0xffffffff.... */ +1: movi xh, -1 + movi xl, -1 + leaf_return + +.Lfixunssfdi_maxint: + bgez a2, 1b +2: slli xh, a6, 8 /* 0x80000000 */ + movi xl, 0 + leaf_return + +.Lfixunssfdi_zero: + movi xh, 0 + movi xl, 0 + leaf_return + +.Lfixunssfdi_bigexp: + /* Handle unsigned maximum exponent case. */ + bltz a7, 2b + movi xl, 0 + leaf_return /* no shift needed */ + +#endif /* L_fixunssfdi */ + +#ifdef L_floatsisf + + .align 4 + .global __floatunsisf + .type __floatunsisf, @function +__floatunsisf: + leaf_entry sp, 16 + beqz a2, .Lfloatsisf_return + + /* Set the sign to zero and jump to the floatsisf code. */ + movi a7, 0 + j .Lfloatsisf_normalize + + .align 4 + .global __floatsisf + .type __floatsisf, @function +__floatsisf: + leaf_entry sp, 16 + + /* Check for zero. */ + beqz a2, .Lfloatsisf_return + + /* Save the sign. */ + extui a7, a2, 31, 1 + + /* Get the absolute value. */ +#if XCHAL_HAVE_ABS + abs a2, a2 +#else + neg a4, a2 + movltz a2, a4, a2 +#endif + +.Lfloatsisf_normalize: + /* Normalize with the first 1 bit in the msb. */ + do_nsau a4, a2, a5, a6 + ssl a4 + sll a5, a2 + + /* Shift the mantissa into position, with rounding bits in a6. */ + srli a2, a5, 8 + slli a6, a5, (32 - 8) + + /* Set the exponent. */ + movi a5, 0x9d /* 0x7e + 31 */ + sub a5, a5, a4 + slli a5, a5, 23 + add a2, a2, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or a2, a2, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, .Lfloatsisf_return + addi a2, a2, 1 /* Overflow to the exponent is OK. */ + + /* Check if the leftover fraction is exactly 1/2. */ + slli a6, a6, 1 + beqz a6, .Lfloatsisf_exactlyhalf + +.Lfloatsisf_return: + leaf_return + +.Lfloatsisf_exactlyhalf: + /* Round down to the nearest even value. 
*/ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +#endif /* L_floatsisf */ + +#ifdef L_floatdisf + + .align 4 + .global __floatundisf + .type __floatundisf, @function +__floatundisf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Set the sign to zero and jump to the floatdisf code. */ + movi a7, 0 + j .Lfloatdisf_normalize + + .align 4 + .global __floatdisf + .type __floatdisf, @function +__floatdisf: + leaf_entry sp, 16 + + /* Check for zero. */ + or a4, xh, xl + beqz a4, 2f + + /* Save the sign. */ + extui a7, xh, 31, 1 + + /* Get the absolute value. */ + bgez xh, .Lfloatdisf_normalize + neg xl, xl + neg xh, xh + beqz xl, .Lfloatdisf_normalize + addi xh, xh, -1 + +.Lfloatdisf_normalize: + /* Normalize with the first 1 bit in the msb of xh. */ + beqz xh, .Lfloatdisf_bigshift + do_nsau a4, xh, a5, a6 + ssl a4 + src xh, xh, xl + sll xl, xl + +.Lfloatdisf_shifted: + /* Shift the mantissa into position, with rounding bits in a6. */ + ssai 8 + sll a5, xl + src a6, xh, xl + srl xh, xh + beqz a5, 1f + movi a5, 1 + or a6, a6, a5 +1: + /* Set the exponent. */ + movi a5, 0xbd /* 0x7e + 63 */ + sub a5, a5, a4 + slli a5, a5, 23 + add a2, xh, a5 + + /* Add the sign. */ + slli a7, a7, 31 + or a2, a2, a7 + + /* Round up if the leftover fraction is >= 1/2. */ + bgez a6, 2f + addi a2, a2, 1 /* Overflow to the exponent is OK. */ + + /* Check if the leftover fraction is exactly 1/2. */ + slli a6, a6, 1 + beqz a6, .Lfloatdisf_exactlyhalf +2: leaf_return + +.Lfloatdisf_bigshift: + /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ + do_nsau a4, xl, a5, a6 + ssl a4 + sll xh, xl + movi xl, 0 + addi a4, a4, 32 + j .Lfloatdisf_shifted + +.Lfloatdisf_exactlyhalf: + /* Round down to the nearest even value. */ + srli a2, a2, 1 + slli a2, a2, 1 + leaf_return + +#endif /* L_floatdisf */ diff --git a/gcc/config/xtensa/lib1funcs.asm b/gcc/config/xtensa/lib1funcs.asm new file mode 100644 index 000000000..071b91711 --- /dev/null +++ b/gcc/config/xtensa/lib1funcs.asm @@ -0,0 +1,845 @@ +/* Assembly functions for the Xtensa version of libgcc1. + Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 + Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "xtensa-config.h" + +/* Define macros for the ABS and ADDX* instructions to handle cases + where they are not included in the Xtensa processor configuration. 
*/ + + .macro do_abs dst, src, tmp +#if XCHAL_HAVE_ABS + abs \dst, \src +#else + neg \tmp, \src + movgez \tmp, \src, \src + mov \dst, \tmp +#endif + .endm + + .macro do_addx2 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx2 \dst, \as, \at +#else + slli \tmp, \as, 1 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx4 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx4 \dst, \as, \at +#else + slli \tmp, \as, 2 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx8 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx8 \dst, \as, \at +#else + slli \tmp, \as, 3 + add \dst, \tmp, \at +#endif + .endm + +/* Define macros for leaf function entry and return, supporting either the + standard register windowed ABI or the non-windowed call0 ABI. These + macros do not allocate any extra stack space, so they only work for + leaf functions that do not need to spill anything to the stack. */ + + .macro leaf_entry reg, size +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + entry \reg, \size +#else + /* do nothing */ +#endif + .endm + + .macro leaf_return +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ + retw +#else + ret +#endif + .endm + + +#ifdef L_mulsi3 + .align 4 + .global __mulsi3 + .type __mulsi3, @function +__mulsi3: + leaf_entry sp, 16 + +#if XCHAL_HAVE_MUL32 + mull a2, a2, a3 + +#elif XCHAL_HAVE_MUL16 + or a4, a2, a3 + srai a4, a4, 16 + bnez a4, .LMUL16 + mul16u a2, a2, a3 + leaf_return +.LMUL16: + srai a4, a2, 16 + srai a5, a3, 16 + mul16u a7, a4, a3 + mul16u a6, a5, a2 + mul16u a4, a2, a3 + add a7, a7, a6 + slli a7, a7, 16 + add a2, a7, a4 + +#elif XCHAL_HAVE_MAC16 + mul.aa.hl a2, a3 + mula.aa.lh a2, a3 + rsr a5, ACCLO + umul.aa.ll a2, a3 + rsr a4, ACCLO + slli a5, a5, 16 + add a2, a4, a5 + +#else /* !MUL32 && !MUL16 && !MAC16 */ + + /* Multiply one bit at a time, but unroll the loop 4x to better + exploit the addx instructions and avoid overhead. + Peel the first iteration to save a cycle on init. */ + + /* Avoid negative numbers. */ + xor a5, a2, a3 /* Top bit is 1 if one input is negative. */ + do_abs a3, a3, a6 + do_abs a2, a2, a6 + + /* Swap so the second argument is smaller. */ + sub a7, a2, a3 + mov a4, a3 + movgez a4, a2, a7 /* a4 = max (a2, a3) */ + movltz a3, a2, a7 /* a3 = min (a2, a3) */ + + movi a2, 0 + extui a6, a3, 0, 1 + movnez a2, a4, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + neg a3, a2 + movltz a2, a3, a5 + leaf_return + + .align 4 +.Lmult_main_loop: + srli a3, a3, 4 + slli a4, a4, 4 + + add a7, a4, a2 + extui a6, a3, 0, 1 + movnez a2, a7, a6 + + do_addx2 a7, a4, a2, a7 + extui a6, a3, 1, 1 + movnez a2, a7, a6 + + do_addx4 a7, a4, a2, a7 + extui a6, a3, 2, 1 + movnez a2, a7, a6 + + do_addx8 a7, a4, a2, a7 + extui a6, a3, 3, 1 + movnez a2, a7, a6 + + bgeui a3, 16, .Lmult_main_loop + + neg a3, a2 + movltz a2, a3, a5 + +#endif /* !MUL32 && !MUL16 && !MAC16 */ + + leaf_return + .size __mulsi3, . 
- __mulsi3 + +#endif /* L_mulsi3 */ + + +#ifdef L_umulsidi3 + +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + + .align 4 + .global __umulsidi3 + .type __umulsidi3, @function +__umulsidi3: +#if __XTENSA_CALL0_ABI__ + leaf_entry sp, 32 + addi sp, sp, -32 + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + leaf_entry sp, 48 +#else + leaf_entry sp, 16 +#endif + +#ifdef __XTENSA_EB__ +#define wh a2 +#define wl a3 +#else +#define wh a3 +#define wl a2 +#endif /* __XTENSA_EB__ */ + + /* This code is taken from the mulsf3 routine in ieee754-sf.S. + See more comments there. */ + +#if XCHAL_HAVE_MUL32_HIGH + mull a6, a2, a3 + muluh wh, a2, a3 + mov wl, a6 + +#else /* ! MUL32_HIGH */ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* a0 and a8 will be clobbered by calling the multiply function + but a8 is not used here and need not be saved. */ + s32i a0, sp, 0 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define a2h a4 +#define a3h a5 + + /* Get the high halves of the inputs into registers. */ + srli a2h, a2, 16 + srli a3h, a3, 16 + +#define a2l a2 +#define a3l a3 + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. */ + extui a2, a2, 0, 16 + extui a3, a3, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. */ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#if __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a6 with carry-out in a9. */ + do_mul(a6, a2, l, a3, h) /* pp 1 */ + do_mul(a11, a2, h, a3, l) /* pp 2 */ + movi a9, 0 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Shift the high half of a9/a6 into position in a9. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a9, a9, a6 + + /* Compute the low word into a6. */ + do_mul(a11, a2, l, a3, l) /* pp 0 */ + sll a6, a6 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Compute the high word into wh. */ + do_mul(wh, a2, h, a3, h) /* pp 3 */ + add wh, wh, a9 + mov wl, a6 + +#endif /* !MUL32_HIGH */ + +#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL + /* Restore the original return address. 
*/ + l32i a0, sp, 0 +#endif +#if __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + addi sp, sp, 32 +#endif + leaf_return + +#if XCHAL_NO_MUL + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + leaf_entry sp, 16 + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm +#if __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. */ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + leaf_return +#endif /* XCHAL_NO_MUL */ + + .size __umulsidi3, . - __umulsidi3 + +#endif /* L_umulsidi3 */ + + +/* Define a macro for the NSAU (unsigned normalize shift amount) + instruction, which computes the number of leading zero bits, + to handle cases where it is not included in the Xtensa processor + configuration. */ + + .macro do_nsau cnt, val, tmp, a +#if XCHAL_HAVE_NSA + nsau \cnt, \val +#else + mov \a, \val + movi \cnt, 0 + extui \tmp, \a, 16, 16 + bnez \tmp, 0f + movi \cnt, 16 + slli \a, \a, 16 +0: + extui \tmp, \a, 24, 8 + bnez \tmp, 1f + addi \cnt, \cnt, 8 + slli \a, \a, 8 +1: + movi \tmp, __nsau_data + extui \a, \a, 24, 8 + add \tmp, \tmp, \a + l8ui \tmp, \tmp, 0 + add \cnt, \cnt, \tmp +#endif /* !XCHAL_HAVE_NSA */ + .endm + +#ifdef L_clz + .section .rodata + .align 4 + .global __nsau_data + .type __nsau_data, @object +__nsau_data: +#if !XCHAL_HAVE_NSA + .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 + .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +#endif /* !XCHAL_HAVE_NSA */ + .size __nsau_data, . - __nsau_data + .hidden __nsau_data +#endif /* L_clz */ + + +#ifdef L_clzsi2 + .align 4 + .global __clzsi2 + .type __clzsi2, @function +__clzsi2: + leaf_entry sp, 16 + do_nsau a2, a2, a3, a4 + leaf_return + .size __clzsi2, . 
- __clzsi2 + +#endif /* L_clzsi2 */ + + +#ifdef L_ctzsi2 + .align 4 + .global __ctzsi2 + .type __ctzsi2, @function +__ctzsi2: + leaf_entry sp, 16 + neg a3, a2 + and a3, a3, a2 + do_nsau a2, a3, a4, a5 + neg a2, a2 + addi a2, a2, 31 + leaf_return + .size __ctzsi2, . - __ctzsi2 + +#endif /* L_ctzsi2 */ + + +#ifdef L_ffssi2 + .align 4 + .global __ffssi2 + .type __ffssi2, @function +__ffssi2: + leaf_entry sp, 16 + neg a3, a2 + and a3, a3, a2 + do_nsau a2, a3, a4, a5 + neg a2, a2 + addi a2, a2, 32 + leaf_return + .size __ffssi2, . - __ffssi2 + +#endif /* L_ffssi2 */ + + +#ifdef L_udivsi3 + .align 4 + .global __udivsi3 + .type __udivsi3, @function +__udivsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + quou a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor <= 1 */ + + mov a6, a2 /* keep dividend in a6 */ + do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment quotient if dividend >= divisor */ +.Lreturn: + leaf_return + +.Lle_one: + beqz a3, .Lerror /* if divisor == 1, return the dividend */ + leaf_return + +.Lspecial: + /* return dividend >= divisor */ + bltu a6, a3, .Lreturn0 + movi a2, 1 + leaf_return + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __udivsi3, . - __udivsi3 + +#endif /* L_udivsi3 */ + + +#ifdef L_divsi3 + .align 4 + .global __divsi3 + .type __divsi3, @function +__divsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + quos a2, a2, a3 +#else + xor a7, a2, a3 /* sign = dividend ^ divisor */ + do_abs a6, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + movi a2, 0 /* quotient = 0 */ + + /* test-subtract-and-shift loop; one quotient bit on each iteration */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a6, a3, .Lzerobit + sub a6, a6, a3 + addi a2, a2, 1 +.Lzerobit: + slli a2, a2, 1 + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + + bltu a6, a3, .Lreturn + addi a2, a2, 1 /* increment if udividend >= udivisor */ +.Lreturn: + neg a5, a2 + movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */ + leaf_return + +.Lle_one: + beqz a3, .Lerror + neg a2, a6 /* if udivisor == 1, then return... */ + movgez a2, a6, a7 /* (sign < 0) ? 
-udividend : udividend */ + leaf_return + +.Lspecial: + bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */ + movi a2, 1 + movi a4, -1 + movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */ + leaf_return + +.Lerror: + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __divsi3, . - __divsi3 + +#endif /* L_divsi3 */ + + +#ifdef L_umodsi3 + .align 4 + .global __umodsi3 + .type __umodsi3, @function +__umodsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + remu a2, a2, a3 +#else + bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */ + + do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */ + do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ + ssl a4 + sll a3, a3 /* divisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract once more if dividend >= divisor */ +.Lreturn: + leaf_return + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __umodsi3, . - __umodsi3 + +#endif /* L_umodsi3 */ + + +#ifdef L_modsi3 + .align 4 + .global __modsi3 + .type __modsi3, @function +__modsi3: + leaf_entry sp, 16 +#if XCHAL_HAVE_DIV32 + rems a2, a2, a3 +#else + mov a7, a2 /* save original (signed) dividend */ + do_abs a2, a2, a4 /* udividend = abs (dividend) */ + do_abs a3, a3, a4 /* udivisor = abs (divisor) */ + bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ + do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */ + do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */ + bgeu a5, a4, .Lspecial + + sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ + ssl a4 + sll a3, a3 /* udivisor <<= count */ + + /* test-subtract-and-shift loop */ +#if XCHAL_HAVE_LOOPS + loopnez a4, .Lloopend +#endif /* XCHAL_HAVE_LOOPS */ +.Lloop: + bltu a2, a3, .Lzerobit + sub a2, a2, a3 +.Lzerobit: + srli a3, a3, 1 +#if !XCHAL_HAVE_LOOPS + addi a4, a4, -1 + bnez a4, .Lloop +#endif /* !XCHAL_HAVE_LOOPS */ +.Lloopend: + +.Lspecial: + bltu a2, a3, .Lreturn + sub a2, a2, a3 /* subtract again if udividend >= udivisor */ +.Lreturn: + bgez a7, .Lpositive + neg a2, a2 /* if (dividend < 0), return -udividend */ +.Lpositive: + leaf_return + +.Lle_one: + bnez a3, .Lreturn0 + + /* Divide by zero: Use an illegal instruction to force an exception. + The subsequent "DIV0" string can be recognized by the exception + handler to identify the real cause of the exception. */ + ill + .ascii "DIV0" + +.Lreturn0: + movi a2, 0 +#endif /* XCHAL_HAVE_DIV32 */ + leaf_return + .size __modsi3, . 
- __modsi3 + +#endif /* L_modsi3 */ + + +#ifdef __XTENSA_EB__ +#define uh a2 +#define ul a3 +#else +#define uh a3 +#define ul a2 +#endif /* __XTENSA_EB__ */ + + +#ifdef L_ashldi3 + .align 4 + .global __ashldi3 + .type __ashldi3, @function +__ashldi3: + leaf_entry sp, 16 + ssl a4 + bgei a4, 32, .Llow_only + src uh, uh, ul + sll ul, ul + leaf_return + +.Llow_only: + sll uh, ul + movi ul, 0 + leaf_return + .size __ashldi3, . - __ashldi3 + +#endif /* L_ashldi3 */ + + +#ifdef L_ashrdi3 + .align 4 + .global __ashrdi3 + .type __ashrdi3, @function +__ashrdi3: + leaf_entry sp, 16 + ssr a4 + bgei a4, 32, .Lhigh_only + src ul, uh, ul + sra uh, uh + leaf_return + +.Lhigh_only: + sra ul, uh + srai uh, uh, 31 + leaf_return + .size __ashrdi3, . - __ashrdi3 + +#endif /* L_ashrdi3 */ + + +#ifdef L_lshrdi3 + .align 4 + .global __lshrdi3 + .type __lshrdi3, @function +__lshrdi3: + leaf_entry sp, 16 + ssr a4 + bgei a4, 32, .Lhigh_only1 + src ul, uh, ul + srl uh, uh + leaf_return + +.Lhigh_only1: + srl ul, uh + movi uh, 0 + leaf_return + .size __lshrdi3, . - __lshrdi3 + +#endif /* L_lshrdi3 */ + + +#include "ieee754-df.S" +#include "ieee754-sf.S" diff --git a/gcc/config/xtensa/lib2funcs.S b/gcc/config/xtensa/lib2funcs.S new file mode 100644 index 000000000..65134e24c --- /dev/null +++ b/gcc/config/xtensa/lib2funcs.S @@ -0,0 +1,186 @@ +/* Assembly functions for libgcc2. + Copyright (C) 2001, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "xtensa-config.h" + +/* __xtensa_libgcc_window_spill: This function flushes out all but the + current register window. This is used to set up the stack so that + arbitrary frames can be accessed. */ + + .align 4 + .global __xtensa_libgcc_window_spill + .type __xtensa_libgcc_window_spill,@function +__xtensa_libgcc_window_spill: + entry sp, 32 + movi a2, 0 + syscall + retw + .size __xtensa_libgcc_window_spill, .-__xtensa_libgcc_window_spill + + +/* __xtensa_nonlocal_goto: This code does all the hard work of a + nonlocal goto on Xtensa. It is here in the library to avoid the + code size bloat of generating it in-line. There are two + arguments: + + a2 = frame pointer for the procedure containing the label + a3 = goto handler address + + This function never returns to its caller but instead goes directly + to the address of the specified goto handler. */ + + .align 4 + .global __xtensa_nonlocal_goto + .type __xtensa_nonlocal_goto,@function +__xtensa_nonlocal_goto: + entry sp, 32 + + /* Flush registers. 
*/ + mov a5, a2 + movi a2, 0 + syscall + mov a2, a5 + + /* Because the save area for a0-a3 is stored one frame below + the one identified by a2, the only way to restore those + registers is to unwind the stack. If alloca() were never + called, we could just unwind until finding the sp value + matching a2. However, a2 is a frame pointer, not a stack + pointer, and may not be encountered during the unwinding. + The solution is to unwind until going _past_ the value + given by a2. This involves keeping three stack pointer + values during the unwinding: + + next = sp of frame N-1 + cur = sp of frame N + prev = sp of frame N+1 + + When next > a2, the desired save area is stored relative + to prev. At this point, cur will be the same as a2 + except in the alloca() case. + + Besides finding the values to be restored to a0-a3, we also + need to find the current window size for the target + function. This can be extracted from the high bits of the + return address, initially in a0. As the unwinding + proceeds, the window size is taken from the value of a0 + saved _two_ frames below the current frame. */ + + addi a5, sp, -16 /* a5 = prev - save area */ + l32i a6, a5, 4 + addi a6, a6, -16 /* a6 = cur - save area */ + mov a8, a0 /* a8 = return address (for window size) */ + j .Lfirstframe + +.Lnextframe: + l32i a8, a5, 0 /* next return address (for window size) */ + mov a5, a6 /* advance prev */ + addi a6, a7, -16 /* advance cur */ +.Lfirstframe: + l32i a7, a6, 4 /* a7 = next */ + bgeu a2, a7, .Lnextframe + + /* At this point, prev (a5) points to the save area with the saved + values of a0-a3. Copy those values into the save area at the + current sp so they will be reloaded when the return from this + function underflows. We don't have to worry about exceptions + while updating the current save area, because the windows have + already been flushed. */ + + addi a4, sp, -16 /* a4 = save area of this function */ + l32i a6, a5, 0 + l32i a7, a5, 4 + s32i a6, a4, 0 + s32i a7, a4, 4 + l32i a6, a5, 8 + l32i a7, a5, 12 + s32i a6, a4, 8 + s32i a7, a4, 12 + + /* Set return address to goto handler. Use the window size bits + from the return address two frames below the target. */ + extui a8, a8, 30, 2 /* get window size from return addr. */ + slli a3, a3, 2 /* get goto handler addr. << 2 */ + ssai 2 + src a0, a8, a3 /* combine them with a funnel shift */ + + retw + .size __xtensa_nonlocal_goto, .-__xtensa_nonlocal_goto + + +/* __xtensa_sync_caches: This function is called after writing a trampoline + on the stack to force all the data writes to memory and invalidate the + instruction cache. a2 is the address of the new trampoline. + + After the trampoline data is written out, it must be flushed out of + the data cache into memory. We use DHWB in case we have a writeback + cache. At least one DHWB instruction is needed for each data cache + line which may be touched by the trampoline. An ISYNC instruction + must follow the DHWBs. + + We have to flush the i-cache to make sure that the new values get used. + At least one IHI instruction is needed for each i-cache line which may + be touched by the trampoline. An ISYNC instruction is also needed to + make sure that the modified instructions are loaded into the instruction + fetch buffer. */ + +/* Use the maximum trampoline size. Flushing a bit extra is OK. 
*/ +#define TRAMPOLINE_SIZE 60 + + .text + .align 4 + .global __xtensa_sync_caches + .type __xtensa_sync_caches,@function +__xtensa_sync_caches: + entry sp, 32 +#if XCHAL_DCACHE_SIZE > 0 + /* Flush the trampoline from the data cache. */ + extui a4, a2, 0, XCHAL_DCACHE_LINEWIDTH + addi a4, a4, TRAMPOLINE_SIZE + addi a4, a4, (1 << XCHAL_DCACHE_LINEWIDTH) - 1 + srli a4, a4, XCHAL_DCACHE_LINEWIDTH + mov a3, a2 +.Ldcache_loop: + dhwb a3, 0 + addi a3, a3, (1 << XCHAL_DCACHE_LINEWIDTH) + addi a4, a4, -1 + bnez a4, .Ldcache_loop + isync +#endif +#if XCHAL_ICACHE_SIZE > 0 + /* Invalidate the corresponding lines in the instruction cache. */ + extui a4, a2, 0, XCHAL_ICACHE_LINEWIDTH + addi a4, a4, TRAMPOLINE_SIZE + addi a4, a4, (1 << XCHAL_ICACHE_LINEWIDTH) - 1 + srli a4, a4, XCHAL_ICACHE_LINEWIDTH +.Licache_loop: + ihi a2, 0 + addi a2, a2, (1 << XCHAL_ICACHE_LINEWIDTH) + addi a4, a4, -1 + bnez a4, .Licache_loop +#endif + isync + retw + .size __xtensa_sync_caches, .-__xtensa_sync_caches diff --git a/gcc/config/xtensa/libgcc-xtensa.ver b/gcc/config/xtensa/libgcc-xtensa.ver new file mode 100644 index 000000000..43e7d4fc7 --- /dev/null +++ b/gcc/config/xtensa/libgcc-xtensa.ver @@ -0,0 +1,3 @@ +GCC_4.3.0 { + __umulsidi3 +} diff --git a/gcc/config/xtensa/linux-unwind.h b/gcc/config/xtensa/linux-unwind.h new file mode 100644 index 000000000..245649728 --- /dev/null +++ b/gcc/config/xtensa/linux-unwind.h @@ -0,0 +1,97 @@ +/* DWARF2 EH unwinding support for Xtensa. + Copyright (C) 2008, 2009, 2012 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2-xtensa.c for the structs. + Don't use this at all if inhibit_libc is used. */ + +#ifndef inhibit_libc + +#include +#include + +/* Encoded bytes for Xtensa instructions: + movi a2, __NR_rt_sigreturn + syscall + entry (first byte only) + Some of the bytes are endian-dependent. 
*/ + +#define MOVI_BYTE0 0x22 +#define MOVI_BYTE2 225 /* __NR_rt_sigreturn */ +#define SYSC_BYTE0 0 +#define SYSC_BYTE2 0 + +#ifdef __XTENSA_EB__ +#define MOVI_BYTE1 0x0a +#define SYSC_BYTE1 0x05 +#define ENTRY_BYTE 0x6c +#else +#define MOVI_BYTE1 0xa0 +#define SYSC_BYTE1 0x50 +#define ENTRY_BYTE 0x36 +#endif + +#define MD_FALLBACK_FRAME_STATE_FOR xtensa_fallback_frame_state + +static _Unwind_Reason_Code +xtensa_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned char *pc = context->ra; + struct sigcontext *sc; + + struct rt_sigframe { + siginfo_t info; + struct ucontext uc; + } *rt_; + + /* movi a2, __NR_rt_sigreturn; syscall */ + if (pc[0] != MOVI_BYTE0 + || pc[1] != MOVI_BYTE1 + || pc[2] != MOVI_BYTE2 + || pc[3] != SYSC_BYTE0 + || pc[4] != SYSC_BYTE1 + || pc[5] != SYSC_BYTE2) + return _URC_END_OF_STACK; + + rt_ = context->sp; + sc = &rt_->uc.uc_mcontext; + fs->signal_regs = (_Unwind_Word *) sc->sc_a; + + /* If the signal arrived just before an ENTRY instruction, find the return + address and pretend the signal arrived before executing the CALL. */ + if (*(unsigned char *) sc->sc_pc == ENTRY_BYTE) + { + unsigned callinc = (sc->sc_ps >> 16) & 3; + fs->signal_ra = ((sc->sc_a[callinc << 2] & XTENSA_RA_FIELD_MASK) + | context->ra_high_bits) - 3; + } + else + fs->signal_ra = sc->sc_pc; + + fs->signal_frame = 1; + return _URC_NO_REASON; +} + +#endif /* ifdef inhibit_libc */ diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h new file mode 100644 index 000000000..83d2a9767 --- /dev/null +++ b/gcc/config/xtensa/linux.h @@ -0,0 +1,71 @@ +/* Xtensa Linux configuration. + Derived from the configuration for GCC for Intel i386 running Linux. + Copyright (C) 2001, 2002, 2003, 2006, 2007, 2008, 2010, 2011 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS() + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (Xtensa GNU/Linux with ELF)", stderr); + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "long int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef ASM_SPEC +#define ASM_SPEC \ + "%{mtext-section-literals:--text-section-literals} \ + %{mno-text-section-literals:--no-text-section-literals} \ + %{mtarget-align:--target-align} \ + %{mno-target-align:--no-target-align} \ + %{mlongcalls:--longcalls} \ + %{mno-longcalls:--no-longcalls}" + +#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " LINUX_DYNAMIC_LINKER "} \ + %{static:-static}}" + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* Always enable "-fpic" for Xtensa Linux. 
*/ +#define XTENSA_ALWAYS_PIC 1 + +#undef DBX_REGISTER_NUMBER + +#define MD_UNWIND_SUPPORT "config/xtensa/linux-unwind.h" + diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md new file mode 100644 index 000000000..27f058de7 --- /dev/null +++ b/gcc/config/xtensa/predicates.md @@ -0,0 +1,175 @@ +;; Predicate definitions for Xtensa. +;; Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_predicate "add_operand" + (ior (and (match_code "const_int") + (match_test "xtensa_simm8 (INTVAL (op)) + || xtensa_simm8x256 (INTVAL (op))")) + (match_operand 0 "register_operand"))) + +(define_predicate "addsubx_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 + || INTVAL (op) == 4 + || INTVAL (op) == 8"))) + +(define_predicate "arith_operand" + (ior (and (match_code "const_int") + (match_test "xtensa_simm8 (INTVAL (op))")) + (match_operand 0 "register_operand"))) + +;; Non-immediate operand excluding the constant pool. +(define_predicate "nonimmed_operand" + (ior (and (match_operand 0 "memory_operand") + (match_test "!constantpool_mem_p (op)")) + (match_operand 0 "register_operand"))) + +;; Memory operand excluding the constant pool. +(define_predicate "mem_operand" + (and (match_operand 0 "memory_operand") + (match_test "!constantpool_mem_p (op)"))) + +;; Memory operand in the constant pool. +(define_predicate "constantpool_operand" + (match_test "constantpool_mem_p (op)")) + +(define_predicate "mask_operand" + (ior (and (match_code "const_int") + (match_test "xtensa_mask_immediate (INTVAL (op))")) + (match_operand 0 "register_operand"))) + +(define_predicate "extui_fldsz_operand" + (and (match_code "const_int") + (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)"))) + +(define_predicate "sext_operand" + (if_then_else (match_test "TARGET_SEXT") + (match_operand 0 "nonimmed_operand") + (match_operand 0 "mem_operand"))) + +(define_predicate "sext_fldsz_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23"))) + +(define_predicate "lsbitnum_operand" + (and (match_code "const_int") + (match_test "BITS_BIG_ENDIAN + ? (INTVAL (op) == BITS_PER_WORD - 1) + : (INTVAL (op) == 0)"))) + +(define_predicate "branch_operand" + (ior (and (match_code "const_int") + (match_test "xtensa_b4const_or_zero (INTVAL (op))")) + (match_operand 0 "register_operand"))) + +(define_predicate "ubranch_operand" + (ior (and (match_code "const_int") + (match_test "xtensa_b4constu (INTVAL (op))")) + (match_operand 0 "register_operand"))) + +(define_predicate "call_insn_operand" + (match_code "const_int,const,symbol_ref,reg") +{ + if ((GET_CODE (op) == REG) + && (op != arg_pointer_rtx) + && ((REGNO (op) < FRAME_POINTER_REGNUM) + || (REGNO (op) > LAST_VIRTUAL_REGISTER))) + return true; + + if (CONSTANT_ADDRESS_P (op)) + { + /* Direct calls only allowed to static functions with PIC. 
*/ + if (flag_pic) + { + tree callee, callee_sec, caller_sec; + + if (GET_CODE (op) != SYMBOL_REF + || !SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_EXTERNAL_P (op)) + return false; + + /* Don't attempt a direct call if the callee is known to be in + a different section, since there's a good chance it will be + out of range. */ + + if (flag_function_sections + || DECL_ONE_ONLY (current_function_decl)) + return false; + caller_sec = DECL_SECTION_NAME (current_function_decl); + callee = SYMBOL_REF_DECL (op); + if (callee) + { + if (DECL_ONE_ONLY (callee)) + return false; + callee_sec = DECL_SECTION_NAME (callee); + if (((caller_sec == NULL_TREE) ^ (callee_sec == NULL_TREE)) + || (caller_sec != NULL_TREE + && strcmp (TREE_STRING_POINTER (caller_sec), + TREE_STRING_POINTER (callee_sec)) != 0)) + return false; + } + else if (caller_sec != NULL_TREE) + return false; + } + return true; + } + + return false; +}) + +(define_predicate "move_operand" + (ior + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "memory_operand") + (match_test "!constantpool_mem_p (op) + || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) + (ior (and (match_code "const_int") + (match_test "GET_MODE_CLASS (mode) == MODE_INT + && xtensa_simm12b (INTVAL (op))")) + (and (match_code "const_int,const_double,const,symbol_ref,label_ref") + (match_test "TARGET_CONST16 && CONSTANT_P (op) + && GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))))) + +;; Accept the floating point constant 1 in the appropriate mode. +(define_predicate "const_float_1_operand" + (match_code "const_double") +{ + REAL_VALUE_TYPE d; + REAL_VALUE_FROM_CONST_DOUBLE (d, op); + return REAL_VALUES_EQUAL (d, dconst1); +}) + +(define_predicate "fpmem_offset_operand" + (and (match_code "const_int") + (match_test "xtensa_mem_offset (INTVAL (op), SFmode)"))) + +(define_predicate "branch_operator" + (match_code "eq,ne,lt,ge")) + +(define_predicate "ubranch_operator" + (match_code "ltu,geu")) + +(define_predicate "boolean_operator" + (match_code "eq,ne")) + +(define_predicate "xtensa_cstoresi_operator" + (match_code "eq,ne,gt,ge,lt,le")) + +(define_predicate "tls_symbol_operand" + (and (match_code "symbol_ref") + (match_test "SYMBOL_REF_TLS_MODEL (op) != 0"))) diff --git a/gcc/config/xtensa/t-elf b/gcc/config/xtensa/t-elf new file mode 100644 index 000000000..7d6cd1a3a --- /dev/null +++ b/gcc/config/xtensa/t-elf @@ -0,0 +1,6 @@ +# Build CRT files and libgcc with the "longcalls" option +CRTSTUFF_T_CFLAGS += -mlongcalls +CRTSTUFF_T_CFLAGS_S += -mlongcalls +TARGET_LIBGCC2_CFLAGS += -mlongcalls + +EXTRA_MULTILIB_PARTS = crti.o crtn.o crtbegin.o crtend.o diff --git a/gcc/config/xtensa/t-linux b/gcc/config/xtensa/t-linux new file mode 100644 index 000000000..7d535e155 --- /dev/null +++ b/gcc/config/xtensa/t-linux @@ -0,0 +1,3 @@ +EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o + +SHLIB_MAPFILES += $(srcdir)/config/xtensa/libgcc-xtensa.ver diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa new file mode 100644 index 000000000..c3d98ae30 --- /dev/null +++ b/gcc/config/xtensa/t-xtensa @@ -0,0 +1,42 @@ +# Copyright (C) 2002, 2003, 2006, 2007, 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMSRC = xtensa/lib1funcs.asm +LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ + _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ + _ashldi3 _ashrdi3 _lshrdi3 \ + _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ + _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \ + _floatdisf _floatundisf \ + _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \ + _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \ + _floatdidf _floatundidf \ + _truncdfsf2 _extendsfdf2 + +LIB2FUNCS_EXTRA = $(srcdir)/config/xtensa/lib2funcs.S +LIB2ADDEH = $(srcdir)/config/xtensa/unwind-dw2-xtensa.c \ + $(srcdir)/unwind-dw2-fde.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c + +$(T)crti.o: $(srcdir)/config/xtensa/crti.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/xtensa/crti.asm +$(T)crtn.o: $(srcdir)/config/xtensa/crtn.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/xtensa/crtn.asm + +$(out_object_file): gt-xtensa.h diff --git a/gcc/config/xtensa/unwind-dw2-xtensa.c b/gcc/config/xtensa/unwind-dw2-xtensa.c new file mode 100644 index 000000000..9544f65ab --- /dev/null +++ b/gcc/config/xtensa/unwind-dw2-xtensa.c @@ -0,0 +1,546 @@ +/* DWARF2 exception handling and frame unwinding for Xtensa. + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include "tconfig.h" +#include "tsystem.h" +#include "coretypes.h" +#include "tm.h" +#include "dwarf2.h" +#include "unwind.h" +#ifdef __USING_SJLJ_EXCEPTIONS__ +# define NO_SIZE_OF_ENCODED_VALUE +#endif +#include "unwind-pe.h" +#include "unwind-dw2-fde.h" +#include "unwind-dw2-xtensa.h" + +#ifndef __USING_SJLJ_EXCEPTIONS__ + +/* The standard CIE and FDE structures work fine for Xtensa but the + variable-size register window save areas are not a good fit for the rest + of the standard DWARF unwinding mechanism. Nor is that mechanism + necessary, since the register save areas are always in fixed locations + in each stack frame. This file is a stripped down and customized version + of the standard DWARF unwinding code. 
It needs to be customized to have + builtin logic for finding the save areas and also to track the stack + pointer value (besides the CFA) while unwinding since the primary save + area is located below the stack pointer. It is stripped down to reduce + code size and ease the maintenance burden of tracking changes in the + standard version of the code. */ + +#ifndef DWARF_REG_TO_UNWIND_COLUMN +#define DWARF_REG_TO_UNWIND_COLUMN(REGNO) (REGNO) +#endif + +#define XTENSA_RA_FIELD_MASK 0x3FFFFFFF + +/* This is the register and unwind state for a particular frame. This + provides the information necessary to unwind up past a frame and return + to its caller. */ +struct _Unwind_Context +{ + /* Track register window save areas of 4 registers each, instead of + keeping separate addresses for the individual registers. */ + _Unwind_Word *reg[4]; + + void *cfa; + void *sp; + void *ra; + + /* Cache the 2 high bits to replace the window size in return addresses. */ + _Unwind_Word ra_high_bits; + + void *lsda; + struct dwarf_eh_bases bases; + /* Signal frame context. */ +#define SIGNAL_FRAME_BIT ((~(_Unwind_Word) 0 >> 1) + 1) + _Unwind_Word flags; + /* 0 for now, can be increased when further fields are added to + struct _Unwind_Context. */ + _Unwind_Word version; +}; + + +/* Read unaligned data from the instruction buffer. */ + +union unaligned +{ + void *p; +} __attribute__ ((packed)); + +static void uw_update_context (struct _Unwind_Context *, _Unwind_FrameState *); +static _Unwind_Reason_Code uw_frame_state_for (struct _Unwind_Context *, + _Unwind_FrameState *); + +static inline void * +read_pointer (const void *p) { const union unaligned *up = p; return up->p; } + +static inline _Unwind_Word +_Unwind_IsSignalFrame (struct _Unwind_Context *context) +{ + return (context->flags & SIGNAL_FRAME_BIT) ? 1 : 0; +} + +static inline void +_Unwind_SetSignalFrame (struct _Unwind_Context *context, int val) +{ + if (val) + context->flags |= SIGNAL_FRAME_BIT; + else + context->flags &= ~SIGNAL_FRAME_BIT; +} + +/* Get the value of register INDEX as saved in CONTEXT. */ + +inline _Unwind_Word +_Unwind_GetGR (struct _Unwind_Context *context, int index) +{ + _Unwind_Word *ptr; + + index = DWARF_REG_TO_UNWIND_COLUMN (index); + ptr = context->reg[index >> 2] + (index & 3); + + return *ptr; +} + +/* Get the value of the CFA as saved in CONTEXT. */ + +_Unwind_Word +_Unwind_GetCFA (struct _Unwind_Context *context) +{ + return (_Unwind_Ptr) context->cfa; +} + +/* Overwrite the saved value for register INDEX in CONTEXT with VAL. */ + +inline void +_Unwind_SetGR (struct _Unwind_Context *context, int index, _Unwind_Word val) +{ + _Unwind_Word *ptr; + + index = DWARF_REG_TO_UNWIND_COLUMN (index); + ptr = context->reg[index >> 2] + (index & 3); + + *ptr = val; +} + +/* Retrieve the return address for CONTEXT. */ + +inline _Unwind_Ptr +_Unwind_GetIP (struct _Unwind_Context *context) +{ + return (_Unwind_Ptr) context->ra; +} + +/* Retrieve the return address and flag whether that IP is before + or after first not yet fully executed instruction. */ + +inline _Unwind_Ptr +_Unwind_GetIPInfo (struct _Unwind_Context *context, int *ip_before_insn) +{ + *ip_before_insn = _Unwind_IsSignalFrame (context); + return (_Unwind_Ptr) context->ra; +} + +/* Overwrite the return address for CONTEXT with VAL. 
*/ + +inline void +_Unwind_SetIP (struct _Unwind_Context *context, _Unwind_Ptr val) +{ + context->ra = (void *) val; +} + +void * +_Unwind_GetLanguageSpecificData (struct _Unwind_Context *context) +{ + return context->lsda; +} + +_Unwind_Ptr +_Unwind_GetRegionStart (struct _Unwind_Context *context) +{ + return (_Unwind_Ptr) context->bases.func; +} + +void * +_Unwind_FindEnclosingFunction (void *pc) +{ + struct dwarf_eh_bases bases; + const struct dwarf_fde *fde = _Unwind_Find_FDE (pc-1, &bases); + if (fde) + return bases.func; + else + return NULL; +} + +_Unwind_Ptr +_Unwind_GetDataRelBase (struct _Unwind_Context *context) +{ + return (_Unwind_Ptr) context->bases.dbase; +} + +_Unwind_Ptr +_Unwind_GetTextRelBase (struct _Unwind_Context *context) +{ + return (_Unwind_Ptr) context->bases.tbase; +} + +#ifdef MD_UNWIND_SUPPORT +#include MD_UNWIND_SUPPORT +#endif + +/* Extract any interesting information from the CIE for the translation + unit F belongs to. Return a pointer to the byte after the augmentation, + or NULL if we encountered an undecipherable augmentation. */ + +static const unsigned char * +extract_cie_info (const struct dwarf_cie *cie, struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + const unsigned char *aug = cie->augmentation; + const unsigned char *p = aug + strlen ((const char *)aug) + 1; + const unsigned char *ret = NULL; + _uleb128_t utmp; + _sleb128_t stmp; + + /* g++ v2 "eh" has pointer immediately following augmentation string, + so it must be handled first. */ + if (aug[0] == 'e' && aug[1] == 'h') + { + fs->eh_ptr = read_pointer (p); + p += sizeof (void *); + aug += 2; + } + + /* Immediately following the augmentation are the code and + data alignment and return address column. */ + p = read_uleb128 (p, &utmp); + p = read_sleb128 (p, &stmp); + if (cie->version == 1) + fs->retaddr_column = *p++; + else + { + p = read_uleb128 (p, &utmp); + fs->retaddr_column = (_Unwind_Word)utmp; + } + fs->lsda_encoding = DW_EH_PE_omit; + + /* If the augmentation starts with 'z', then a uleb128 immediately + follows containing the length of the augmentation field following + the size. */ + if (*aug == 'z') + { + p = read_uleb128 (p, &utmp); + ret = p + utmp; + + fs->saw_z = 1; + ++aug; + } + + /* Iterate over recognized augmentation subsequences. */ + while (*aug != '\0') + { + /* "L" indicates a byte showing how the LSDA pointer is encoded. */ + if (aug[0] == 'L') + { + fs->lsda_encoding = *p++; + aug += 1; + } + + /* "R" indicates a byte indicating how FDE addresses are encoded. */ + else if (aug[0] == 'R') + { + fs->fde_encoding = *p++; + aug += 1; + } + + /* "P" indicates a personality routine in the CIE augmentation. */ + else if (aug[0] == 'P') + { + _Unwind_Ptr personality; + + p = read_encoded_value (context, *p, p + 1, &personality); + fs->personality = (_Unwind_Personality_Fn) personality; + aug += 1; + } + + /* "S" indicates a signal frame. */ + else if (aug[0] == 'S') + { + fs->signal_frame = 1; + aug += 1; + } + + /* Otherwise we have an unknown augmentation string. + Bail unless we saw a 'z' prefix. */ + else + return ret; + } + + return ret ? ret : p; +} + +/* Given the _Unwind_Context CONTEXT for a stack frame, look up the FDE for + its caller and decode it into FS. This function also sets the + lsda member of CONTEXT, as it is really information + about the caller's frame. 
*/ + +static _Unwind_Reason_Code +uw_frame_state_for (struct _Unwind_Context *context, _Unwind_FrameState *fs) +{ + const struct dwarf_fde *fde; + const struct dwarf_cie *cie; + const unsigned char *aug; + int window_size; + _Unwind_Word *ra_ptr; + + memset (fs, 0, sizeof (*fs)); + context->lsda = 0; + + fde = _Unwind_Find_FDE (context->ra + _Unwind_IsSignalFrame (context) - 1, + &context->bases); + if (fde == NULL) + { +#ifdef MD_FALLBACK_FRAME_STATE_FOR + _Unwind_Reason_Code reason; + /* Couldn't find frame unwind info for this function. Try a + target-specific fallback mechanism. This will necessarily + not provide a personality routine or LSDA. */ + reason = MD_FALLBACK_FRAME_STATE_FOR (context, fs); + if (reason != _URC_END_OF_STACK) + return reason; +#endif + /* The frame was not recognized and handled by the fallback function, + but it is not really the end of the stack. Fall through here and + unwind it anyway. */ + } + else + { + cie = get_cie (fde); + if (extract_cie_info (cie, context, fs) == NULL) + /* CIE contained unknown augmentation. */ + return _URC_FATAL_PHASE1_ERROR; + + /* Locate augmentation for the fde. */ + aug = (const unsigned char *) fde + sizeof (*fde); + aug += 2 * size_of_encoded_value (fs->fde_encoding); + if (fs->saw_z) + { + _uleb128_t i; + aug = read_uleb128 (aug, &i); + } + if (fs->lsda_encoding != DW_EH_PE_omit) + { + _Unwind_Ptr lsda; + + aug = read_encoded_value (context, fs->lsda_encoding, aug, &lsda); + context->lsda = (void *) lsda; + } + } + + /* Check for the end of the stack. This needs to be checked after + the MD_FALLBACK_FRAME_STATE_FOR check for signal frames because + the contents of context->reg[0] are undefined at a signal frame, + and register a0 may appear to be zero. (The return address in + context->ra comes from register a4 or a8). */ + ra_ptr = context->reg[0]; + if (ra_ptr && *ra_ptr == 0) + return _URC_END_OF_STACK; + + /* Find the window size from the high bits of the return address. */ + if (ra_ptr) + window_size = (*ra_ptr >> 30) * 4; + else + window_size = 8; + + fs->retaddr_column = window_size; + + return _URC_NO_REASON; +} + +static void +uw_update_context_1 (struct _Unwind_Context *context, _Unwind_FrameState *fs) +{ + struct _Unwind_Context orig_context = *context; + _Unwind_Word *sp, *cfa, *next_cfa; + int i; + + if (fs->signal_regs) + { + cfa = (_Unwind_Word *) fs->signal_regs[1]; + next_cfa = (_Unwind_Word *) cfa[-3]; + + for (i = 0; i < 4; i++) + context->reg[i] = fs->signal_regs + (i << 2); + } + else + { + int window_size = fs->retaddr_column >> 2; + + sp = (_Unwind_Word *) orig_context.sp; + cfa = (_Unwind_Word *) orig_context.cfa; + next_cfa = (_Unwind_Word *) cfa[-3]; + + /* Registers a0-a3 are in the save area below sp. */ + context->reg[0] = sp - 4; + + /* Find the extra save area below next_cfa. */ + for (i = 1; i < window_size; i++) + context->reg[i] = next_cfa - 4 * (1 + window_size - i); + + /* Remaining registers rotate from previous save areas. */ + for (i = window_size; i < 4; i++) + context->reg[i] = orig_context.reg[i - window_size]; + } + + context->sp = cfa; + context->cfa = next_cfa; + + _Unwind_SetSignalFrame (context, fs->signal_frame); +} + +/* CONTEXT describes the unwind state for a frame, and FS describes the FDE + of its caller. Update CONTEXT to refer to the caller as well. Note + that the lsda member is not updated here, but later in + uw_frame_state_for. 
*/ + +static void +uw_update_context (struct _Unwind_Context *context, _Unwind_FrameState *fs) +{ + uw_update_context_1 (context, fs); + + /* Compute the return address now, since the return address column + can change from frame to frame. */ + if (fs->signal_ra != 0) + context->ra = (void *) fs->signal_ra; + else + context->ra = (void *) ((_Unwind_GetGR (context, fs->retaddr_column) + & XTENSA_RA_FIELD_MASK) | context->ra_high_bits); +} + +static void +uw_advance_context (struct _Unwind_Context *context, _Unwind_FrameState *fs) +{ + uw_update_context (context, fs); +} + +/* Fill in CONTEXT for top-of-stack. The only valid registers at this + level will be the return address and the CFA. */ + +#define uw_init_context(CONTEXT) \ + do \ + { \ + __builtin_unwind_init (); \ + uw_init_context_1 (CONTEXT, __builtin_dwarf_cfa (), \ + __builtin_return_address (0)); \ + } \ + while (0) + +static void __attribute__((noinline)) +uw_init_context_1 (struct _Unwind_Context *context, void *outer_cfa, + void *outer_ra) +{ + void *ra = __builtin_return_address (0); + void *cfa = __builtin_dwarf_cfa (); + _Unwind_FrameState fs; + + memset (context, 0, sizeof (struct _Unwind_Context)); + context->ra = ra; + + memset (&fs, 0, sizeof (fs)); + fs.retaddr_column = 8; + context->sp = cfa; + context->cfa = outer_cfa; + context->ra_high_bits = + ((_Unwind_Word) uw_init_context_1) & ~XTENSA_RA_FIELD_MASK; + uw_update_context_1 (context, &fs); + + context->ra = outer_ra; +} + + +/* Install TARGET into CURRENT so that we can return to it. This is a + macro because __builtin_eh_return must be invoked in the context of + our caller. */ + +#define uw_install_context(CURRENT, TARGET) \ + do \ + { \ + long offset = uw_install_context_1 ((CURRENT), (TARGET)); \ + void *handler = __builtin_frob_return_addr ((TARGET)->ra); \ + __builtin_eh_return (offset, handler); \ + } \ + while (0) + +static long +uw_install_context_1 (struct _Unwind_Context *current, + struct _Unwind_Context *target) +{ + long i; + + /* The eh_return insn assumes a window size of 8, so don't bother copying + the save areas for registers a8-a15 since they won't be reloaded. */ + for (i = 0; i < 2; ++i) + { + void *c = current->reg[i]; + void *t = target->reg[i]; + + if (t && c && t != c) + memcpy (c, t, 4 * sizeof (_Unwind_Word)); + } + + return 0; +} + +static inline _Unwind_Ptr +uw_identify_context (struct _Unwind_Context *context) +{ + return _Unwind_GetCFA (context); +} + + +#include "unwind.inc" + +#if defined (USE_GAS_SYMVER) && defined (SHARED) && defined (USE_LIBUNWIND_EXCEPTIONS) +alias (_Unwind_Backtrace); +alias (_Unwind_DeleteException); +alias (_Unwind_FindEnclosingFunction); +alias (_Unwind_ForcedUnwind); +alias (_Unwind_GetDataRelBase); +alias (_Unwind_GetTextRelBase); +alias (_Unwind_GetCFA); +alias (_Unwind_GetGR); +alias (_Unwind_GetIP); +alias (_Unwind_GetLanguageSpecificData); +alias (_Unwind_GetRegionStart); +alias (_Unwind_RaiseException); +alias (_Unwind_Resume); +alias (_Unwind_Resume_or_Rethrow); +alias (_Unwind_SetGR); +alias (_Unwind_SetIP); +#endif + +#endif /* !USING_SJLJ_EXCEPTIONS */ diff --git a/gcc/config/xtensa/unwind-dw2-xtensa.h b/gcc/config/xtensa/unwind-dw2-xtensa.h new file mode 100644 index 000000000..d13b3264c --- /dev/null +++ b/gcc/config/xtensa/unwind-dw2-xtensa.h @@ -0,0 +1,50 @@ +/* DWARF2 frame unwind data structure for Xtensa. + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2007, 2008, + 2009 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* A target can override (perhaps for backward compatibility) how + many dwarf2 columns are unwound. */ +#ifndef DWARF_FRAME_REGISTERS +#define DWARF_FRAME_REGISTERS FIRST_PSEUDO_REGISTER +#endif + +/* Xtensa's variable-size register window save areas can be unwound without + any unwind info. This is a stripped down version of the standard DWARF + _Unwind_FrameState. */ +typedef struct +{ + /* The information we care about from the CIE/FDE. */ + _Unwind_Personality_Fn personality; + _Unwind_Word retaddr_column; + unsigned char fde_encoding; + unsigned char lsda_encoding; + unsigned char saw_z; + unsigned char signal_frame; + void *eh_ptr; + + /* Saved registers for a signal frame. */ + _Unwind_Word *signal_regs; + _Unwind_Word signal_ra; +} _Unwind_FrameState; + diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h new file mode 100644 index 000000000..0d1738f4e --- /dev/null +++ b/gcc/config/xtensa/xtensa-protos.h @@ -0,0 +1,74 @@ +/* Prototypes of target machine for GNU compiler for Xtensa. + Copyright 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef __XTENSA_PROTOS_H__ +#define __XTENSA_PROTOS_H__ + +/* Functions to test whether an immediate fits in a given field. */ +extern bool xtensa_simm8 (HOST_WIDE_INT); +extern bool xtensa_simm8x256 (HOST_WIDE_INT); +extern bool xtensa_simm12b (HOST_WIDE_INT); +extern bool xtensa_b4const_or_zero (HOST_WIDE_INT); +extern bool xtensa_b4constu (HOST_WIDE_INT); +extern bool xtensa_mask_immediate (HOST_WIDE_INT); +extern bool xtensa_mem_offset (unsigned, enum machine_mode); + +/* Functions within xtensa.c that we reference. 
*/ +#ifdef RTX_CODE +extern int xt_true_regnum (rtx); +extern int xtensa_valid_move (enum machine_mode, rtx *); +extern int smalloffset_mem_p (rtx); +extern int constantpool_mem_p (rtx); +extern void xtensa_extend_reg (rtx, rtx); +extern void xtensa_expand_conditional_branch (rtx *, enum machine_mode); +extern int xtensa_expand_conditional_move (rtx *, int); +extern int xtensa_expand_scc (rtx *, enum machine_mode); +extern int xtensa_expand_block_move (rtx *); +extern void xtensa_split_operand_pair (rtx *, enum machine_mode); +extern int xtensa_emit_move_sequence (rtx *, enum machine_mode); +extern rtx xtensa_copy_incoming_a7 (rtx); +extern void xtensa_expand_nonlocal_goto (rtx *); +extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx); +extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool); +extern void xtensa_emit_loop_end (rtx, rtx *); +extern char *xtensa_emit_branch (bool, bool, rtx *); +extern char *xtensa_emit_bit_branch (bool, bool, rtx *); +extern char *xtensa_emit_movcc (bool, bool, bool, rtx *); +extern char *xtensa_emit_call (int, rtx *); +extern bool xtensa_tls_referenced_p (rtx); + +#ifdef TREE_CODE +extern void init_cumulative_args (CUMULATIVE_ARGS *, int); +#endif /* TREE_CODE */ + +extern void print_operand (FILE *, rtx, int); +extern void print_operand_address (FILE *, rtx); +extern void xtensa_output_literal (FILE *, rtx, enum machine_mode, int); +extern rtx xtensa_return_addr (int, rtx); +#endif /* RTX_CODE */ + +extern void xtensa_setup_frame_addresses (void); +extern int xtensa_dbx_register_number (int); +extern long compute_frame_size (int); +extern void xtensa_expand_prologue (void); +extern void order_regs_for_local_alloc (void); + +#endif /* !__XTENSA_PROTOS_H__ */ diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c new file mode 100644 index 000000000..e7c395be5 --- /dev/null +++ b/gcc/config/xtensa/xtensa.c @@ -0,0 +1,3715 @@ +/* Subroutines for insn-output.c for Tensilica's Xtensa architecture. + Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-flags.h" +#include "insn-attr.h" +#include "insn-codes.h" +#include "recog.h" +#include "output.h" +#include "tree.h" +#include "expr.h" +#include "flags.h" +#include "reload.h" +#include "tm_p.h" +#include "function.h" +#include "diagnostic-core.h" +#include "optabs.h" +#include "libfuncs.h" +#include "ggc.h" +#include "target.h" +#include "target-def.h" +#include "langhooks.h" +#include "gimple.h" +#include "df.h" + + +/* Enumeration for all of the relational tests, so that we can build + arrays indexed by the test type, and not worry about the order + of EQ, NE, etc. */ + +enum internal_test +{ + ITEST_EQ, + ITEST_NE, + ITEST_GT, + ITEST_GE, + ITEST_LT, + ITEST_LE, + ITEST_GTU, + ITEST_GEU, + ITEST_LTU, + ITEST_LEU, + ITEST_MAX +}; + +/* Array giving truth value on whether or not a given hard register + can support a given mode. */ +char xtensa_hard_regno_mode_ok[(int) MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER]; + +/* Current frame size calculated by compute_frame_size. */ +unsigned xtensa_current_frame_size; + +/* Largest block move to handle in-line. */ +#define LARGEST_MOVE_RATIO 15 + +/* Define the structure for the machine field in struct function. */ +struct GTY(()) machine_function +{ + int accesses_prev_frame; + bool need_a7_copy; + bool vararg_a7; + rtx vararg_a7_copy; + rtx set_frame_ptr_insn; +}; + +/* Vector, indexed by hard register number, which contains 1 for a + register that is allowable in a candidate for leaf function + treatment. */ + +const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1 +}; + +/* Map hard register number to register class */ +const enum reg_class xtensa_regno_to_class[FIRST_PSEUDO_REGISTER] = +{ + RL_REGS, SP_REG, RL_REGS, RL_REGS, + RL_REGS, RL_REGS, RL_REGS, GR_REGS, + RL_REGS, RL_REGS, RL_REGS, RL_REGS, + RL_REGS, RL_REGS, RL_REGS, RL_REGS, + AR_REGS, AR_REGS, BR_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + ACC_REG, +}; + +static void xtensa_option_override (void); +static enum internal_test map_test_to_internal_test (enum rtx_code); +static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *); +static rtx gen_float_relational (enum rtx_code, rtx, rtx); +static rtx gen_conditional_move (enum rtx_code, enum machine_mode, rtx, rtx); +static rtx fixup_subreg_mem (rtx); +static struct machine_function * xtensa_init_machine_status (void); +static rtx xtensa_legitimize_tls_address (rtx); +static rtx xtensa_legitimize_address (rtx, rtx, enum machine_mode); +static bool xtensa_mode_dependent_address_p (const_rtx); +static bool xtensa_return_in_msb (const_tree); +static void printx (FILE *, signed int); +static void xtensa_function_epilogue (FILE *, HOST_WIDE_INT); +static rtx xtensa_builtin_saveregs (void); +static bool xtensa_legitimate_address_p (enum machine_mode, rtx, bool); +static unsigned int xtensa_multibss_section_type_flags (tree, const char *, + int) ATTRIBUTE_UNUSED; +static section *xtensa_select_rtx_section (enum machine_mode, rtx, + unsigned HOST_WIDE_INT); +static bool xtensa_rtx_costs (rtx, int, int, int *, bool); +static int xtensa_register_move_cost (enum 
machine_mode, reg_class_t, + reg_class_t); +static int xtensa_memory_move_cost (enum machine_mode, reg_class_t, bool); +static tree xtensa_build_builtin_va_list (void); +static bool xtensa_return_in_memory (const_tree, const_tree); +static tree xtensa_gimplify_va_arg_expr (tree, tree, gimple_seq *, + gimple_seq *); +static void xtensa_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static rtx xtensa_function_arg (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static rtx xtensa_function_incoming_arg (CUMULATIVE_ARGS *, + enum machine_mode, const_tree, bool); +static rtx xtensa_function_value (const_tree, const_tree, bool); +static rtx xtensa_libcall_value (enum machine_mode, const_rtx); +static bool xtensa_function_value_regno_p (const unsigned int); +static unsigned int xtensa_function_arg_boundary (enum machine_mode, + const_tree); +static void xtensa_init_builtins (void); +static tree xtensa_fold_builtin (tree, int, tree *, bool); +static rtx xtensa_expand_builtin (tree, rtx, rtx, enum machine_mode, int); +static void xtensa_va_start (tree, rtx); +static bool xtensa_frame_pointer_required (void); +static rtx xtensa_static_chain (const_tree, bool); +static void xtensa_asm_trampoline_template (FILE *); +static void xtensa_trampoline_init (rtx, tree, rtx); +static bool xtensa_output_addr_const_extra (FILE *, rtx); + +static reg_class_t xtensa_preferred_reload_class (rtx, reg_class_t); +static reg_class_t xtensa_preferred_output_reload_class (rtx, reg_class_t); +static reg_class_t xtensa_secondary_reload (bool, rtx, reg_class_t, + enum machine_mode, + struct secondary_reload_info *); + +static bool constantpool_address_p (const_rtx addr); + +static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] = + REG_ALLOC_ORDER; + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ + +static const struct default_options xtensa_option_optimization_table[] = + { + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + /* Reordering blocks for Xtensa is not a good idea unless the + compiler understands the range of conditional branches. + Currently all branch relaxation for Xtensa is handled in the + assembler, so GCC cannot do a good job of reordering blocks. + Do not enable reordering unless it is explicitly requested. */ + { OPT_LEVELS_ALL, OPT_freorder_blocks, NULL, 0 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + + +/* This macro generates the assembly code for function exit, + on machines that need it. If FUNCTION_EPILOGUE is not defined + then individual return instructions are generated for each + return statement. Args are same as for FUNCTION_PROLOGUE. */ + +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE xtensa_function_epilogue + +/* These hooks specify assembly directives for creating certain kinds + of integer object. 
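+   Only the 32-bit case is overridden here: TARGET_ASM_ALIGNED_SI_OP is
+   mapped to the assembler's ".word" directive just below.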
*/ + +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION xtensa_select_rtx_section + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT) + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS xtensa_legitimize_address +#undef TARGET_MODE_DEPENDENT_ADDRESS_P +#define TARGET_MODE_DEPENDENT_ADDRESS_P xtensa_mode_dependent_address_p + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST xtensa_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS xtensa_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST xtensa_build_builtin_va_list + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START xtensa_va_start + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY xtensa_return_in_memory +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE xtensa_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE xtensa_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P xtensa_function_value_regno_p + +#undef TARGET_SPLIT_COMPLEX_ARG +#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE xtensa_function_arg_advance +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG xtensa_function_arg +#undef TARGET_FUNCTION_INCOMING_ARG +#define TARGET_FUNCTION_INCOMING_ARG xtensa_function_incoming_arg +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY xtensa_function_arg_boundary + +#undef TARGET_EXPAND_BUILTIN_SAVEREGS +#define TARGET_EXPAND_BUILTIN_SAVEREGS xtensa_builtin_saveregs +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR xtensa_gimplify_va_arg_expr + +#undef TARGET_RETURN_IN_MSB +#define TARGET_RETURN_IN_MSB xtensa_return_in_msb + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS xtensa_init_builtins +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN xtensa_fold_builtin +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN xtensa_expand_builtin + +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS xtensa_preferred_reload_class +#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS +#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS xtensa_preferred_output_reload_class + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD xtensa_secondary_reload + +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS (TARGET_THREADPTR && HAVE_AS_TLS) + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM xtensa_tls_referenced_p + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P xtensa_legitimate_address_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED xtensa_frame_pointer_required + +#undef TARGET_STATIC_CHAIN +#define TARGET_STATIC_CHAIN xtensa_static_chain +#undef 
TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE xtensa_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT xtensa_trampoline_init + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE xtensa_option_override +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE xtensa_option_optimization_table + +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA xtensa_output_addr_const_extra + +struct gcc_target targetm = TARGET_INITIALIZER; + + +/* Functions to test Xtensa immediate operand validity. */ + +bool +xtensa_simm8 (HOST_WIDE_INT v) +{ + return v >= -128 && v <= 127; +} + + +bool +xtensa_simm8x256 (HOST_WIDE_INT v) +{ + return (v & 255) == 0 && (v >= -32768 && v <= 32512); +} + + +bool +xtensa_simm12b (HOST_WIDE_INT v) +{ + return v >= -2048 && v <= 2047; +} + + +static bool +xtensa_uimm8 (HOST_WIDE_INT v) +{ + return v >= 0 && v <= 255; +} + + +static bool +xtensa_uimm8x2 (HOST_WIDE_INT v) +{ + return (v & 1) == 0 && (v >= 0 && v <= 510); +} + + +static bool +xtensa_uimm8x4 (HOST_WIDE_INT v) +{ + return (v & 3) == 0 && (v >= 0 && v <= 1020); +} + + +static bool +xtensa_b4const (HOST_WIDE_INT v) +{ + switch (v) + { + case -1: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 10: + case 12: + case 16: + case 32: + case 64: + case 128: + case 256: + return true; + } + return false; +} + + +bool +xtensa_b4const_or_zero (HOST_WIDE_INT v) +{ + if (v == 0) + return true; + return xtensa_b4const (v); +} + + +bool +xtensa_b4constu (HOST_WIDE_INT v) +{ + switch (v) + { + case 32768: + case 65536: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 10: + case 12: + case 16: + case 32: + case 64: + case 128: + case 256: + return true; + } + return false; +} + + +bool +xtensa_mask_immediate (HOST_WIDE_INT v) +{ +#define MAX_MASK_SIZE 16 + int mask_size; + + for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++) + { + if ((v & 1) == 0) + return false; + v = v >> 1; + if (v == 0) + return true; + } + + return false; +} + + +/* This is just like the standard true_regnum() function except that it + works even when reg_renumber is not initialized. */ + +int +xt_true_regnum (rtx x) +{ + if (GET_CODE (x) == REG) + { + if (reg_renumber + && REGNO (x) >= FIRST_PSEUDO_REGISTER + && reg_renumber[REGNO (x)] >= 0) + return reg_renumber[REGNO (x)]; + return REGNO (x); + } + if (GET_CODE (x) == SUBREG) + { + int base = xt_true_regnum (SUBREG_REG (x)); + if (base >= 0 && base < FIRST_PSEUDO_REGISTER) + return base + subreg_regno_offset (REGNO (SUBREG_REG (x)), + GET_MODE (SUBREG_REG (x)), + SUBREG_BYTE (x), GET_MODE (x)); + } + return -1; +} + + +int +xtensa_valid_move (enum machine_mode mode, rtx *operands) +{ + /* Either the destination or source must be a register, and the + MAC16 accumulator doesn't count. */ + + if (register_operand (operands[0], mode)) + { + int dst_regnum = xt_true_regnum (operands[0]); + + /* The stack pointer can only be assigned with a MOVSP opcode. 
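+	 (Presumably because, with the windowed ABI, the register-window
+	 spill area has to track a1; accordingly only a plain SImode
+	 register source, and never the MAC16 accumulator, is accepted
+	 here.)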
*/ + if (dst_regnum == STACK_POINTER_REGNUM) + return (mode == SImode + && register_operand (operands[1], mode) + && !ACC_REG_P (xt_true_regnum (operands[1]))); + + if (!ACC_REG_P (dst_regnum)) + return true; + } + if (register_operand (operands[1], mode)) + { + int src_regnum = xt_true_regnum (operands[1]); + if (!ACC_REG_P (src_regnum)) + return true; + } + return FALSE; +} + + +int +smalloffset_mem_p (rtx op) +{ + if (GET_CODE (op) == MEM) + { + rtx addr = XEXP (op, 0); + if (GET_CODE (addr) == REG) + return BASE_REG_P (addr, 0); + if (GET_CODE (addr) == PLUS) + { + rtx offset = XEXP (addr, 0); + HOST_WIDE_INT val; + if (GET_CODE (offset) != CONST_INT) + offset = XEXP (addr, 1); + if (GET_CODE (offset) != CONST_INT) + return FALSE; + + val = INTVAL (offset); + return (val & 3) == 0 && (val >= 0 && val <= 60); + } + } + return FALSE; +} + + +static bool +constantpool_address_p (const_rtx addr) +{ + const_rtx sym = addr; + + if (GET_CODE (addr) == CONST) + { + rtx offset; + + /* Only handle (PLUS (SYM, OFFSET)) form. */ + addr = XEXP (addr, 0); + if (GET_CODE (addr) != PLUS) + return false; + + /* Make sure the address is word aligned. */ + offset = XEXP (addr, 1); + if ((!CONST_INT_P (offset)) + || ((INTVAL (offset) & 3) != 0)) + return false; + + sym = XEXP (addr, 0); + } + + if ((GET_CODE (sym) == SYMBOL_REF) + && CONSTANT_POOL_ADDRESS_P (sym)) + return true; + return false; +} + + +int +constantpool_mem_p (rtx op) +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (GET_CODE (op) == MEM) + return constantpool_address_p (XEXP (op, 0)); + return FALSE; +} + + +/* Return TRUE if X is a thread-local symbol. */ + +static bool +xtensa_tls_symbol_p (rtx x) +{ + if (! TARGET_HAVE_TLS) + return false; + + return GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0; +} + + +void +xtensa_extend_reg (rtx dst, rtx src) +{ + rtx temp = gen_reg_rtx (SImode); + rtx shift = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (GET_MODE (src))); + + /* Generate paradoxical subregs as needed so that the modes match. */ + src = simplify_gen_subreg (SImode, src, GET_MODE (src), 0); + dst = simplify_gen_subreg (SImode, dst, GET_MODE (dst), 0); + + emit_insn (gen_ashlsi3 (temp, src, shift)); + emit_insn (gen_ashrsi3 (dst, temp, shift)); +} + + +bool +xtensa_mem_offset (unsigned v, enum machine_mode mode) +{ + switch (mode) + { + case BLKmode: + /* Handle the worst case for block moves. See xtensa_expand_block_move + where we emit an optimized block move operation if the block can be + moved in < "move_ratio" pieces. The worst case is when the block is + aligned but has a size of (3 mod 4) (does this happen?) so that the + last piece requires a byte load/store. */ + return (xtensa_uimm8 (v) + && xtensa_uimm8 (v + MOVE_MAX * LARGEST_MOVE_RATIO)); + + case QImode: + return xtensa_uimm8 (v); + + case HImode: + return xtensa_uimm8x2 (v); + + case DFmode: + return (xtensa_uimm8x4 (v) && xtensa_uimm8x4 (v + 4)); + + default: + break; + } + + return xtensa_uimm8x4 (v); +} + + +/* Make normal rtx_code into something we can index from an array. 
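+   The resulting ITEST_* value is used to index the info[] table in
+   gen_int_relational () below; codes that are not handled map to
+   ITEST_MAX.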
*/ + +static enum internal_test +map_test_to_internal_test (enum rtx_code test_code) +{ + enum internal_test test = ITEST_MAX; + + switch (test_code) + { + default: break; + case EQ: test = ITEST_EQ; break; + case NE: test = ITEST_NE; break; + case GT: test = ITEST_GT; break; + case GE: test = ITEST_GE; break; + case LT: test = ITEST_LT; break; + case LE: test = ITEST_LE; break; + case GTU: test = ITEST_GTU; break; + case GEU: test = ITEST_GEU; break; + case LTU: test = ITEST_LTU; break; + case LEU: test = ITEST_LEU; break; + } + + return test; +} + + +/* Generate the code to compare two integer values. The return value is + the comparison expression. */ + +static rtx +gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + rtx cmp0, /* first operand to compare */ + rtx cmp1, /* second operand to compare */ + int *p_invert /* whether branch needs to reverse test */) +{ + struct cmp_info + { + enum rtx_code test_code; /* test code to use in insn */ + bool (*const_range_p) (HOST_WIDE_INT); /* range check function */ + int const_add; /* constant to add (convert LE -> LT) */ + int reverse_regs; /* reverse registers in test */ + int invert_const; /* != 0 if invert value if cmp1 is constant */ + int invert_reg; /* != 0 if invert value if cmp1 is register */ + int unsignedp; /* != 0 for unsigned comparisons. */ + }; + + static struct cmp_info info[ (int)ITEST_MAX ] = { + + { EQ, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* EQ */ + { NE, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* NE */ + + { LT, xtensa_b4const_or_zero, 1, 1, 1, 0, 0 }, /* GT */ + { GE, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* GE */ + { LT, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* LT */ + { GE, xtensa_b4const_or_zero, 1, 1, 1, 0, 0 }, /* LE */ + + { LTU, xtensa_b4constu, 1, 1, 1, 0, 1 }, /* GTU */ + { GEU, xtensa_b4constu, 0, 0, 0, 0, 1 }, /* GEU */ + { LTU, xtensa_b4constu, 0, 0, 0, 0, 1 }, /* LTU */ + { GEU, xtensa_b4constu, 1, 1, 1, 0, 1 }, /* LEU */ + }; + + enum internal_test test; + enum machine_mode mode; + struct cmp_info *p_info; + + test = map_test_to_internal_test (test_code); + gcc_assert (test != ITEST_MAX); + + p_info = &info[ (int)test ]; + + mode = GET_MODE (cmp0); + if (mode == VOIDmode) + mode = GET_MODE (cmp1); + + /* Make sure we can handle any constants given to us. */ + if (GET_CODE (cmp1) == CONST_INT) + { + HOST_WIDE_INT value = INTVAL (cmp1); + unsigned HOST_WIDE_INT uvalue = (unsigned HOST_WIDE_INT)value; + + /* if the immediate overflows or does not fit in the immediate field, + spill it to a register */ + + if ((p_info->unsignedp ? + (uvalue + p_info->const_add > uvalue) : + (value + p_info->const_add > value)) != (p_info->const_add > 0)) + { + cmp1 = force_reg (mode, cmp1); + } + else if (!(p_info->const_range_p) (value + p_info->const_add)) + { + cmp1 = force_reg (mode, cmp1); + } + } + else if ((GET_CODE (cmp1) != REG) && (GET_CODE (cmp1) != SUBREG)) + { + cmp1 = force_reg (mode, cmp1); + } + + /* See if we need to invert the result. */ + *p_invert = ((GET_CODE (cmp1) == CONST_INT) + ? p_info->invert_const + : p_info->invert_reg); + + /* Comparison to constants, may involve adding 1 to change a LT into LE. + Comparison between two registers, may involve switching operands. 
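+     For example, a GT test against the constant 7 becomes an inverted LT
+     test against 8 (x > 7 is equivalent to !(x < 8)), while GT between
+     two registers simply swaps the operands and tests LT.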
*/ + if (GET_CODE (cmp1) == CONST_INT) + { + if (p_info->const_add != 0) + cmp1 = GEN_INT (INTVAL (cmp1) + p_info->const_add); + + } + else if (p_info->reverse_regs) + { + rtx temp = cmp0; + cmp0 = cmp1; + cmp1 = temp; + } + + return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1); +} + + +/* Generate the code to compare two float values. The return value is + the comparison expression. */ + +static rtx +gen_float_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ + rtx cmp0, /* first operand to compare */ + rtx cmp1 /* second operand to compare */) +{ + rtx (*gen_fn) (rtx, rtx, rtx); + rtx brtmp; + int reverse_regs, invert; + + switch (test_code) + { + case EQ: reverse_regs = 0; invert = 0; gen_fn = gen_seq_sf; break; + case NE: reverse_regs = 0; invert = 1; gen_fn = gen_seq_sf; break; + case LE: reverse_regs = 0; invert = 0; gen_fn = gen_sle_sf; break; + case GT: reverse_regs = 1; invert = 0; gen_fn = gen_slt_sf; break; + case LT: reverse_regs = 0; invert = 0; gen_fn = gen_slt_sf; break; + case GE: reverse_regs = 1; invert = 0; gen_fn = gen_sle_sf; break; + case UNEQ: reverse_regs = 0; invert = 0; gen_fn = gen_suneq_sf; break; + case LTGT: reverse_regs = 0; invert = 1; gen_fn = gen_suneq_sf; break; + case UNLE: reverse_regs = 0; invert = 0; gen_fn = gen_sunle_sf; break; + case UNGT: reverse_regs = 1; invert = 0; gen_fn = gen_sunlt_sf; break; + case UNLT: reverse_regs = 0; invert = 0; gen_fn = gen_sunlt_sf; break; + case UNGE: reverse_regs = 1; invert = 0; gen_fn = gen_sunle_sf; break; + case UNORDERED: + reverse_regs = 0; invert = 0; gen_fn = gen_sunordered_sf; break; + case ORDERED: + reverse_regs = 0; invert = 1; gen_fn = gen_sunordered_sf; break; + default: + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); + reverse_regs = 0; invert = 0; gen_fn = 0; /* avoid compiler warnings */ + } + + if (reverse_regs) + { + rtx temp = cmp0; + cmp0 = cmp1; + cmp1 = temp; + } + + brtmp = gen_rtx_REG (CCmode, FPCC_REGNUM); + emit_insn (gen_fn (brtmp, cmp0, cmp1)); + + return gen_rtx_fmt_ee (invert ? EQ : NE, VOIDmode, brtmp, const0_rtx); +} + + +void +xtensa_expand_conditional_branch (rtx *operands, enum machine_mode mode) +{ + enum rtx_code test_code = GET_CODE (operands[0]); + rtx cmp0 = operands[1]; + rtx cmp1 = operands[2]; + rtx cmp; + int invert; + rtx label1, label2; + + switch (mode) + { + case DFmode: + default: + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1)); + + case SImode: + invert = FALSE; + cmp = gen_int_relational (test_code, cmp0, cmp1, &invert); + break; + + case SFmode: + if (!TARGET_HARD_FLOAT) + fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, + cmp0, cmp1)); + invert = FALSE; + cmp = gen_float_relational (test_code, cmp0, cmp1); + break; + } + + /* Generate the branch. */ + + label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + label2 = pc_rtx; + + if (invert) + { + label2 = label1; + label1 = pc_rtx; + } + + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, cmp, + label1, + label2))); +} + + +static rtx +gen_conditional_move (enum rtx_code code, enum machine_mode mode, + rtx op0, rtx op1) +{ + if (mode == SImode) + { + rtx cmp; + + /* Jump optimization calls get_condition() which canonicalizes + comparisons like (GE x ) to (GT x ). + Transform those comparisons back to GE, since that is the + comparison supported in Xtensa. 
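+     (Concretely, (GT x (const_int -1)) is turned back into
+     (GE x (const_int 0)) just below.)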
We shouldn't have to + transform comparisons, because neither + xtensa_expand_conditional_branch() nor get_condition() will + produce them. */ + + if ((code == GT) && (op1 == constm1_rtx)) + { + code = GE; + op1 = const0_rtx; + } + cmp = gen_rtx_fmt_ee (code, VOIDmode, cc0_rtx, const0_rtx); + + if (boolean_operator (cmp, VOIDmode)) + { + /* Swap the operands to make const0 second. */ + if (op0 == const0_rtx) + { + op0 = op1; + op1 = const0_rtx; + } + + /* If not comparing against zero, emit a comparison (subtract). */ + if (op1 != const0_rtx) + { + op0 = expand_binop (SImode, sub_optab, op0, op1, + 0, 0, OPTAB_LIB_WIDEN); + op1 = const0_rtx; + } + } + else if (branch_operator (cmp, VOIDmode)) + { + /* Swap the operands to make const0 second. */ + if (op0 == const0_rtx) + { + op0 = op1; + op1 = const0_rtx; + + switch (code) + { + case LT: code = GE; break; + case GE: code = LT; break; + default: gcc_unreachable (); + } + } + + if (op1 != const0_rtx) + return 0; + } + else + return 0; + + return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + } + + if (TARGET_HARD_FLOAT && mode == SFmode) + return gen_float_relational (code, op0, op1); + + return 0; +} + + +int +xtensa_expand_conditional_move (rtx *operands, int isflt) +{ + rtx dest = operands[0]; + rtx cmp = operands[1]; + enum machine_mode cmp_mode = GET_MODE (XEXP (cmp, 0)); + rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx); + + if (!(cmp = gen_conditional_move (GET_CODE (cmp), cmp_mode, + XEXP (cmp, 0), XEXP (cmp, 1)))) + return 0; + + if (isflt) + gen_fn = (cmp_mode == SImode + ? gen_movsfcc_internal0 + : gen_movsfcc_internal1); + else + gen_fn = (cmp_mode == SImode + ? gen_movsicc_internal0 + : gen_movsicc_internal1); + + emit_insn (gen_fn (dest, XEXP (cmp, 0), operands[2], operands[3], cmp)); + return 1; +} + + +int +xtensa_expand_scc (rtx operands[4], enum machine_mode cmp_mode) +{ + rtx dest = operands[0]; + rtx cmp; + rtx one_tmp, zero_tmp; + rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx); + + if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode, + operands[2], operands[3]))) + return 0; + + one_tmp = gen_reg_rtx (SImode); + zero_tmp = gen_reg_rtx (SImode); + emit_insn (gen_movsi (one_tmp, const_true_rtx)); + emit_insn (gen_movsi (zero_tmp, const0_rtx)); + + gen_fn = (cmp_mode == SImode + ? gen_movsicc_internal0 + : gen_movsicc_internal1); + emit_insn (gen_fn (dest, XEXP (cmp, 0), one_tmp, zero_tmp, cmp)); + return 1; +} + + +/* Split OP[1] into OP[2,3] and likewise for OP[0] into OP[0,1]. MODE is + for the output, i.e., the input operands are twice as big as MODE. */ + +void +xtensa_split_operand_pair (rtx operands[4], enum machine_mode mode) +{ + switch (GET_CODE (operands[1])) + { + case REG: + operands[3] = gen_rtx_REG (mode, REGNO (operands[1]) + 1); + operands[2] = gen_rtx_REG (mode, REGNO (operands[1])); + break; + + case MEM: + operands[3] = adjust_address (operands[1], mode, GET_MODE_SIZE (mode)); + operands[2] = adjust_address (operands[1], mode, 0); + break; + + case CONST_INT: + case CONST_DOUBLE: + split_double (operands[1], &operands[2], &operands[3]); + break; + + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[0])) + { + case REG: + operands[1] = gen_rtx_REG (mode, REGNO (operands[0]) + 1); + operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); + break; + + case MEM: + operands[1] = adjust_address (operands[0], mode, GET_MODE_SIZE (mode)); + operands[0] = adjust_address (operands[0], mode, 0); + break; + + default: + gcc_unreachable (); + } +} + + +/* Emit insns to move operands[1] into operands[0]. 
+ Return 1 if we have written out everything that needs to be done to + do the move. Otherwise, return 0 and the caller will emit the move + normally. */ + +int +xtensa_emit_move_sequence (rtx *operands, enum machine_mode mode) +{ + rtx src = operands[1]; + + if (CONSTANT_P (src) + && (GET_CODE (src) != CONST_INT || ! xtensa_simm12b (INTVAL (src)))) + { + rtx dst = operands[0]; + + if (xtensa_tls_referenced_p (src)) + { + rtx addend = NULL; + + if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS) + { + addend = XEXP (XEXP (src, 0), 1); + src = XEXP (XEXP (src, 0), 0); + } + + src = xtensa_legitimize_tls_address (src); + if (addend) + { + src = gen_rtx_PLUS (mode, src, addend); + src = force_operand (src, dst); + } + emit_move_insn (dst, src); + return 1; + } + + if (! TARGET_CONST16) + { + src = force_const_mem (SImode, src); + operands[1] = src; + } + + /* PC-relative loads are always SImode, and CONST16 is only + supported in the movsi pattern, so add a SUBREG for any other + (smaller) mode. */ + + if (mode != SImode) + { + if (register_operand (dst, mode)) + { + emit_move_insn (simplify_gen_subreg (SImode, dst, mode, 0), src); + return 1; + } + else + { + src = force_reg (SImode, src); + src = gen_lowpart_SUBREG (mode, src); + operands[1] = src; + } + } + } + + if (!(reload_in_progress | reload_completed) + && !xtensa_valid_move (mode, operands)) + operands[1] = force_reg (mode, operands[1]); + + operands[1] = xtensa_copy_incoming_a7 (operands[1]); + + /* During reload we don't want to emit (subreg:X (mem:Y)) since that + instruction won't be recognized after reload, so we remove the + subreg and adjust mem accordingly. */ + if (reload_in_progress) + { + operands[0] = fixup_subreg_mem (operands[0]); + operands[1] = fixup_subreg_mem (operands[1]); + } + return 0; +} + + +static rtx +fixup_subreg_mem (rtx x) +{ + if (GET_CODE (x) == SUBREG + && GET_CODE (SUBREG_REG (x)) == REG + && REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER) + { + rtx temp = + gen_rtx_SUBREG (GET_MODE (x), + reg_equiv_mem [REGNO (SUBREG_REG (x))], + SUBREG_BYTE (x)); + x = alter_subreg (&temp); + } + return x; +} + + +/* Check if an incoming argument in a7 is expected to be used soon and + if OPND is a register or register pair that includes a7. If so, + create a new pseudo and copy a7 into that pseudo at the very + beginning of the function, followed by the special "set_frame_ptr" + unspec_volatile insn. The return value is either the original + operand, if it is not a7, or the new pseudo containing a copy of + the incoming argument. This is necessary because the register + allocator will ignore conflicts with a7 and may either assign some + other pseudo to a7 or use a7 as the hard_frame_pointer, clobbering + the incoming argument in a7. By copying the argument out of a7 as + the very first thing, and then immediately following that with an + unspec_volatile to keep the scheduler away, we should avoid any + problems. Putting the set_frame_ptr insn at the beginning, with + only the a7 copy before it, also makes it easier for the prologue + expander to initialize the frame pointer after the a7 copy and to + fix up the a7 copy to use the stack pointer instead of the frame + pointer. */ + +rtx +xtensa_copy_incoming_a7 (rtx opnd) +{ + rtx entry_insns = 0; + rtx reg, tmp; + enum machine_mode mode; + + if (!cfun->machine->need_a7_copy) + return opnd; + + /* This function should never be called again once a7 has been copied. 
*/ + gcc_assert (!cfun->machine->set_frame_ptr_insn); + + mode = GET_MODE (opnd); + + /* The operand using a7 may come in a later instruction, so just return + the original operand if it doesn't use a7. */ + reg = opnd; + if (GET_CODE (reg) == SUBREG) + { + gcc_assert (SUBREG_BYTE (reg) == 0); + reg = SUBREG_REG (reg); + } + if (GET_CODE (reg) != REG + || REGNO (reg) > A7_REG + || REGNO (reg) + HARD_REGNO_NREGS (A7_REG, mode) <= A7_REG) + return opnd; + + /* 1-word args will always be in a7; 2-word args in a6/a7. */ + gcc_assert (REGNO (reg) + HARD_REGNO_NREGS (A7_REG, mode) - 1 == A7_REG); + + cfun->machine->need_a7_copy = false; + + /* Copy a7 to a new pseudo at the function entry. Use gen_raw_REG to + create the REG for a7 so that hard_frame_pointer_rtx is not used. */ + + start_sequence (); + tmp = gen_reg_rtx (mode); + + switch (mode) + { + case DFmode: + case DImode: + /* Copy the value out of A7 here but keep the first word in A6 until + after the set_frame_ptr insn. Otherwise, the register allocator + may decide to put "subreg (tmp, 0)" in A7 and clobber the incoming + value. */ + emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 4), + gen_raw_REG (SImode, A7_REG))); + break; + case SFmode: + emit_insn (gen_movsf_internal (tmp, gen_raw_REG (mode, A7_REG))); + break; + case SImode: + emit_insn (gen_movsi_internal (tmp, gen_raw_REG (mode, A7_REG))); + break; + case HImode: + emit_insn (gen_movhi_internal (tmp, gen_raw_REG (mode, A7_REG))); + break; + case QImode: + emit_insn (gen_movqi_internal (tmp, gen_raw_REG (mode, A7_REG))); + break; + default: + gcc_unreachable (); + } + + cfun->machine->set_frame_ptr_insn = emit_insn (gen_set_frame_ptr ()); + + /* For DF and DI mode arguments, copy the incoming value in A6 now. */ + if (mode == DFmode || mode == DImode) + emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 0), + gen_rtx_REG (SImode, A7_REG - 1))); + entry_insns = get_insns (); + end_sequence (); + + if (cfun->machine->vararg_a7) + { + /* This is called from within builtin_saveregs, which will insert the + saveregs code at the function entry, ahead of anything placed at + the function entry now. Instead, save the sequence to be inserted + at the beginning of the saveregs code. */ + cfun->machine->vararg_a7_copy = entry_insns; + } + else + { + /* Put entry_insns after the NOTE that starts the function. If + this is inside a start_sequence, make the outer-level insn + chain current, so the code is placed at the start of the + function. */ + push_topmost_sequence (); + /* Do not use entry_of_function() here. This is called from within + expand_function_start, when the CFG still holds GIMPLE. */ + emit_insn_after (entry_insns, get_insns ()); + pop_topmost_sequence (); + } + + return tmp; +} + + +/* Try to expand a block move operation to a sequence of RTL move + instructions. If not optimizing, or if the block size is not a + constant, or if the block is too large, the expansion fails and GCC + falls back to calling memcpy(). 
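+   As a concrete example, a 7-byte copy with 4-byte alignment expands
+   into an SImode, an HImode and a QImode load/store pair,
+   software-pipelined by the two-phase loop below so that each store uses
+   the value loaded on the previous iteration.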
+ + operands[0] is the destination + operands[1] is the source + operands[2] is the length + operands[3] is the alignment */ + +int +xtensa_expand_block_move (rtx *operands) +{ + static const enum machine_mode mode_from_align[] = + { + VOIDmode, QImode, HImode, VOIDmode, SImode, + }; + + rtx dst_mem = operands[0]; + rtx src_mem = operands[1]; + HOST_WIDE_INT bytes, align; + int num_pieces, move_ratio; + rtx temp[2]; + enum machine_mode mode[2]; + int amount[2]; + bool active[2]; + int phase = 0; + int next; + int offset_ld = 0; + int offset_st = 0; + rtx x; + + /* If this is not a fixed size move, just call memcpy. */ + if (!optimize || (GET_CODE (operands[2]) != CONST_INT)) + return 0; + + bytes = INTVAL (operands[2]); + align = INTVAL (operands[3]); + + /* Anything to move? */ + if (bytes <= 0) + return 0; + + if (align > MOVE_MAX) + align = MOVE_MAX; + + /* Decide whether to expand inline based on the optimization level. */ + move_ratio = 4; + if (optimize > 2) + move_ratio = LARGEST_MOVE_RATIO; + num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */ + if (num_pieces > move_ratio) + return 0; + + x = XEXP (dst_mem, 0); + if (!REG_P (x)) + { + x = force_reg (Pmode, x); + dst_mem = replace_equiv_address (dst_mem, x); + } + + x = XEXP (src_mem, 0); + if (!REG_P (x)) + { + x = force_reg (Pmode, x); + src_mem = replace_equiv_address (src_mem, x); + } + + active[0] = active[1] = false; + + do + { + next = phase; + phase ^= 1; + + if (bytes > 0) + { + int next_amount; + + next_amount = (bytes >= 4 ? 4 : (bytes >= 2 ? 2 : 1)); + next_amount = MIN (next_amount, align); + + amount[next] = next_amount; + mode[next] = mode_from_align[next_amount]; + temp[next] = gen_reg_rtx (mode[next]); + + x = adjust_address (src_mem, mode[next], offset_ld); + emit_insn (gen_rtx_SET (VOIDmode, temp[next], x)); + + offset_ld += next_amount; + bytes -= next_amount; + active[next] = true; + } + + if (active[phase]) + { + active[phase] = false; + + x = adjust_address (dst_mem, mode[phase], offset_st); + emit_insn (gen_rtx_SET (VOIDmode, x, temp[phase])); + + offset_st += amount[phase]; + } + } + while (active[next]); + + return 1; +} + + +void +xtensa_expand_nonlocal_goto (rtx *operands) +{ + rtx goto_handler = operands[1]; + rtx containing_fp = operands[3]; + + /* Generate a call to "__xtensa_nonlocal_goto" (in libgcc); the code + is too big to generate in-line. */ + + if (GET_CODE (containing_fp) != REG) + containing_fp = force_reg (Pmode, containing_fp); + + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_nonlocal_goto"), + LCT_NORMAL, VOIDmode, 2, + containing_fp, Pmode, + goto_handler, Pmode); +} + + +static struct machine_function * +xtensa_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + + +/* Shift VAL of mode MODE left by COUNT bits. */ + +static inline rtx +xtensa_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count) +{ + val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)), + NULL_RTX, 1, OPTAB_DIRECT); + return expand_simple_binop (SImode, ASHIFT, val, count, + NULL_RTX, 1, OPTAB_DIRECT); +} + + +/* Structure to hold the initial parameters for a compare_and_swap operation + in HImode and QImode. */ + +struct alignment_context +{ + rtx memsi; /* SI aligned memory location. */ + rtx shift; /* Bit offset with regard to lsb. */ + rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */ + rtx modemaski; /* ~modemask */ +}; + + +/* Initialize structure AC for word access to HI and QI mode memory. 
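+   For instance, a little-endian QImode access whose address turns out to
+   lie at byte offset 2 within its word gets SHIFT = 16 bits and
+   MODEMASK = 0xff << 16; on a big-endian target the shift is counted
+   from the opposite end of the word.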
*/ + +static void +init_alignment_context (struct alignment_context *ac, rtx mem) +{ + enum machine_mode mode = GET_MODE (mem); + rtx byteoffset = NULL_RTX; + bool aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode)); + + if (aligned) + ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */ + else + { + /* Alignment is unknown. */ + rtx addr, align; + + /* Force the address into a register. */ + addr = force_reg (Pmode, XEXP (mem, 0)); + + /* Align it to SImode. */ + align = expand_simple_binop (Pmode, AND, addr, + GEN_INT (-GET_MODE_SIZE (SImode)), + NULL_RTX, 1, OPTAB_DIRECT); + /* Generate MEM. */ + ac->memsi = gen_rtx_MEM (SImode, align); + MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem); + set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER); + set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode)); + + byteoffset = expand_simple_binop (Pmode, AND, addr, + GEN_INT (GET_MODE_SIZE (SImode) - 1), + NULL_RTX, 1, OPTAB_DIRECT); + } + + /* Calculate shiftcount. */ + if (TARGET_BIG_ENDIAN) + { + ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode)); + if (!aligned) + ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset, + NULL_RTX, 1, OPTAB_DIRECT); + } + else + { + if (aligned) + ac->shift = NULL_RTX; + else + ac->shift = byteoffset; + } + + if (ac->shift != NULL_RTX) + { + /* Shift is the byte count, but we need the bitcount. */ + ac->shift = expand_simple_binop (SImode, MULT, ac->shift, + GEN_INT (BITS_PER_UNIT), + NULL_RTX, 1, OPTAB_DIRECT); + ac->modemask = expand_simple_binop (SImode, ASHIFT, + GEN_INT (GET_MODE_MASK (mode)), + ac->shift, + NULL_RTX, 1, OPTAB_DIRECT); + } + else + ac->modemask = GEN_INT (GET_MODE_MASK (mode)); + + ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask, NULL_RTX, 1); +} + + +/* Expand an atomic compare and swap operation for HImode and QImode. + MEM is the memory location, CMP the old value to compare MEM with + and NEW_RTX the value to set if CMP == MEM. */ + +void +xtensa_expand_compare_and_swap (rtx target, rtx mem, rtx cmp, rtx new_rtx) +{ + enum machine_mode mode = GET_MODE (mem); + struct alignment_context ac; + rtx tmp, cmpv, newv, val; + rtx oldval = gen_reg_rtx (SImode); + rtx res = gen_reg_rtx (SImode); + rtx csloop = gen_label_rtx (); + rtx csend = gen_label_rtx (); + + init_alignment_context (&ac, mem); + + if (ac.shift != NULL_RTX) + { + cmp = xtensa_expand_mask_and_shift (cmp, mode, ac.shift); + new_rtx = xtensa_expand_mask_and_shift (new_rtx, mode, ac.shift); + } + + /* Load the surrounding word into VAL with the MEM value masked out. */ + val = force_reg (SImode, expand_simple_binop (SImode, AND, ac.memsi, + ac.modemaski, NULL_RTX, 1, + OPTAB_DIRECT)); + emit_label (csloop); + + /* Patch CMP and NEW_RTX into VAL at correct position. */ + cmpv = force_reg (SImode, expand_simple_binop (SImode, IOR, cmp, val, + NULL_RTX, 1, OPTAB_DIRECT)); + newv = force_reg (SImode, expand_simple_binop (SImode, IOR, new_rtx, val, + NULL_RTX, 1, OPTAB_DIRECT)); + + /* Jump to end if we're done. */ + emit_insn (gen_sync_compare_and_swapsi (res, ac.memsi, cmpv, newv)); + emit_cmp_and_jump_insns (res, cmpv, EQ, const0_rtx, SImode, true, csend); + + /* Check for changes outside mode. */ + emit_move_insn (oldval, val); + tmp = expand_simple_binop (SImode, AND, res, ac.modemaski, + val, 1, OPTAB_DIRECT); + if (tmp != val) + emit_move_insn (val, tmp); + + /* Loop internal if so. 
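+   That is, if bits outside the HI/QI field changed while the swap was
+   being attempted, re-patch CMP and NEW_RTX against the updated
+   surrounding word and retry.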
*/ + emit_cmp_and_jump_insns (oldval, val, NE, const0_rtx, SImode, true, csloop); + + emit_label (csend); + + /* Return the correct part of the bitfield. */ + convert_move (target, + (ac.shift == NULL_RTX ? res + : expand_simple_binop (SImode, LSHIFTRT, res, ac.shift, + NULL_RTX, 1, OPTAB_DIRECT)), + 1); +} + + +/* Expand an atomic operation CODE of mode MODE (either HImode or QImode -- + the default expansion works fine for SImode). MEM is the memory location + and VAL the value to play with. If AFTER is true then store the value + MEM holds after the operation, if AFTER is false then store the value MEM + holds before the operation. If TARGET is zero then discard that value, else + store it to TARGET. */ + +void +xtensa_expand_atomic (enum rtx_code code, rtx target, rtx mem, rtx val, + bool after) +{ + enum machine_mode mode = GET_MODE (mem); + struct alignment_context ac; + rtx csloop = gen_label_rtx (); + rtx cmp, tmp; + rtx old = gen_reg_rtx (SImode); + rtx new_rtx = gen_reg_rtx (SImode); + rtx orig = NULL_RTX; + + init_alignment_context (&ac, mem); + + /* Prepare values before the compare-and-swap loop. */ + if (ac.shift != NULL_RTX) + val = xtensa_expand_mask_and_shift (val, mode, ac.shift); + switch (code) + { + case PLUS: + case MINUS: + orig = gen_reg_rtx (SImode); + convert_move (orig, val, 1); + break; + + case SET: + case IOR: + case XOR: + break; + + case MULT: /* NAND */ + case AND: + /* val = "11..111..1" */ + val = expand_simple_binop (SImode, XOR, val, ac.modemaski, + NULL_RTX, 1, OPTAB_DIRECT); + break; + + default: + gcc_unreachable (); + } + + /* Load full word. Subsequent loads are performed by S32C1I. */ + cmp = force_reg (SImode, ac.memsi); + + emit_label (csloop); + emit_move_insn (old, cmp); + + switch (code) + { + case PLUS: + case MINUS: + val = expand_simple_binop (SImode, code, old, orig, + NULL_RTX, 1, OPTAB_DIRECT); + val = expand_simple_binop (SImode, AND, val, ac.modemask, + NULL_RTX, 1, OPTAB_DIRECT); + /* FALLTHRU */ + case SET: + tmp = expand_simple_binop (SImode, AND, old, ac.modemaski, + NULL_RTX, 1, OPTAB_DIRECT); + tmp = expand_simple_binop (SImode, IOR, tmp, val, + new_rtx, 1, OPTAB_DIRECT); + break; + + case AND: + case IOR: + case XOR: + tmp = expand_simple_binop (SImode, code, old, val, + new_rtx, 1, OPTAB_DIRECT); + break; + + case MULT: /* NAND */ + tmp = expand_simple_binop (SImode, XOR, old, ac.modemask, + NULL_RTX, 1, OPTAB_DIRECT); + tmp = expand_simple_binop (SImode, AND, tmp, val, + new_rtx, 1, OPTAB_DIRECT); + break; + + default: + gcc_unreachable (); + } + + if (tmp != new_rtx) + emit_move_insn (new_rtx, tmp); + emit_insn (gen_sync_compare_and_swapsi (cmp, ac.memsi, old, new_rtx)); + emit_cmp_and_jump_insns (cmp, old, NE, const0_rtx, SImode, true, csloop); + + if (target) + { + tmp = (after ? new_rtx : cmp); + convert_move (target, + (ac.shift == NULL_RTX ? tmp + : expand_simple_binop (SImode, LSHIFTRT, tmp, ac.shift, + NULL_RTX, 1, OPTAB_DIRECT)), + 1); + } +} + + +void +xtensa_setup_frame_addresses (void) +{ + /* Set flag to cause TARGET_FRAME_POINTER_REQUIRED to return true. */ + cfun->machine->accesses_prev_frame = 1; + + emit_library_call + (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_libgcc_window_spill"), + LCT_NORMAL, VOIDmode, 0); +} + + +/* Emit the assembly for the end of a zero-cost loop. Normally we just emit + a comment showing where the end of the loop is. However, if there is a + label or a branch at the end of the loop then we need to place a nop + there. 
If the loop ends with a label we need the nop so that branches + targeting that label will target the nop (and thus remain in the loop), + instead of targeting the instruction after the loop (and thus exiting + the loop). If the loop ends with a branch, we need the nop in case the + branch is targeting a location inside the loop. When the branch + executes it will cause the loop count to be decremented even if it is + taken (because it is the last instruction in the loop), so we need to + nop after the branch to prevent the loop count from being decremented + when the branch is taken. */ + +void +xtensa_emit_loop_end (rtx insn, rtx *operands) +{ + char done = 0; + + for (insn = PREV_INSN (insn); insn && !done; insn = PREV_INSN (insn)) + { + switch (GET_CODE (insn)) + { + case NOTE: + case BARRIER: + break; + + case CODE_LABEL: + output_asm_insn (TARGET_DENSITY ? "nop.n" : "nop", operands); + done = 1; + break; + + default: + { + rtx body = PATTERN (insn); + + if (GET_CODE (body) == JUMP_INSN) + { + output_asm_insn (TARGET_DENSITY ? "nop.n" : "nop", operands); + done = 1; + } + else if ((GET_CODE (body) != USE) + && (GET_CODE (body) != CLOBBER)) + done = 1; + } + break; + } + } + + output_asm_insn ("# loop end for %0", operands); +} + + +char * +xtensa_emit_branch (bool inverted, bool immed, rtx *operands) +{ + static char result[64]; + enum rtx_code code; + const char *op; + + code = GET_CODE (operands[3]); + switch (code) + { + case EQ: op = inverted ? "ne" : "eq"; break; + case NE: op = inverted ? "eq" : "ne"; break; + case LT: op = inverted ? "ge" : "lt"; break; + case GE: op = inverted ? "lt" : "ge"; break; + case LTU: op = inverted ? "geu" : "ltu"; break; + case GEU: op = inverted ? "ltu" : "geu"; break; + default: gcc_unreachable (); + } + + if (immed) + { + if (INTVAL (operands[1]) == 0) + sprintf (result, "b%sz%s\t%%0, %%2", op, + (TARGET_DENSITY && (code == EQ || code == NE)) ? ".n" : ""); + else + sprintf (result, "b%si\t%%0, %%d1, %%2", op); + } + else + sprintf (result, "b%s\t%%0, %%1, %%2", op); + + return result; +} + + +char * +xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands) +{ + static char result[64]; + const char *op; + + switch (GET_CODE (operands[3])) + { + case EQ: op = inverted ? "bs" : "bc"; break; + case NE: op = inverted ? "bc" : "bs"; break; + default: gcc_unreachable (); + } + + if (immed) + { + unsigned bitnum = INTVAL (operands[1]) & 0x1f; + operands[1] = GEN_INT (bitnum); + sprintf (result, "b%si\t%%0, %%d1, %%2", op); + } + else + sprintf (result, "b%s\t%%0, %%1, %%2", op); + + return result; +} + + +char * +xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands) +{ + static char result[64]; + enum rtx_code code; + const char *op; + + code = GET_CODE (operands[4]); + if (isbool) + { + switch (code) + { + case EQ: op = inverted ? "t" : "f"; break; + case NE: op = inverted ? "f" : "t"; break; + default: gcc_unreachable (); + } + } + else + { + switch (code) + { + case EQ: op = inverted ? "nez" : "eqz"; break; + case NE: op = inverted ? "eqz" : "nez"; break; + case LT: op = inverted ? "gez" : "ltz"; break; + case GE: op = inverted ? "ltz" : "gez"; break; + default: gcc_unreachable (); + } + } + + sprintf (result, "mov%s%s\t%%0, %%%d, %%1", + op, isfp ? ".s" : "", inverted ? 
3 : 2); + return result; +} + + +char * +xtensa_emit_call (int callop, rtx *operands) +{ + static char result[64]; + rtx tgt = operands[callop]; + + if (GET_CODE (tgt) == CONST_INT) + sprintf (result, "call8\t0x%lx", INTVAL (tgt)); + else if (register_operand (tgt, VOIDmode)) + sprintf (result, "callx8\t%%%d", callop); + else + sprintf (result, "call8\t%%%d", callop); + + return result; +} + + +bool +xtensa_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict) +{ + /* Allow constant pool addresses. */ + if (mode != BLKmode && GET_MODE_SIZE (mode) >= UNITS_PER_WORD + && ! TARGET_CONST16 && constantpool_address_p (addr) + && ! xtensa_tls_referenced_p (addr)) + return true; + + while (GET_CODE (addr) == SUBREG) + addr = SUBREG_REG (addr); + + /* Allow base registers. */ + if (GET_CODE (addr) == REG && BASE_REG_P (addr, strict)) + return true; + + /* Check for "register + offset" addressing. */ + if (GET_CODE (addr) == PLUS) + { + rtx xplus0 = XEXP (addr, 0); + rtx xplus1 = XEXP (addr, 1); + enum rtx_code code0; + enum rtx_code code1; + + while (GET_CODE (xplus0) == SUBREG) + xplus0 = SUBREG_REG (xplus0); + code0 = GET_CODE (xplus0); + + while (GET_CODE (xplus1) == SUBREG) + xplus1 = SUBREG_REG (xplus1); + code1 = GET_CODE (xplus1); + + /* Swap operands if necessary so the register is first. */ + if (code0 != REG && code1 == REG) + { + xplus0 = XEXP (addr, 1); + xplus1 = XEXP (addr, 0); + code0 = GET_CODE (xplus0); + code1 = GET_CODE (xplus1); + } + + if (code0 == REG && BASE_REG_P (xplus0, strict) + && code1 == CONST_INT + && xtensa_mem_offset (INTVAL (xplus1), mode)) + return true; + } + + return false; +} + + +/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ + +static GTY(()) rtx xtensa_tls_module_base_symbol; + +static rtx +xtensa_tls_module_base (void) +{ + if (! 
xtensa_tls_module_base_symbol) + { + xtensa_tls_module_base_symbol = + gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_"); + SYMBOL_REF_FLAGS (xtensa_tls_module_base_symbol) + |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; + } + + return xtensa_tls_module_base_symbol; +} + + +static rtx +xtensa_call_tls_desc (rtx sym, rtx *retp) +{ + rtx fn, arg, a10, call_insn, insns; + + start_sequence (); + fn = gen_reg_rtx (Pmode); + arg = gen_reg_rtx (Pmode); + a10 = gen_rtx_REG (Pmode, 10); + + emit_insn (gen_tls_func (fn, sym)); + emit_insn (gen_tls_arg (arg, sym)); + emit_move_insn (a10, arg); + call_insn = emit_call_insn (gen_tls_call (a10, fn, sym, const1_rtx)); + CALL_INSN_FUNCTION_USAGE (call_insn) + = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, a10), + CALL_INSN_FUNCTION_USAGE (call_insn)); + insns = get_insns (); + end_sequence (); + + *retp = a10; + return insns; +} + + +static rtx +xtensa_legitimize_tls_address (rtx x) +{ + unsigned int model = SYMBOL_REF_TLS_MODEL (x); + rtx dest, tp, ret, modbase, base, addend, insns; + + dest = gen_reg_rtx (Pmode); + switch (model) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + insns = xtensa_call_tls_desc (x, &ret); + emit_libcall_block (insns, dest, ret, x); + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + base = gen_reg_rtx (Pmode); + modbase = xtensa_tls_module_base (); + insns = xtensa_call_tls_desc (modbase, &ret); + emit_libcall_block (insns, base, ret, modbase); + addend = force_reg (SImode, gen_sym_DTPOFF (x)); + emit_insn (gen_addsi3 (dest, base, addend)); + break; + + case TLS_MODEL_INITIAL_EXEC: + case TLS_MODEL_LOCAL_EXEC: + tp = gen_reg_rtx (SImode); + emit_insn (gen_load_tp (tp)); + addend = force_reg (SImode, gen_sym_TPOFF (x)); + emit_insn (gen_addsi3 (dest, tp, addend)); + break; + + default: + gcc_unreachable (); + } + + return dest; +} + + +rtx +xtensa_legitimize_address (rtx x, + rtx oldx ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + if (xtensa_tls_symbol_p (x)) + return xtensa_legitimize_tls_address (x); + + if (GET_CODE (x) == PLUS) + { + rtx plus0 = XEXP (x, 0); + rtx plus1 = XEXP (x, 1); + + if (GET_CODE (plus0) != REG && GET_CODE (plus1) == REG) + { + plus0 = XEXP (x, 1); + plus1 = XEXP (x, 0); + } + + /* Try to split up the offset to use an ADDMI instruction. */ + if (GET_CODE (plus0) == REG + && GET_CODE (plus1) == CONST_INT + && !xtensa_mem_offset (INTVAL (plus1), mode) + && !xtensa_simm8 (INTVAL (plus1)) + && xtensa_mem_offset (INTVAL (plus1) & 0xff, mode) + && xtensa_simm8x256 (INTVAL (plus1) & ~0xff)) + { + rtx temp = gen_reg_rtx (Pmode); + rtx addmi_offset = GEN_INT (INTVAL (plus1) & ~0xff); + emit_insn (gen_rtx_SET (Pmode, temp, + gen_rtx_PLUS (Pmode, plus0, addmi_offset))); + return gen_rtx_PLUS (Pmode, temp, GEN_INT (INTVAL (plus1) & 0xff)); + } + } + + return x; +} + +/* Worker function for TARGET_MODE_DEPENDENT_ADDRESS_P. + + Treat constant-pool references as "mode dependent" since they can + only be accessed with SImode loads. This works around a bug in the + combiner where a constant pool reference is temporarily converted + to an HImode load, which is then assumed to zero-extend based on + our definition of LOAD_EXTEND_OP. This is wrong because the high + bits of a 16-bit value in the constant pool are now sign-extended + by default. */ + +static bool +xtensa_mode_dependent_address_p (const_rtx addr) +{ + return constantpool_address_p (addr); +} + +/* Helper for xtensa_tls_referenced_p. 
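+   (With for_each_rtx, a nonzero return stops the walk at a TLS
+   SYMBOL_REF, while -1 skips the sub-expressions of TLS unspecs that
+   have already been legitimized.)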
*/ + +static int +xtensa_tls_referenced_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*x) == SYMBOL_REF) + return SYMBOL_REF_TLS_MODEL (*x) != 0; + + /* Ignore TLS references that have already been legitimized. */ + if (GET_CODE (*x) == UNSPEC) + { + switch (XINT (*x, 1)) + { + case UNSPEC_TPOFF: + case UNSPEC_DTPOFF: + case UNSPEC_TLS_FUNC: + case UNSPEC_TLS_ARG: + case UNSPEC_TLS_CALL: + return -1; + default: + break; + } + } + + return 0; +} + + +/* Return TRUE if X contains any TLS symbol references. */ + +bool +xtensa_tls_referenced_p (rtx x) +{ + if (! TARGET_HAVE_TLS) + return false; + + return for_each_rtx (&x, xtensa_tls_referenced_p_1, NULL); +} + + +/* Return the debugger register number to use for 'regno'. */ + +int +xtensa_dbx_register_number (int regno) +{ + int first = -1; + + if (GP_REG_P (regno)) + { + regno -= GP_REG_FIRST; + first = 0; + } + else if (BR_REG_P (regno)) + { + regno -= BR_REG_FIRST; + first = 16; + } + else if (FP_REG_P (regno)) + { + regno -= FP_REG_FIRST; + first = 48; + } + else if (ACC_REG_P (regno)) + { + first = 0x200; /* Start of Xtensa special registers. */ + regno = 16; /* ACCLO is special register 16. */ + } + + /* When optimizing, we sometimes get asked about pseudo-registers + that don't represent hard registers. Return 0 for these. */ + if (first == -1) + return 0; + + return first + regno; +} + + +/* Argument support functions. */ + +/* Initialize CUMULATIVE_ARGS for a function. */ + +void +init_cumulative_args (CUMULATIVE_ARGS *cum, int incoming) +{ + cum->arg_words = 0; + cum->incoming = incoming; +} + + +/* Advance the argument to the next argument position. */ + +static void +xtensa_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + int words, max; + int *arg_words; + + arg_words = &cum->arg_words; + max = MAX_ARGS_IN_REGISTERS; + + words = (((mode != BLKmode) + ? (int) GET_MODE_SIZE (mode) + : int_size_in_bytes (type)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + if (*arg_words < max + && (targetm.calls.must_pass_in_stack (mode, type) + || *arg_words + words > max)) + *arg_words = max; + + *arg_words += words; +} + + +/* Return an RTL expression containing the register for the given mode, + or 0 if the argument is to be passed on the stack. INCOMING_P is nonzero + if this is an incoming argument to the current function. */ + +static rtx +xtensa_function_arg_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool incoming_p) +{ + int regbase, words, max; + int *arg_words; + int regno; + + arg_words = &cum->arg_words; + regbase = (incoming_p ? GP_ARG_FIRST : GP_OUTGOING_ARG_FIRST); + max = MAX_ARGS_IN_REGISTERS; + + words = (((mode != BLKmode) + ? (int) GET_MODE_SIZE (mode) + : int_size_in_bytes (type)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + if (type && (TYPE_ALIGN (type) > BITS_PER_WORD)) + { + int align = MIN (TYPE_ALIGN (type), STACK_BOUNDARY) / BITS_PER_WORD; + *arg_words = (*arg_words + align - 1) & -align; + } + + if (*arg_words + words > max) + return (rtx)0; + + regno = regbase + *arg_words; + + if (cum->incoming && regno <= A7_REG && regno + words > A7_REG) + cfun->machine->need_a7_copy = true; + + return gen_rtx_REG (mode, regno); +} + +/* Implement TARGET_FUNCTION_ARG. */ + +static rtx +xtensa_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + return xtensa_function_arg_1 (cum, mode, type, false); +} + +/* Implement TARGET_FUNCTION_INCOMING_ARG. 
*/ + +static rtx +xtensa_function_incoming_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + return xtensa_function_arg_1 (cum, mode, type, true); +} + +static unsigned int +xtensa_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + unsigned int alignment; + + alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode); + if (alignment < PARM_BOUNDARY) + alignment = PARM_BOUNDARY; + if (alignment > STACK_BOUNDARY) + alignment = STACK_BOUNDARY; + return alignment; +} + + +static bool +xtensa_return_in_msb (const_tree valtype) +{ + return (TARGET_BIG_ENDIAN + && AGGREGATE_TYPE_P (valtype) + && int_size_in_bytes (valtype) >= UNITS_PER_WORD); +} + + +static void +xtensa_option_override (void) +{ + int regno; + enum machine_mode mode; + + if (!TARGET_BOOLEANS && TARGET_HARD_FLOAT) + error ("boolean registers required for the floating-point option"); + + /* Set up array giving whether a given register can hold a given mode. */ + for (mode = VOIDmode; + mode != MAX_MACHINE_MODE; + mode = (enum machine_mode) ((int) mode + 1)) + { + int size = GET_MODE_SIZE (mode); + enum mode_class mclass = GET_MODE_CLASS (mode); + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + { + int temp; + + if (ACC_REG_P (regno)) + temp = (TARGET_MAC16 + && (mclass == MODE_INT) && (size <= UNITS_PER_WORD)); + else if (GP_REG_P (regno)) + temp = ((regno & 1) == 0 || (size <= UNITS_PER_WORD)); + else if (FP_REG_P (regno)) + temp = (TARGET_HARD_FLOAT && (mode == SFmode)); + else if (BR_REG_P (regno)) + temp = (TARGET_BOOLEANS && (mode == CCmode)); + else + temp = FALSE; + + xtensa_hard_regno_mode_ok[(int) mode][regno] = temp; + } + } + + init_machine_status = xtensa_init_machine_status; + + /* Check PIC settings. PIC is only supported when using L32R + instructions, and some targets need to always use PIC. */ + if (flag_pic && TARGET_CONST16) + error ("-f%s is not supported with CONST16 instructions", + (flag_pic > 1 ? "PIC" : "pic")); + else if (TARGET_FORCE_NO_PIC) + flag_pic = 0; + else if (XTENSA_ALWAYS_PIC) + { + if (TARGET_CONST16) + error ("PIC is required but not supported with CONST16 instructions"); + flag_pic = 1; + } + /* There's no need for -fPIC (as opposed to -fpic) on Xtensa. */ + if (flag_pic > 1) + flag_pic = 1; + if (flag_pic && !flag_pie) + flag_shlib = 1; + + /* Hot/cold partitioning does not work on this architecture, because of + constant pools (the load instruction cannot necessarily reach that far). + Therefore disable it on this architecture. */ + if (flag_reorder_blocks_and_partition) + { + flag_reorder_blocks_and_partition = 0; + flag_reorder_blocks = 1; + } +} + +/* A C compound statement to output to stdio stream STREAM the + assembler syntax for an instruction operand X. X is an RTL + expression. + + CODE is a value that can be used to specify one of several ways + of printing the operand. It is used when identical operands + must be printed differently depending on the context. CODE + comes from the '%' specification that was used to request + printing of the operand. If the specification was just '%DIGIT' + then CODE is 0; if the specification was '%LTR DIGIT' then CODE + is the ASCII code for LTR. + + If X is a register, this macro should print the register's name. + The names can be found in an array 'reg_names' whose type is + 'char *[]'. 'reg_names' is initialized from 'REGISTER_NAMES'. 
+ + When the machine description has a specification '%PUNCT' (a '%' + followed by a punctuation character), this macro is called with + a null pointer for X and the punctuation character for CODE. + + 'a', 'c', 'l', and 'n' are reserved. + + The Xtensa specific codes are: + + 'd' CONST_INT, print as signed decimal + 'x' CONST_INT, print as signed hexadecimal + 'K' CONST_INT, print number of bits in mask for EXTUI + 'R' CONST_INT, print (X & 0x1f) + 'L' CONST_INT, print ((32 - X) & 0x1f) + 'D' REG, print second register of double-word register operand + 'N' MEM, print address of next word following a memory operand + 'v' MEM, if memory reference is volatile, output a MEMW before it + 't' any constant, add "@h" suffix for top 16 bits + 'b' any constant, add "@l" suffix for bottom 16 bits +*/ + +static void +printx (FILE *file, signed int val) +{ + /* Print a hexadecimal value in a nice way. */ + if ((val > -0xa) && (val < 0xa)) + fprintf (file, "%d", val); + else if (val < 0) + fprintf (file, "-0x%x", -val); + else + fprintf (file, "0x%x", val); +} + + +void +print_operand (FILE *file, rtx x, int letter) +{ + if (!x) + error ("PRINT_OPERAND null pointer"); + + switch (letter) + { + case 'D': + if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) + fprintf (file, "%s", reg_names[xt_true_regnum (x) + 1]); + else + output_operand_lossage ("invalid %%D value"); + break; + + case 'v': + if (GET_CODE (x) == MEM) + { + /* For a volatile memory reference, emit a MEMW before the + load or store. */ + if (MEM_VOLATILE_P (x) && TARGET_SERIALIZE_VOLATILE) + fprintf (file, "memw\n\t"); + } + else + output_operand_lossage ("invalid %%v value"); + break; + + case 'N': + if (GET_CODE (x) == MEM + && (GET_MODE (x) == DFmode || GET_MODE (x) == DImode)) + { + x = adjust_address (x, GET_MODE (x) == DFmode ? SFmode : SImode, 4); + output_address (XEXP (x, 0)); + } + else + output_operand_lossage ("invalid %%N value"); + break; + + case 'K': + if (GET_CODE (x) == CONST_INT) + { + int num_bits = 0; + unsigned val = INTVAL (x); + while (val & 1) + { + num_bits += 1; + val = val >> 1; + } + if ((val != 0) || (num_bits == 0) || (num_bits > 16)) + fatal_insn ("invalid mask", x); + + fprintf (file, "%d", num_bits); + } + else + output_operand_lossage ("invalid %%K value"); + break; + + case 'L': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%ld", (32 - INTVAL (x)) & 0x1f); + else + output_operand_lossage ("invalid %%L value"); + break; + + case 'R': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%ld", INTVAL (x) & 0x1f); + else + output_operand_lossage ("invalid %%R value"); + break; + + case 'x': + if (GET_CODE (x) == CONST_INT) + printx (file, INTVAL (x)); + else + output_operand_lossage ("invalid %%x value"); + break; + + case 'd': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%ld", INTVAL (x)); + else + output_operand_lossage ("invalid %%d value"); + break; + + case 't': + case 'b': + if (GET_CODE (x) == CONST_INT) + { + printx (file, INTVAL (x)); + fputs (letter == 't' ? "@h" : "@l", file); + } + else if (GET_CODE (x) == CONST_DOUBLE) + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + if (GET_MODE (x) == SFmode) + { + long l; + REAL_VALUE_TO_TARGET_SINGLE (r, l); + fprintf (file, "0x%08lx@%c", l, letter == 't' ? 'h' : 'l'); + } + else + output_operand_lossage ("invalid %%t/%%b value"); + } + else if (GET_CODE (x) == CONST) + { + /* X must be a symbolic constant on ELF. Write an expression + suitable for 'const16' that sets the high or low 16 bits. 
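+	     (For example, %t applied to
+	     (const (plus (symbol_ref "foo") (const_int 4))) comes out as
+	     "foo@h+4".)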
*/ + if (GET_CODE (XEXP (x, 0)) != PLUS + || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF + && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF) + || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT) + output_operand_lossage ("invalid %%t/%%b value"); + print_operand (file, XEXP (XEXP (x, 0), 0), 0); + fputs (letter == 't' ? "@h" : "@l", file); + /* There must be a non-alphanumeric character between 'h' or 'l' + and the number. The '-' is added by print_operand() already. */ + if (INTVAL (XEXP (XEXP (x, 0), 1)) >= 0) + fputs ("+", file); + print_operand (file, XEXP (XEXP (x, 0), 1), 0); + } + else + { + output_addr_const (file, x); + fputs (letter == 't' ? "@h" : "@l", file); + } + break; + + default: + if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) + fprintf (file, "%s", reg_names[xt_true_regnum (x)]); + else if (GET_CODE (x) == MEM) + output_address (XEXP (x, 0)); + else if (GET_CODE (x) == CONST_INT) + fprintf (file, "%ld", INTVAL (x)); + else + output_addr_const (file, x); + } +} + + +/* A C compound statement to output to stdio stream STREAM the + assembler syntax for an instruction operand that is a memory + reference whose address is ADDR. ADDR is an RTL expression. */ + +void +print_operand_address (FILE *file, rtx addr) +{ + if (!addr) + error ("PRINT_OPERAND_ADDRESS, null pointer"); + + switch (GET_CODE (addr)) + { + default: + fatal_insn ("invalid address", addr); + break; + + case REG: + fprintf (file, "%s, 0", reg_names [REGNO (addr)]); + break; + + case PLUS: + { + rtx reg = (rtx)0; + rtx offset = (rtx)0; + rtx arg0 = XEXP (addr, 0); + rtx arg1 = XEXP (addr, 1); + + if (GET_CODE (arg0) == REG) + { + reg = arg0; + offset = arg1; + } + else if (GET_CODE (arg1) == REG) + { + reg = arg1; + offset = arg0; + } + else + fatal_insn ("no register in address", addr); + + if (CONSTANT_P (offset)) + { + fprintf (file, "%s, ", reg_names [REGNO (reg)]); + output_addr_const (file, offset); + } + else + fatal_insn ("address offset not a constant", addr); + } + break; + + case LABEL_REF: + case SYMBOL_REF: + case CONST_INT: + case CONST: + output_addr_const (file, addr); + break; + } +} + +/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. 
*/ + +static bool +xtensa_output_addr_const_extra (FILE *fp, rtx x) +{ + if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1) + { + switch (XINT (x, 1)) + { + case UNSPEC_TPOFF: + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputs ("@TPOFF", fp); + return true; + case UNSPEC_DTPOFF: + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputs ("@DTPOFF", fp); + return true; + case UNSPEC_PLT: + if (flag_pic) + { + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputs ("@PLT", fp); + return true; + } + break; + default: + break; + } + } + return false; +} + + +void +xtensa_output_literal (FILE *file, rtx x, enum machine_mode mode, int labelno) +{ + long value_long[2]; + REAL_VALUE_TYPE r; + int size; + rtx first, second; + + fprintf (file, "\t.literal .LC%u, ", (unsigned) labelno); + + switch (GET_MODE_CLASS (mode)) + { + case MODE_FLOAT: + gcc_assert (GET_CODE (x) == CONST_DOUBLE); + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + switch (mode) + { + case SFmode: + REAL_VALUE_TO_TARGET_SINGLE (r, value_long[0]); + if (HOST_BITS_PER_LONG > 32) + value_long[0] &= 0xffffffff; + fprintf (file, "0x%08lx\n", value_long[0]); + break; + + case DFmode: + REAL_VALUE_TO_TARGET_DOUBLE (r, value_long); + if (HOST_BITS_PER_LONG > 32) + { + value_long[0] &= 0xffffffff; + value_long[1] &= 0xffffffff; + } + fprintf (file, "0x%08lx, 0x%08lx\n", + value_long[0], value_long[1]); + break; + + default: + gcc_unreachable (); + } + + break; + + case MODE_INT: + case MODE_PARTIAL_INT: + size = GET_MODE_SIZE (mode); + switch (size) + { + case 4: + output_addr_const (file, x); + fputs ("\n", file); + break; + + case 8: + split_double (x, &first, &second); + output_addr_const (file, first); + fputs (", ", file); + output_addr_const (file, second); + fputs ("\n", file); + break; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } +} + + +/* Return the bytes needed to compute the frame pointer from the current + stack pointer. */ + +#define STACK_BYTES (STACK_BOUNDARY / BITS_PER_UNIT) +#define XTENSA_STACK_ALIGN(LOC) (((LOC) + STACK_BYTES-1) & ~(STACK_BYTES-1)) + +long +compute_frame_size (int size) +{ + /* Add space for the incoming static chain value. */ + if (cfun->static_chain_decl != NULL) + size += (1 * UNITS_PER_WORD); + + xtensa_current_frame_size = + XTENSA_STACK_ALIGN (size + + crtl->outgoing_args_size + + (WINDOW_SIZE * UNITS_PER_WORD)); + return xtensa_current_frame_size; +} + + +bool +xtensa_frame_pointer_required (void) +{ + /* The code to expand builtin_frame_addr and builtin_return_addr + currently uses the hard_frame_pointer instead of frame_pointer. + This seems wrong but maybe it's necessary for other architectures. + This function is derived from the i386 code. */ + + if (cfun->machine->accesses_prev_frame) + return true; + + return false; +} + + +/* minimum frame = reg save area (4 words) plus static chain (1 word) + and the total number of words must be a multiple of 128 bits. */ +#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD) + +void +xtensa_expand_prologue (void) +{ + HOST_WIDE_INT total_size; + rtx size_rtx; + rtx insn, note_rtx; + + total_size = compute_frame_size (get_frame_size ()); + size_rtx = GEN_INT (total_size); + + if (total_size < (1 << (12+3))) + insn = emit_insn (gen_entry (size_rtx)); + else + { + /* Use a8 as a temporary since a0-a7 may be live. 
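+	 The ENTRY immediate is a 12-bit field scaled by 8 bytes, so it
+	 cannot cover frames of 32 KB or more (the 1 << (12+3) test above);
+	 for such frames, allocate only MIN_FRAME_SIZE with ENTRY and then
+	 subtract the remainder from the stack pointer explicitly.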
*/ + rtx tmp_reg = gen_rtx_REG (Pmode, A8_REG); + emit_insn (gen_entry (GEN_INT (MIN_FRAME_SIZE))); + emit_move_insn (tmp_reg, GEN_INT (total_size - MIN_FRAME_SIZE)); + emit_insn (gen_subsi3 (tmp_reg, stack_pointer_rtx, tmp_reg)); + insn = emit_insn (gen_movsi (stack_pointer_rtx, tmp_reg)); + } + + if (frame_pointer_needed) + { + if (cfun->machine->set_frame_ptr_insn) + { + rtx first; + + push_topmost_sequence (); + first = get_insns (); + pop_topmost_sequence (); + + /* For all instructions prior to set_frame_ptr_insn, replace + hard_frame_pointer references with stack_pointer. */ + for (insn = first; + insn != cfun->machine->set_frame_ptr_insn; + insn = NEXT_INSN (insn)) + { + if (INSN_P (insn)) + { + PATTERN (insn) = replace_rtx (copy_rtx (PATTERN (insn)), + hard_frame_pointer_rtx, + stack_pointer_rtx); + df_insn_rescan (insn); + } + } + } + else + insn = emit_insn (gen_movsi (hard_frame_pointer_rtx, + stack_pointer_rtx)); + } + + /* Create a note to describe the CFA. Because this is only used to set + DW_AT_frame_base for debug info, don't bother tracking changes through + each instruction in the prologue. It just takes up space. */ + note_rtx = gen_rtx_SET (VOIDmode, (frame_pointer_needed + ? hard_frame_pointer_rtx + : stack_pointer_rtx), + plus_constant (stack_pointer_rtx, -total_size)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx); +} + + +/* Clear variables at function end. */ + +void +xtensa_function_epilogue (FILE *file ATTRIBUTE_UNUSED, + HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + xtensa_current_frame_size = 0; +} + + +rtx +xtensa_return_addr (int count, rtx frame) +{ + rtx result, retaddr, curaddr, label; + + if (count == -1) + retaddr = gen_rtx_REG (Pmode, A0_REG); + else + { + rtx addr = plus_constant (frame, -4 * UNITS_PER_WORD); + addr = memory_address (Pmode, addr); + retaddr = gen_reg_rtx (Pmode); + emit_move_insn (retaddr, gen_rtx_MEM (Pmode, addr)); + } + + /* The 2 most-significant bits of the return address on Xtensa hold + the register window size. To get the real return address, these + bits must be replaced with the high bits from some address in the + code. */ + + /* Get the 2 high bits of a local label in the code. */ + curaddr = gen_reg_rtx (Pmode); + label = gen_label_rtx (); + emit_label (label); + LABEL_PRESERVE_P (label) = 1; + emit_move_insn (curaddr, gen_rtx_LABEL_REF (Pmode, label)); + emit_insn (gen_lshrsi3 (curaddr, curaddr, GEN_INT (30))); + emit_insn (gen_ashlsi3 (curaddr, curaddr, GEN_INT (30))); + + /* Clear the 2 high bits of the return address. */ + result = gen_reg_rtx (Pmode); + emit_insn (gen_ashlsi3 (result, retaddr, GEN_INT (2))); + emit_insn (gen_lshrsi3 (result, result, GEN_INT (2))); + + /* Combine them to get the result. */ + emit_insn (gen_iorsi3 (result, result, curaddr)); + return result; +} + + +/* Create the va_list data type. + + This structure is set up by __builtin_saveregs. The __va_reg field + points to a stack-allocated region holding the contents of the + incoming argument registers. The __va_ndx field is an index + initialized to the position of the first unnamed (variable) + argument. This same index is also used to address the arguments + passed in memory. Thus, the __va_stk field is initialized to point + to the position of the first argument in memory offset to account + for the arguments passed in registers and to account for the size + of the argument registers not being 16-byte aligned. 
E.G., there + are 6 argument registers of 4 bytes each, but we want the __va_ndx + for the first stack argument to have the maximal alignment of 16 + bytes, so we offset the __va_stk address by 32 bytes so that + __va_stk[32] references the first argument on the stack. */ + +static tree +xtensa_build_builtin_va_list (void) +{ + tree f_stk, f_reg, f_ndx, record, type_decl; + + record = (*lang_hooks.types.make_type) (RECORD_TYPE); + type_decl = build_decl (BUILTINS_LOCATION, + TYPE_DECL, get_identifier ("__va_list_tag"), record); + + f_stk = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__va_stk"), + ptr_type_node); + f_reg = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__va_reg"), + ptr_type_node); + f_ndx = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__va_ndx"), + integer_type_node); + + DECL_FIELD_CONTEXT (f_stk) = record; + DECL_FIELD_CONTEXT (f_reg) = record; + DECL_FIELD_CONTEXT (f_ndx) = record; + + TYPE_STUB_DECL (record) = type_decl; + TYPE_NAME (record) = type_decl; + TYPE_FIELDS (record) = f_stk; + DECL_CHAIN (f_stk) = f_reg; + DECL_CHAIN (f_reg) = f_ndx; + + layout_type (record); + return record; +} + + +/* Save the incoming argument registers on the stack. Returns the + address of the saved registers. */ + +static rtx +xtensa_builtin_saveregs (void) +{ + rtx gp_regs; + int arg_words = crtl->args.info.arg_words; + int gp_left = MAX_ARGS_IN_REGISTERS - arg_words; + + if (gp_left <= 0) + return const0_rtx; + + /* Allocate the general-purpose register space. */ + gp_regs = assign_stack_local + (BLKmode, MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD, -1); + set_mem_alias_set (gp_regs, get_varargs_alias_set ()); + + /* Now store the incoming registers. */ + cfun->machine->need_a7_copy = true; + cfun->machine->vararg_a7 = true; + move_block_from_reg (GP_ARG_FIRST + arg_words, + adjust_address (gp_regs, BLKmode, + arg_words * UNITS_PER_WORD), + gp_left); + gcc_assert (cfun->machine->vararg_a7_copy != 0); + emit_insn_before (cfun->machine->vararg_a7_copy, get_insns ()); + + return XEXP (gp_regs, 0); +} + + +/* Implement `va_start' for varargs and stdarg. We look at the + current function to fill in an initial va_list. */ + +static void +xtensa_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) +{ + tree f_stk, stk; + tree f_reg, reg; + tree f_ndx, ndx; + tree t, u; + int arg_words; + + arg_words = crtl->args.info.arg_words; + + f_stk = TYPE_FIELDS (va_list_type_node); + f_reg = DECL_CHAIN (f_stk); + f_ndx = DECL_CHAIN (f_reg); + + stk = build3 (COMPONENT_REF, TREE_TYPE (f_stk), valist, f_stk, NULL_TREE); + reg = build3 (COMPONENT_REF, TREE_TYPE (f_reg), unshare_expr (valist), + f_reg, NULL_TREE); + ndx = build3 (COMPONENT_REF, TREE_TYPE (f_ndx), unshare_expr (valist), + f_ndx, NULL_TREE); + + /* Call __builtin_saveregs; save the result in __va_reg */ + u = make_tree (sizetype, expand_builtin_saveregs ()); + u = fold_convert (ptr_type_node, u); + t = build2 (MODIFY_EXPR, ptr_type_node, reg, u); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Set the __va_stk member to ($arg_ptr - 32). */ + u = make_tree (ptr_type_node, virtual_incoming_args_rtx); + u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u, size_int (-32)); + t = build2 (MODIFY_EXPR, ptr_type_node, stk, u); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Set the __va_ndx member. 
If the first variable argument is on + the stack, adjust __va_ndx by 2 words to account for the extra + alignment offset for __va_stk. */ + if (arg_words >= MAX_ARGS_IN_REGISTERS) + arg_words += 2; + t = build2 (MODIFY_EXPR, integer_type_node, ndx, + build_int_cst (integer_type_node, arg_words * UNITS_PER_WORD)); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); +} + + +/* Implement `va_arg'. */ + +static tree +xtensa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p ATTRIBUTE_UNUSED) +{ + tree f_stk, stk; + tree f_reg, reg; + tree f_ndx, ndx; + tree type_size, array, orig_ndx, addr, size, va_size, t; + tree lab_false, lab_over, lab_false2; + bool indirect; + + indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); + if (indirect) + type = build_pointer_type (type); + + /* Handle complex values as separate real and imaginary parts. */ + if (TREE_CODE (type) == COMPLEX_TYPE) + { + tree real_part, imag_part; + + real_part = xtensa_gimplify_va_arg_expr (valist, TREE_TYPE (type), + pre_p, NULL); + real_part = get_initialized_tmp_var (real_part, pre_p, NULL); + + imag_part = xtensa_gimplify_va_arg_expr (unshare_expr (valist), + TREE_TYPE (type), + pre_p, NULL); + imag_part = get_initialized_tmp_var (imag_part, pre_p, NULL); + + return build2 (COMPLEX_EXPR, type, real_part, imag_part); + } + + f_stk = TYPE_FIELDS (va_list_type_node); + f_reg = DECL_CHAIN (f_stk); + f_ndx = DECL_CHAIN (f_reg); + + stk = build3 (COMPONENT_REF, TREE_TYPE (f_stk), valist, + f_stk, NULL_TREE); + reg = build3 (COMPONENT_REF, TREE_TYPE (f_reg), unshare_expr (valist), + f_reg, NULL_TREE); + ndx = build3 (COMPONENT_REF, TREE_TYPE (f_ndx), unshare_expr (valist), + f_ndx, NULL_TREE); + + type_size = size_in_bytes (type); + va_size = round_up (type_size, UNITS_PER_WORD); + gimplify_expr (&va_size, pre_p, NULL, is_gimple_val, fb_rvalue); + + + /* First align __va_ndx if necessary for this arg: + + orig_ndx = (AP).__va_ndx; + if (__alignof__ (TYPE) > 4 ) + orig_ndx = ((orig_ndx + __alignof__ (TYPE) - 1) + & -__alignof__ (TYPE)); */ + + orig_ndx = get_initialized_tmp_var (ndx, pre_p, NULL); + + if (TYPE_ALIGN (type) > BITS_PER_WORD) + { + int align = MIN (TYPE_ALIGN (type), STACK_BOUNDARY) / BITS_PER_UNIT; + + t = build2 (PLUS_EXPR, integer_type_node, unshare_expr (orig_ndx), + build_int_cst (integer_type_node, align - 1)); + t = build2 (BIT_AND_EXPR, integer_type_node, t, + build_int_cst (integer_type_node, -align)); + gimplify_assign (unshare_expr (orig_ndx), t, pre_p); + } + + + /* Increment __va_ndx to point past the argument: + + (AP).__va_ndx = orig_ndx + __va_size (TYPE); */ + + t = fold_convert (integer_type_node, va_size); + t = build2 (PLUS_EXPR, integer_type_node, orig_ndx, t); + gimplify_assign (unshare_expr (ndx), t, pre_p); + + + /* Check if the argument is in registers: + + if ((AP).__va_ndx <= __MAX_ARGS_IN_REGISTERS * 4 + && !must_pass_in_stack (type)) + __array = (AP).__va_reg; */ + + array = create_tmp_var (ptr_type_node, NULL); + + lab_over = NULL; + if (!targetm.calls.must_pass_in_stack (TYPE_MODE (type), type)) + { + lab_false = create_artificial_label (UNKNOWN_LOCATION); + lab_over = create_artificial_label (UNKNOWN_LOCATION); + + t = build2 (GT_EXPR, boolean_type_node, unshare_expr (ndx), + build_int_cst (integer_type_node, + MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD)); + t = build3 (COND_EXPR, void_type_node, t, + build1 (GOTO_EXPR, void_type_node, lab_false), + NULL_TREE); + gimplify_and_add (t, pre_p); + + 
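+      /* In-register case: the base pointer __array is simply __va_reg,
+	 the block that __builtin_saveregs spilled to the stack.  */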
gimplify_assign (unshare_expr (array), reg, pre_p); + + t = build1 (GOTO_EXPR, void_type_node, lab_over); + gimplify_and_add (t, pre_p); + + t = build1 (LABEL_EXPR, void_type_node, lab_false); + gimplify_and_add (t, pre_p); + } + + + /* ...otherwise, the argument is on the stack (never split between + registers and the stack -- change __va_ndx if necessary): + + else + { + if (orig_ndx <= __MAX_ARGS_IN_REGISTERS * 4) + (AP).__va_ndx = 32 + __va_size (TYPE); + __array = (AP).__va_stk; + } */ + + lab_false2 = create_artificial_label (UNKNOWN_LOCATION); + + t = build2 (GT_EXPR, boolean_type_node, unshare_expr (orig_ndx), + build_int_cst (integer_type_node, + MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD)); + t = build3 (COND_EXPR, void_type_node, t, + build1 (GOTO_EXPR, void_type_node, lab_false2), + NULL_TREE); + gimplify_and_add (t, pre_p); + + t = size_binop (PLUS_EXPR, unshare_expr (va_size), size_int (32)); + t = fold_convert (integer_type_node, t); + gimplify_assign (unshare_expr (ndx), t, pre_p); + + t = build1 (LABEL_EXPR, void_type_node, lab_false2); + gimplify_and_add (t, pre_p); + + gimplify_assign (array, stk, pre_p); + + if (lab_over) + { + t = build1 (LABEL_EXPR, void_type_node, lab_over); + gimplify_and_add (t, pre_p); + } + + + /* Given the base array pointer (__array) and index to the subsequent + argument (__va_ndx), find the address: + + __array + (AP).__va_ndx - (BYTES_BIG_ENDIAN && sizeof (TYPE) < 4 + ? sizeof (TYPE) + : __va_size (TYPE)) + + The results are endian-dependent because values smaller than one word + are aligned differently. */ + + + if (BYTES_BIG_ENDIAN && TREE_CODE (type_size) == INTEGER_CST) + { + t = fold_build2 (GE_EXPR, boolean_type_node, unshare_expr (type_size), + size_int (PARM_BOUNDARY / BITS_PER_UNIT)); + t = fold_build3 (COND_EXPR, sizetype, t, unshare_expr (va_size), + unshare_expr (type_size)); + size = t; + } + else + size = unshare_expr (va_size); + + t = fold_convert (sizetype, unshare_expr (ndx)); + t = build2 (MINUS_EXPR, sizetype, t, size); + addr = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (array), t); + + addr = fold_convert (build_pointer_type (type), addr); + if (indirect) + addr = build_va_arg_indirect_ref (addr); + return build_va_arg_indirect_ref (addr); +} + + +/* Builtins. 
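+   Three machine-specific builtins are registered below: __builtin_umulsidi3
+   is always available, while __builtin_thread_pointer and
+   __builtin_set_thread_pointer are only created when the configuration
+   includes the THREADPTR option.  As an illustrative use (user code, not
+   part of this file):
+
+     unsigned long long
+     mul64 (unsigned a, unsigned b)
+     {
+       return __builtin_umulsidi3 (a, b);
+     }
+
+   folds to a single widening multiply when MUL32_HIGH is configured (or
+   when both operands are constants) and otherwise expands to a call to
+   the library function __umulsidi3.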
*/ + +enum xtensa_builtin +{ + XTENSA_BUILTIN_UMULSIDI3, + XTENSA_BUILTIN_THREAD_POINTER, + XTENSA_BUILTIN_SET_THREAD_POINTER, + XTENSA_BUILTIN_max +}; + + +static void +xtensa_init_builtins (void) +{ + tree ftype, decl; + + ftype = build_function_type_list (unsigned_intDI_type_node, + unsigned_intSI_type_node, + unsigned_intSI_type_node, NULL_TREE); + + decl = add_builtin_function ("__builtin_umulsidi3", ftype, + XTENSA_BUILTIN_UMULSIDI3, BUILT_IN_MD, + "__umulsidi3", NULL_TREE); + TREE_NOTHROW (decl) = 1; + TREE_READONLY (decl) = 1; + + if (TARGET_THREADPTR) + { + ftype = build_function_type (ptr_type_node, void_list_node); + decl = add_builtin_function ("__builtin_thread_pointer", ftype, + XTENSA_BUILTIN_THREAD_POINTER, BUILT_IN_MD, + NULL, NULL_TREE); + TREE_READONLY (decl) = 1; + TREE_NOTHROW (decl) = 1; + + ftype = build_function_type_list (void_type_node, ptr_type_node, + NULL_TREE); + decl = add_builtin_function ("__builtin_set_thread_pointer", ftype, + XTENSA_BUILTIN_SET_THREAD_POINTER, + BUILT_IN_MD, NULL, NULL_TREE); + TREE_NOTHROW (decl) = 1; + } +} + + +static tree +xtensa_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, + bool ignore ATTRIBUTE_UNUSED) +{ + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arg0, arg1; + + switch (fcode) + { + case XTENSA_BUILTIN_UMULSIDI3: + arg0 = args[0]; + arg1 = args[1]; + if ((TREE_CODE (arg0) == INTEGER_CST && TREE_CODE (arg1) == INTEGER_CST) + || TARGET_MUL32_HIGH) + return fold_build2 (MULT_EXPR, unsigned_intDI_type_node, + fold_convert (unsigned_intDI_type_node, arg0), + fold_convert (unsigned_intDI_type_node, arg1)); + break; + + case XTENSA_BUILTIN_THREAD_POINTER: + case XTENSA_BUILTIN_SET_THREAD_POINTER: + break; + + default: + internal_error ("bad builtin code"); + break; + } + + return NULL; +} + + +static rtx +xtensa_expand_builtin (tree exp, rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + rtx arg; + + switch (fcode) + { + case XTENSA_BUILTIN_UMULSIDI3: + /* The umulsidi3 builtin is just a mechanism to avoid calling the real + __umulsidi3 function when the Xtensa configuration can directly + implement it. If not, just call the function. */ + return expand_call (exp, target, ignore); + + case XTENSA_BUILTIN_THREAD_POINTER: + if (!target || !register_operand (target, Pmode)) + target = gen_reg_rtx (Pmode); + emit_insn (gen_load_tp (target)); + return target; + + case XTENSA_BUILTIN_SET_THREAD_POINTER: + arg = expand_normal (CALL_EXPR_ARG (exp, 0)); + if (!register_operand (arg, Pmode)) + arg = copy_to_mode_reg (Pmode, arg); + emit_insn (gen_set_tp (arg)); + return const0_rtx; + + default: + internal_error ("bad builtin code"); + } + return NULL_RTX; +} + +/* Worker function for TARGET_PREFERRED_RELOAD_CLASS. */ + +static reg_class_t +xtensa_preferred_reload_class (rtx x, reg_class_t rclass) +{ + if (CONSTANT_P (x) && CONST_DOUBLE_P (x)) + return NO_REGS; + + /* Don't use the stack pointer or hard frame pointer for reloads! + The hard frame pointer would normally be OK except that it may + briefly hold an incoming argument in the prologue, and reload + won't know that it is live because the hard frame pointer is + treated specially. */ + + if (rclass == AR_REGS || rclass == GR_REGS) + return RL_REGS; + + return rclass; +} + +/* Worker function for TARGET_PREFERRED_OUTPUT_RELOAD_CLASS. 
*/ + +static reg_class_t +xtensa_preferred_output_reload_class (rtx x ATTRIBUTE_UNUSED, + reg_class_t rclass) +{ + /* Don't use the stack pointer or hard frame pointer for reloads! + The hard frame pointer would normally be OK except that it may + briefly hold an incoming argument in the prologue, and reload + won't know that it is live because the hard frame pointer is + treated specially. */ + + if (rclass == AR_REGS || rclass == GR_REGS) + return RL_REGS; + + return rclass; +} + +/* Worker function for TARGET_SECONDARY_RELOAD. */ + +static reg_class_t +xtensa_secondary_reload (bool in_p, rtx x, reg_class_t rclass, + enum machine_mode mode, secondary_reload_info *sri) +{ + int regno; + + if (in_p && constantpool_mem_p (x)) + { + if (rclass == FP_REGS) + return RL_REGS; + + if (mode == QImode) + sri->icode = CODE_FOR_reloadqi_literal; + else if (mode == HImode) + sri->icode = CODE_FOR_reloadhi_literal; + } + + regno = xt_true_regnum (x); + if (ACC_REG_P (regno)) + return ((rclass == GR_REGS || rclass == RL_REGS) ? NO_REGS : RL_REGS); + if (rclass == ACC_REG) + return (GP_REG_P (regno) ? NO_REGS : RL_REGS); + + return NO_REGS; +} + + +void +order_regs_for_local_alloc (void) +{ + if (!leaf_function_p ()) + { + memcpy (reg_alloc_order, reg_nonleaf_alloc_order, + FIRST_PSEUDO_REGISTER * sizeof (int)); + } + else + { + int i, num_arg_regs; + int nxt = 0; + + /* Use the AR registers in increasing order (skipping a0 and a1) + but save the incoming argument registers for a last resort. */ + num_arg_regs = crtl->args.info.arg_words; + if (num_arg_regs > MAX_ARGS_IN_REGISTERS) + num_arg_regs = MAX_ARGS_IN_REGISTERS; + for (i = GP_ARG_FIRST; i < 16 - num_arg_regs; i++) + reg_alloc_order[nxt++] = i + num_arg_regs; + for (i = 0; i < num_arg_regs; i++) + reg_alloc_order[nxt++] = GP_ARG_FIRST + i; + + /* List the coprocessor registers in order. */ + for (i = 0; i < BR_REG_NUM; i++) + reg_alloc_order[nxt++] = BR_REG_FIRST + i; + + /* List the FP registers in order for now. */ + for (i = 0; i < 16; i++) + reg_alloc_order[nxt++] = FP_REG_FIRST + i; + + /* GCC requires that we list *all* the registers.... */ + reg_alloc_order[nxt++] = 0; /* a0 = return address */ + reg_alloc_order[nxt++] = 1; /* a1 = stack pointer */ + reg_alloc_order[nxt++] = 16; /* pseudo frame pointer */ + reg_alloc_order[nxt++] = 17; /* pseudo arg pointer */ + + reg_alloc_order[nxt++] = ACC_REG_FIRST; /* MAC16 accumulator */ + } +} + + +/* Some Xtensa targets support multiple bss sections. If the section + name ends with ".bss", add SECTION_BSS to the flags. */ + +static unsigned int +xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = default_section_type_flags (decl, name, reloc); + const char *suffix; + + suffix = strrchr (name, '.'); + if (suffix && strcmp (suffix, ".bss") == 0) + { + if (!decl || (TREE_CODE (decl) == VAR_DECL + && DECL_INITIAL (decl) == NULL_TREE)) + flags |= SECTION_BSS; /* @nobits */ + else + warning (0, "only uninitialized variables can be placed in a " + ".bss section"); + } + + return flags; +} + + +/* The literal pool stays with the function. */ + +static section * +xtensa_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED, + unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) +{ + return function_section (current_function_decl); +} + +/* Worker function for TARGET_REGISTER_MOVE_COST. 
*/ + +static int +xtensa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + if (from == to && from != BR_REGS && to != BR_REGS) + return 2; + else if (reg_class_subset_p (from, AR_REGS) + && reg_class_subset_p (to, AR_REGS)) + return 2; + else if (reg_class_subset_p (from, AR_REGS) && to == ACC_REG) + return 3; + else if (from == ACC_REG && reg_class_subset_p (to, AR_REGS)) + return 3; + else + return 10; +} + +/* Worker function for TARGET_MEMORY_MOVE_COST. */ + +static int +xtensa_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + return 4; +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +xtensa_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed ATTRIBUTE_UNUSED) +{ + switch (code) + { + case CONST_INT: + switch (outer_code) + { + case SET: + if (xtensa_simm12b (INTVAL (x))) + { + *total = 4; + return true; + } + break; + case PLUS: + if (xtensa_simm8 (INTVAL (x)) + || xtensa_simm8x256 (INTVAL (x))) + { + *total = 0; + return true; + } + break; + case AND: + if (xtensa_mask_immediate (INTVAL (x))) + { + *total = 0; + return true; + } + break; + case COMPARE: + if ((INTVAL (x) == 0) || xtensa_b4const (INTVAL (x))) + { + *total = 0; + return true; + } + break; + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATE: + case ROTATERT: + /* No way to tell if X is the 2nd operand so be conservative. */ + default: break; + } + if (xtensa_simm12b (INTVAL (x))) + *total = 5; + else if (TARGET_CONST16) + *total = COSTS_N_INSNS (2); + else + *total = 6; + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + if (TARGET_CONST16) + *total = COSTS_N_INSNS (2); + else + *total = 5; + return true; + + case CONST_DOUBLE: + if (TARGET_CONST16) + *total = COSTS_N_INSNS (4); + else + *total = 7; + return true; + + case MEM: + { + int num_words = + (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) ? 2 : 1; + + if (memory_address_p (GET_MODE (x), XEXP ((x), 0))) + *total = COSTS_N_INSNS (num_words); + else + *total = COSTS_N_INSNS (2*num_words); + return true; + } + + case FFS: + case CTZ: + *total = COSTS_N_INSNS (TARGET_NSA ? 5 : 50); + return true; + + case CLZ: + *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50); + return true; + + case NOT: + *total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 3 : 2); + return true; + + case AND: + case IOR: + case XOR: + if (GET_MODE (x) == DImode) + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (1); + return true; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (GET_MODE (x) == DImode) + *total = COSTS_N_INSNS (50); + else + *total = COSTS_N_INSNS (1); + return true; + + case ABS: + { + enum machine_mode xmode = GET_MODE (x); + if (xmode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); + else if (xmode == DFmode) + *total = COSTS_N_INSNS (50); + else + *total = COSTS_N_INSNS (4); + return true; + } + + case PLUS: + case MINUS: + { + enum machine_mode xmode = GET_MODE (x); + if (xmode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50); + else if (xmode == DFmode || xmode == DImode) + *total = COSTS_N_INSNS (50); + else + *total = COSTS_N_INSNS (1); + return true; + } + + case NEG: + *total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 
4 : 2); + return true; + + case MULT: + { + enum machine_mode xmode = GET_MODE (x); + if (xmode == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 4 : 50); + else if (xmode == DFmode) + *total = COSTS_N_INSNS (50); + else if (xmode == DImode) + *total = COSTS_N_INSNS (TARGET_MUL32_HIGH ? 10 : 50); + else if (TARGET_MUL32) + *total = COSTS_N_INSNS (4); + else if (TARGET_MAC16) + *total = COSTS_N_INSNS (16); + else if (TARGET_MUL16) + *total = COSTS_N_INSNS (12); + else + *total = COSTS_N_INSNS (50); + return true; + } + + case DIV: + case MOD: + { + enum machine_mode xmode = GET_MODE (x); + if (xmode == SFmode) + { + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT_DIV ? 8 : 50); + return true; + } + else if (xmode == DFmode) + { + *total = COSTS_N_INSNS (50); + return true; + } + } + /* Fall through. */ + + case UDIV: + case UMOD: + { + enum machine_mode xmode = GET_MODE (x); + if (xmode == DImode) + *total = COSTS_N_INSNS (50); + else if (TARGET_DIV32) + *total = COSTS_N_INSNS (32); + else + *total = COSTS_N_INSNS (50); + return true; + } + + case SQRT: + if (GET_MODE (x) == SFmode) + *total = COSTS_N_INSNS (TARGET_HARD_FLOAT_SQRT ? 8 : 50); + else + *total = COSTS_N_INSNS (50); + return true; + + case SMIN: + case UMIN: + case SMAX: + case UMAX: + *total = COSTS_N_INSNS (TARGET_MINMAX ? 1 : 50); + return true; + + case SIGN_EXTRACT: + case SIGN_EXTEND: + *total = COSTS_N_INSNS (TARGET_SEXT ? 1 : 2); + return true; + + case ZERO_EXTRACT: + case ZERO_EXTEND: + *total = COSTS_N_INSNS (1); + return true; + + default: + return false; + } +} + +/* Worker function for TARGET_RETURN_IN_MEMORY. */ + +static bool +xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type) + > 4 * UNITS_PER_WORD); +} + +/* Worker function for TARGET_FUNCTION_VALUE. */ + +rtx +xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, + bool outgoing) +{ + return gen_rtx_REG ((INTEGRAL_TYPE_P (valtype) + && TYPE_PRECISION (valtype) < BITS_PER_WORD) + ? SImode : TYPE_MODE (valtype), + outgoing ? GP_OUTGOING_RETURN : GP_RETURN); +} + +/* Worker function for TARGET_LIBCALL_VALUE. */ + +static rtx +xtensa_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG ((GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < UNITS_PER_WORD) + ? SImode : mode, GP_RETURN); +} + +/* Worker function TARGET_FUNCTION_VALUE_REGNO_P. */ + +static bool +xtensa_function_value_regno_p (const unsigned int regno) +{ + return (regno == GP_RETURN); +} + +/* The static chain is passed in memory. Provide rtx giving 'mem' + expressions that denote where they are stored. */ + +static rtx +xtensa_static_chain (const_tree ARG_UNUSED (fndecl), bool incoming_p) +{ + rtx base = incoming_p ? arg_pointer_rtx : stack_pointer_rtx; + return gen_frame_mem (Pmode, plus_constant (base, -5 * UNITS_PER_WORD)); +} + + +/* TRAMPOLINE_TEMPLATE: For Xtensa, the trampoline must perform an ENTRY + instruction with a minimal stack frame in order to get some free + registers. Once the actual call target is known, the proper stack frame + size is extracted from the ENTRY instruction at the target and the + current frame is adjusted to match. The trampoline then transfers + control to the instruction following the ENTRY at the target. Note: + this assumes that the target begins with an ENTRY instruction. 
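+   (The ENTRY immediate encodes the callee's frame size as a 12-bit count
+   of 8-byte units, which is why the template below extracts a 12-bit field
+   from the first instruction word at the call target and shifts it left by
+   3 before adjusting the stack pointer.)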
*/ + +static void +xtensa_asm_trampoline_template (FILE *stream) +{ + bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS); + + fprintf (stream, "\t.begin no-transform\n"); + fprintf (stream, "\tentry\tsp, %d\n", MIN_FRAME_SIZE); + + if (use_call0) + { + /* Save the return address. */ + fprintf (stream, "\tmov\ta10, a0\n"); + + /* Use a CALL0 instruction to skip past the constants and in the + process get the PC into A0. This allows PC-relative access to + the constants without relying on L32R. */ + fprintf (stream, "\tcall0\t.Lskipconsts\n"); + } + else + fprintf (stream, "\tj\t.Lskipconsts\n"); + + fprintf (stream, "\t.align\t4\n"); + fprintf (stream, ".Lchainval:%s0\n", integer_asm_op (4, TRUE)); + fprintf (stream, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE)); + fprintf (stream, ".Lskipconsts:\n"); + + /* Load the static chain and function address from the trampoline. */ + if (use_call0) + { + fprintf (stream, "\taddi\ta0, a0, 3\n"); + fprintf (stream, "\tl32i\ta9, a0, 0\n"); + fprintf (stream, "\tl32i\ta8, a0, 4\n"); + } + else + { + fprintf (stream, "\tl32r\ta9, .Lchainval\n"); + fprintf (stream, "\tl32r\ta8, .Lfnaddr\n"); + } + + /* Store the static chain. */ + fprintf (stream, "\ts32i\ta9, sp, %d\n", MIN_FRAME_SIZE - 20); + + /* Set the proper stack pointer value. */ + fprintf (stream, "\tl32i\ta9, a8, 0\n"); + fprintf (stream, "\textui\ta9, a9, %d, 12\n", + TARGET_BIG_ENDIAN ? 8 : 12); + fprintf (stream, "\tslli\ta9, a9, 3\n"); + fprintf (stream, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE); + fprintf (stream, "\tsub\ta9, sp, a9\n"); + fprintf (stream, "\tmovsp\tsp, a9\n"); + + if (use_call0) + /* Restore the return address. */ + fprintf (stream, "\tmov\ta0, a10\n"); + + /* Jump to the instruction following the ENTRY. */ + fprintf (stream, "\taddi\ta8, a8, 3\n"); + fprintf (stream, "\tjx\ta8\n"); + + /* Pad size to a multiple of TRAMPOLINE_ALIGNMENT. */ + if (use_call0) + fprintf (stream, "\t.byte\t0\n"); + else + fprintf (stream, "\tnop\n"); + + fprintf (stream, "\t.end no-transform\n"); +} + +static void +xtensa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain) +{ + rtx func = XEXP (DECL_RTL (fndecl), 0); + bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS); + int chain_off = use_call0 ? 12 : 8; + int func_off = use_call0 ? 16 : 12; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + emit_move_insn (adjust_address (m_tramp, SImode, chain_off), chain); + emit_move_insn (adjust_address (m_tramp, SImode, func_off), func); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"), + LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); +} + + +#include "gt-xtensa.h" diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h new file mode 100644 index 000000000..0a096cdb5 --- /dev/null +++ b/gcc/config/xtensa/xtensa.h @@ -0,0 +1,847 @@ +/* Definitions of Tensilica's Xtensa target machine for GNU compiler. + Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Get Xtensa configuration settings */ +#include "xtensa-config.h" + +/* External variables defined in xtensa.c. */ + +extern unsigned xtensa_current_frame_size; + +/* Macros used in the machine description to select various Xtensa + configuration options. */ +#ifndef XCHAL_HAVE_MUL32_HIGH +#define XCHAL_HAVE_MUL32_HIGH 0 +#endif +#ifndef XCHAL_HAVE_RELEASE_SYNC +#define XCHAL_HAVE_RELEASE_SYNC 0 +#endif +#ifndef XCHAL_HAVE_S32C1I +#define XCHAL_HAVE_S32C1I 0 +#endif +#ifndef XCHAL_HAVE_THREADPTR +#define XCHAL_HAVE_THREADPTR 0 +#endif +#define TARGET_BIG_ENDIAN XCHAL_HAVE_BE +#define TARGET_DENSITY XCHAL_HAVE_DENSITY +#define TARGET_MAC16 XCHAL_HAVE_MAC16 +#define TARGET_MUL16 XCHAL_HAVE_MUL16 +#define TARGET_MUL32 XCHAL_HAVE_MUL32 +#define TARGET_MUL32_HIGH XCHAL_HAVE_MUL32_HIGH +#define TARGET_DIV32 XCHAL_HAVE_DIV32 +#define TARGET_NSA XCHAL_HAVE_NSA +#define TARGET_MINMAX XCHAL_HAVE_MINMAX +#define TARGET_SEXT XCHAL_HAVE_SEXT +#define TARGET_BOOLEANS XCHAL_HAVE_BOOLEANS +#define TARGET_HARD_FLOAT XCHAL_HAVE_FP +#define TARGET_HARD_FLOAT_DIV XCHAL_HAVE_FP_DIV +#define TARGET_HARD_FLOAT_RECIP XCHAL_HAVE_FP_RECIP +#define TARGET_HARD_FLOAT_SQRT XCHAL_HAVE_FP_SQRT +#define TARGET_HARD_FLOAT_RSQRT XCHAL_HAVE_FP_RSQRT +#define TARGET_ABS XCHAL_HAVE_ABS +#define TARGET_ADDX XCHAL_HAVE_ADDX +#define TARGET_RELEASE_SYNC XCHAL_HAVE_RELEASE_SYNC +#define TARGET_S32C1I XCHAL_HAVE_S32C1I +#define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS +#define TARGET_THREADPTR XCHAL_HAVE_THREADPTR + +#define TARGET_DEFAULT \ + ((XCHAL_HAVE_L32R ? 0 : MASK_CONST16) | \ + MASK_SERIALIZE_VOLATILE) + +#ifndef HAVE_AS_TLS +#define HAVE_AS_TLS 0 +#endif + + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do { \ + builtin_assert ("cpu=xtensa"); \ + builtin_assert ("machine=xtensa"); \ + builtin_define ("__xtensa__"); \ + builtin_define ("__XTENSA__"); \ + builtin_define ("__XTENSA_WINDOWED_ABI__"); \ + builtin_define (TARGET_BIG_ENDIAN ? "__XTENSA_EB__" : "__XTENSA_EL__"); \ + if (!TARGET_HARD_FLOAT) \ + builtin_define ("__XTENSA_SOFT_FLOAT__"); \ + } while (0) + +#define CPP_SPEC " %(subtarget_cpp_spec) " + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "" +#endif + +#define EXTRA_SPECS \ + { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, + +/* Target machine storage layout */ + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +/* Define this if most significant byte of a word is the lowest numbered. */ +#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +/* Define this if most significant word of a multiword number is the lowest. */ +#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +#define MAX_BITS_PER_WORD 32 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 4 +#define MIN_UNITS_PER_WORD 4 + +/* Width of a floating point register. */ +#define UNITS_PER_FPREG 4 + +/* Size in bits of various types on the target machine. 
*/ +#define INT_TYPE_SIZE 32 +#define SHORT_TYPE_SIZE 16 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* Allocation boundary (in *bits*) for storing pointers in memory. */ +#define POINTER_BOUNDARY 32 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 32 + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after 'int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 32 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* There is no point aligning anything to a rounder boundary than this. */ +#define BIGGEST_ALIGNMENT 128 + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* Promote integer modes smaller than a word to SImode. Set UNSIGNEDP + for QImode, because there is no 8-bit load from memory with sign + extension. Otherwise, leave UNSIGNEDP alone, since Xtensa has 16-bit + loads both with and without sign extension. */ +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + do { \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + { \ + if ((MODE) == QImode) \ + (UNSIGNEDP) = 1; \ + (MODE) = SImode; \ + } \ + } while (0) + +/* Imitate the way many other C compilers handle alignment of + bitfields and the structures that contain them. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* Disable the use of word-sized or smaller complex modes for structures, + and for function arguments in particular, where they cause problems with + register a7. The xtensa_copy_incoming_a7 function assumes that there is + a single reference to an argument in a7, but with small complex modes the + real and imaginary components may be extracted separately, leading to two + uses of the register, only one of which would be replaced. */ +#define MEMBER_TYPE_FORCES_BLK(FIELD, MODE) \ + ((MODE) == CQImode || (MODE) == CHImode) + +/* Align string constants and constructors to at least a word boundary. + The typical use of this macro is to increase alignment for string + constants to be word aligned so that 'strcpy' calls that copy + constants can be done inline. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST || TREE_CODE (EXP) == CONSTRUCTOR) \ + && (ALIGN) < BITS_PER_WORD \ + ? BITS_PER_WORD \ + : (ALIGN)) + +/* Align arrays, unions and records to at least a word boundary. + One use of this macro is to increase alignment of medium-size + data to make it all fit in fewer cache lines. Another is to + cause character arrays to be word-aligned so that 'strcpy' calls + that copy constants to character arrays can be done inline. */ +#undef DATA_ALIGNMENT +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + ((((ALIGN) < BITS_PER_WORD) \ + && (TREE_CODE (TYPE) == ARRAY_TYPE \ + || TREE_CODE (TYPE) == UNION_TYPE \ + || TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN)) + +/* Operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Xtensa loads are zero-extended by default. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. 
+ All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + The fake frame pointer and argument pointer will never appear in + the generated code, since they will always be eliminated and replaced + by either the stack pointer or the hard frame pointer. + + 0 - 15 AR[0] - AR[15] + 16 FRAME_POINTER (fake = initial sp) + 17 ARG_POINTER (fake = initial sp + framesize) + 18 BR[0] for floating-point CC + 19 - 34 FR[0] - FR[15] + 35 MAC16 accumulator */ + +#define FIRST_PSEUDO_REGISTER 36 + +/* Return the stabs register number to use for REGNO. */ +#define DBX_REGISTER_NUMBER(REGNO) xtensa_dbx_register_number (REGNO) + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. */ +#define FIXED_REGISTERS \ +{ \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ +#define CALL_USED_REGISTERS \ +{ \ + 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, \ +} + +/* For non-leaf procedures on Xtensa processors, the allocation order + is as specified below by REG_ALLOC_ORDER. For leaf procedures, we + want to use the lowest numbered registers first to minimize + register window overflows. However, local-alloc is not smart + enough to consider conflicts with incoming arguments. If an + incoming argument in a2 is live throughout the function and + local-alloc decides to use a2, then the incoming argument must + either be spilled or copied to another register. To get around + this, we define ADJUST_REG_ALLOC_ORDER to redefine + reg_alloc_order for leaf functions such that lowest numbered + registers are used first with the exception that the incoming + argument registers are not used until after other register choices + have been exhausted. */ + +#define REG_ALLOC_ORDER \ +{ 8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, \ + 18, \ + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \ + 0, 1, 16, 17, \ + 35, \ +} + +#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc () + +/* For Xtensa, the only point of this is to prevent GCC from otherwise + giving preference to call-used registers. To minimize window + overflows for the AR registers, we want to give preference to the + lower-numbered AR registers. For other register files, which are + not windowed, we still prefer call-used registers, if there are any. */ +extern const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER]; +#define LEAF_REGISTERS xtensa_leaf_regs + +/* For Xtensa, no remapping is necessary, but this macro must be + defined if LEAF_REGISTERS is defined. */ +#define LEAF_REG_REMAP(REGNO) (REGNO) + +/* This must be declared if LEAF_REGISTERS is set. */ +extern int leaf_function; + +/* Internal macros to classify a register number. 
*/ + +/* 16 address registers + fake registers */ +#define GP_REG_FIRST 0 +#define GP_REG_LAST 17 +#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1) + +/* Coprocessor registers */ +#define BR_REG_FIRST 18 +#define BR_REG_LAST 18 +#define BR_REG_NUM (BR_REG_LAST - BR_REG_FIRST + 1) + +/* 16 floating-point registers */ +#define FP_REG_FIRST 19 +#define FP_REG_LAST 34 +#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1) + +/* MAC16 accumulator */ +#define ACC_REG_FIRST 35 +#define ACC_REG_LAST 35 +#define ACC_REG_NUM (ACC_REG_LAST - ACC_REG_FIRST + 1) + +#define GP_REG_P(REGNO) ((unsigned) ((REGNO) - GP_REG_FIRST) < GP_REG_NUM) +#define BR_REG_P(REGNO) ((unsigned) ((REGNO) - BR_REG_FIRST) < BR_REG_NUM) +#define FP_REG_P(REGNO) ((unsigned) ((REGNO) - FP_REG_FIRST) < FP_REG_NUM) +#define ACC_REG_P(REGNO) ((unsigned) ((REGNO) - ACC_REG_FIRST) < ACC_REG_NUM) + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ + (FP_REG_P (REGNO) ? \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG) : \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode + MODE. */ +extern char xtensa_hard_regno_mode_ok[][FIRST_PSEUDO_REGISTER]; + +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + xtensa_hard_regno_mode_ok[(int) (MODE)][(REGNO)] + +/* Value is 1 if it is a good idea to tie two pseudo registers + when one has mode MODE1 and one has mode MODE2. + If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, + for any hard reg, then this must be 0 for correct output. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ + ((GET_MODE_CLASS (MODE1) == MODE_FLOAT || \ + GET_MODE_CLASS (MODE1) == MODE_COMPLEX_FLOAT) \ + == (GET_MODE_CLASS (MODE2) == MODE_FLOAT || \ + GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT)) + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM (GP_REG_FIRST + 1) + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 7) + +/* The register number of the frame pointer register, which is used to + access automatic variables in the stack frame. For Xtensa, this + register never appears in the output. It is always eliminated to + either the stack pointer or the hard frame pointer. */ +#define FRAME_POINTER_REGNUM (GP_REG_FIRST + 16) + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM (GP_REG_FIRST + 17) + +/* For now we don't try to use the full set of boolean registers. Without + software pipelining of FP operations, there's not much to gain and it's + a real pain to get them reloaded. */ +#define FPCC_REGNUM (BR_REG_FIRST + 0) + +/* It is as good or better to call a constant function address than to + call an address kept in a register. */ +#define NO_FUNCTION_CSE 1 + +/* Xtensa processors have "register windows". GCC does not currently + take advantage of the possibility for variable-sized windows; instead, + we use a fixed window size of 8. */ + +#define INCOMING_REGNO(OUT) \ + ((GP_REG_P (OUT) && \ + ((unsigned) ((OUT) - GP_REG_FIRST) >= WINDOW_SIZE)) ? \ + (OUT) - WINDOW_SIZE : (OUT)) + +#define OUTGOING_REGNO(IN) \ + ((GP_REG_P (IN) && \ + ((unsigned) ((IN) - GP_REG_FIRST) < WINDOW_SIZE)) ? \ + (IN) + WINDOW_SIZE : (IN)) + + +/* Define the classes of registers for register constraints in the + machine description. 
*/
+enum reg_class
+{
+  NO_REGS,		/* no registers in set */
+  BR_REGS,		/* coprocessor boolean registers */
+  FP_REGS,		/* floating point registers */
+  ACC_REG,		/* MAC16 accumulator */
+  SP_REG,		/* sp register (aka a1) */
+  RL_REGS,		/* preferred reload regs (not sp or fp) */
+  GR_REGS,		/* integer registers except sp */
+  AR_REGS,		/* all integer registers */
+  ALL_REGS,		/* all registers */
+  LIM_REG_CLASSES	/* max value + 1 */
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define GENERAL_REGS AR_REGS
+
+/* An initializer containing the names of the register classes as C
+   string constants.  These names are used in writing some of the
+   debugging dumps.  */
+#define REG_CLASS_NAMES \
+{ \
+  "NO_REGS", \
+  "BR_REGS", \
+  "FP_REGS", \
+  "ACC_REG", \
+  "SP_REG", \
+  "RL_REGS", \
+  "GR_REGS", \
+  "AR_REGS", \
+  "ALL_REGS" \
+}
+
+/* Contents of the register classes.  The Nth integer specifies the
+   contents of class N.  The way the integer MASK is interpreted is
+   that register R is in the class if 'MASK & (1 << R)' is 1.  */
+#define REG_CLASS_CONTENTS \
+{ \
+  { 0x00000000, 0x00000000 }, /* no registers */ \
+  { 0x00040000, 0x00000000 }, /* coprocessor boolean registers */ \
+  { 0xfff80000, 0x00000007 }, /* floating-point registers */ \
+  { 0x00000000, 0x00000008 }, /* MAC16 accumulator */ \
+  { 0x00000002, 0x00000000 }, /* stack pointer register */ \
+  { 0x0000ff7d, 0x00000000 }, /* preferred reload registers */ \
+  { 0x0000fffd, 0x00000000 }, /* general-purpose registers */ \
+  { 0x0003ffff, 0x00000000 }, /* integer registers */ \
+  { 0xffffffff, 0x0000000f } /* all registers */ \
+}
+
+#define IRA_COVER_CLASSES \
+{ \
+  BR_REGS, FP_REGS, ACC_REG, AR_REGS, LIM_REG_CLASSES \
+}
+
+/* A C expression whose value is a register class containing hard
+   register REGNO.  In general there is more than one such class;
+   choose a class which is "minimal", meaning that no smaller class
+   also contains the register.  */
+extern const enum reg_class xtensa_regno_to_class[FIRST_PSEUDO_REGISTER];
+
+#define REGNO_REG_CLASS(REGNO) xtensa_regno_to_class[ (REGNO) ]
+
+/* Use the Xtensa AR register file for base registers.
+   No index registers.  */
+#define BASE_REG_CLASS AR_REGS
+#define INDEX_REG_CLASS NO_REGS
+
+/* The small_register_classes_for_mode_p hook must always return true for
+   Xtensa, because all of the 16 AR registers may be explicitly used in
+   the RTL, as either incoming or outgoing arguments.  */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+/* Return the maximum number of consecutive registers
+   needed to represent mode MODE in a register of class CLASS.  */
+#define CLASS_UNITS(mode, size) \
+  ((GET_MODE_SIZE (mode) + (size) - 1) / (size))
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+  (CLASS_UNITS (MODE, UNITS_PER_WORD))
+
+
+/* Stack layout; function entry, exit and calling.  */
+
+#define STACK_GROWS_DOWNWARD
+
+/* Offset within stack frame to start allocating local variables at.  */
+#define STARTING_FRAME_OFFSET \
+  crtl->outgoing_args_size
+
+/* The ARG_POINTER and FRAME_POINTER are not real Xtensa registers, so
+   they are eliminated to either the stack pointer or hard frame pointer.  */
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}}
+
+/* Specify the initial difference between the specified pair of registers.
*/ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + do { \ + compute_frame_size (get_frame_size ()); \ + switch (FROM) \ + { \ + case FRAME_POINTER_REGNUM: \ + (OFFSET) = 0; \ + break; \ + case ARG_POINTER_REGNUM: \ + (OFFSET) = xtensa_current_frame_size; \ + break; \ + default: \ + gcc_unreachable (); \ + } \ + } while (0) + +/* If defined, the maximum amount of space required for outgoing + arguments will be computed and placed into the variable + 'crtl->outgoing_args_size'. No space will be pushed + onto the stack for each call; instead, the function prologue + should increase the stack frame size by this amount. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Offset from the argument pointer register to the first argument's + address. On some machines it may depend on the data type of the + function. If 'ARGS_GROW_DOWNWARD', this is the offset to the + location above the first argument's address. */ +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Align stack frames on 128 bits for Xtensa. This is necessary for + 128-bit datatypes defined in TIE (e.g., for Vectra). */ +#define STACK_BOUNDARY 128 + +/* Use a fixed register window size of 8. */ +#define WINDOW_SIZE 8 + +/* Symbolic macros for the registers used to return integer, floating + point, and values of coprocessor and user-defined modes. */ +#define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE) +#define GP_OUTGOING_RETURN (GP_REG_FIRST + 2) + +/* Symbolic macros for the first/last argument registers. */ +#define GP_ARG_FIRST (GP_REG_FIRST + 2) +#define GP_ARG_LAST (GP_REG_FIRST + 7) +#define GP_OUTGOING_ARG_FIRST (GP_REG_FIRST + 2 + WINDOW_SIZE) +#define GP_OUTGOING_ARG_LAST (GP_REG_FIRST + 7 + WINDOW_SIZE) + +#define MAX_ARGS_IN_REGISTERS 6 + +/* Don't worry about compatibility with PCC. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* A C expression that is nonzero if REGNO is the number of a hard + register in which function arguments are sometimes passed. This + does *not* include implicit arguments such as the static chain and + the structure-value address. On many machines, no registers can be + used for this purpose since all function arguments are pushed on + the stack. */ +#define FUNCTION_ARG_REGNO_P(N) \ + ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST) + +/* Record the number of argument words seen so far, along with a flag to + indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG + is used for both incoming and outgoing args, so a separate flag is + needed. */ +typedef struct xtensa_args +{ + int arg_words; + int incoming; +} CUMULATIVE_ARGS; + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + init_cumulative_args (&CUM, 0) + +#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME) \ + init_cumulative_args (&CUM, 1) + +/* Profiling Xtensa code is typically done with the built-in profiling + feature of Tensilica's instruction set simulator, which does not + require any compiler support. Profiling code on a real (i.e., + non-simulated) Xtensa processor is currently only supported by + GNU/Linux with glibc. The glibc version of _mcount doesn't require + counter variables. The _mcount function needs the current PC and + the current return address to identify an arc in the call graph. + Pass the current return address as the first argument; the current + PC is available as a0 in _mcount's register window. Both of these + values contain window size information in the two most significant + bits; we assume that _mcount will mask off those bits. 
The call to + _mcount uses a window size of 8 to make sure that it doesn't clobber + any incoming argument values. */ + +#define NO_PROFILE_COUNTERS 1 + +#define FUNCTION_PROFILER(FILE, LABELNO) \ + do { \ + fprintf (FILE, "\t%s\ta10, a0\n", TARGET_DENSITY ? "mov.n" : "mov"); \ + if (flag_pic) \ + { \ + fprintf (FILE, "\tmovi\ta8, _mcount@PLT\n"); \ + fprintf (FILE, "\tcallx8\ta8\n"); \ + } \ + else \ + fprintf (FILE, "\tcall8\t_mcount\n"); \ + } while (0) + +/* Stack pointer value doesn't matter at exit. */ +#define EXIT_IGNORE_STACK 1 + +/* Size in bytes of the trampoline, as an integer. Make sure this is + a multiple of TRAMPOLINE_ALIGNMENT to avoid -Wpadded warnings. */ +#define TRAMPOLINE_SIZE (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS ? 60 : 52) + +/* Alignment required for trampolines, in bits. */ +#define TRAMPOLINE_ALIGNMENT 32 + +/* If defined, a C expression that produces the machine-specific code + to setup the stack so that arbitrary frames can be accessed. + + On Xtensa, a stack back-trace must always begin from the stack pointer, + so that the register overflow save area can be located. However, the + stack-walking code in GCC always begins from the hard_frame_pointer + register, not the stack pointer. The frame pointer is usually equal + to the stack pointer, but the __builtin_return_address and + __builtin_frame_address functions will not work if count > 0 and + they are called from a routine that uses alloca. These functions + are not guaranteed to work at all if count > 0 so maybe that is OK. + + A nicer solution would be to allow the architecture-specific files to + specify whether to start from the stack pointer or frame pointer. That + would also allow us to skip the machine->accesses_prev_frame stuff that + we currently need to ensure that there is a frame pointer when these + builtin functions are used. */ + +#define SETUP_FRAME_ADDRESSES xtensa_setup_frame_addresses + +/* A C expression whose value is RTL representing the address in a + stack frame where the pointer to the caller's frame is stored. + Assume that FRAMEADDR is an RTL expression for the address of the + stack frame itself. + + For Xtensa, there is no easy way to get the frame pointer if it is + not equivalent to the stack pointer. Moreover, the result of this + macro is used for continuing to walk back up the stack, so it must + return the stack pointer address. Thus, there is some inconsistency + here in that __builtin_frame_address will return the frame pointer + when count == 0 and the stack pointer when count > 0. */ + +#define DYNAMIC_CHAIN_ADDRESS(frame) \ + gen_rtx_PLUS (Pmode, frame, GEN_INT (-3 * UNITS_PER_WORD)) + +/* Define this if the return address of a particular stack frame is + accessed from the frame pointer of the previous stack frame. */ +#define RETURN_ADDR_IN_PREVIOUS_FRAME + +/* A C expression whose value is RTL representing the value of the + return address for the frame COUNT steps up from the current + frame, after the prologue. */ +#define RETURN_ADDR_RTX xtensa_return_addr + +/* Addressing modes, and classification of registers for them. */ + +/* C expressions which are nonzero if register number NUM is suitable + for use as a base or index register in operand addresses. */ + +#define REGNO_OK_FOR_INDEX_P(NUM) 0 +#define REGNO_OK_FOR_BASE_P(NUM) \ + (GP_REG_P (NUM) || GP_REG_P ((unsigned) reg_renumber[NUM])) + +/* C expressions that are nonzero if X (assumed to be a `reg' RTX) is + valid for use as a base or index register. 
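+   Xtensa load/store addressing is base-plus-offset only, so no index
+   registers are accepted here: the index predicates are always zero and
+   MAX_REGS_PER_ADDRESS below is 1.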
*/ + +#ifdef REG_OK_STRICT +#define REG_OK_STRICT_FLAG 1 +#else +#define REG_OK_STRICT_FLAG 0 +#endif + +#define BASE_REG_P(X, STRICT) \ + ((!(STRICT) && REGNO (X) >= FIRST_PSEUDO_REGISTER) \ + || REGNO_OK_FOR_BASE_P (REGNO (X))) + +#define REG_OK_FOR_INDEX_P(X) 0 +#define REG_OK_FOR_BASE_P(X) BASE_REG_P (X, REG_OK_STRICT_FLAG) + +/* Maximum number of registers that can appear in a valid memory address. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* A C expression that is 1 if the RTX X is a constant which is a + valid address. This is defined to be the same as 'CONSTANT_P (X)', + but rejecting CONST_DOUBLE. */ +#define CONSTANT_ADDRESS_P(X) \ + ((GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST_INT || GET_CODE (X) == HIGH \ + || (GET_CODE (X) == CONST))) + +/* Nonzero if the constant value X is a legitimate general operand. + It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ +#define LEGITIMATE_CONSTANT_P(X) (! xtensa_tls_referenced_p (X)) + +/* A C expression that is nonzero if X is a legitimate immediate + operand on the target machine when generating position independent + code. */ +#define LEGITIMATE_PIC_OPERAND_P(X) \ + ((GET_CODE (X) != SYMBOL_REF \ + || (SYMBOL_REF_LOCAL_P (X) && !SYMBOL_REF_EXTERNAL_P (X))) \ + && GET_CODE (X) != LABEL_REF \ + && GET_CODE (X) != CONST) + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE (SImode) + +/* Define this as 1 if 'char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 0 + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 4 +#define MAX_MOVE_MAX 4 + +/* Prefer word-sized loads. */ +#define SLOW_BYTE_ACCESS 1 + +/* Shift instructions ignore all but the low-order few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 1) + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +#define Pmode SImode + +/* A function address in a call instruction is a word address (for + indexing purposes) so give the MEM rtx a words's mode. */ +#define FUNCTION_MODE SImode + +#define BRANCH_COST(speed_p, predictable_p) 3 + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ +#define REGISTER_NAMES \ +{ \ + "a0", "sp", "a2", "a3", "a4", "a5", "a6", "a7", \ + "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", \ + "fp", "argp", "b0", \ + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ + "acc" \ +} + +/* If defined, a C initializer for an array of structures containing a + name and a register number. This macro defines additional names + for hard registers, thus allowing the 'asm' option in declarations + to refer to registers using alternate names. 
*/ +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + { "a1", 1 + GP_REG_FIRST } \ +} + +#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE) +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* Declare an uninitialized external linkage data object. */ +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) + +/* This is how to output an element of a case-vector that is absolute. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ + fprintf (STREAM, "%s%sL%u\n", integer_asm_op (4, TRUE), \ + LOCAL_LABEL_PREFIX, VALUE) + +/* This is how to output an element of a case-vector that is relative. + This is used for pc-relative code. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + do { \ + fprintf (STREAM, "%s%sL%u-%sL%u\n", integer_asm_op (4, TRUE), \ + LOCAL_LABEL_PREFIX, (VALUE), \ + LOCAL_LABEL_PREFIX, (REL)); \ + } while (0) + +/* This is how to output an assembler line that says to advance the + location counter to a multiple of 2**LOG bytes. */ +#define ASM_OUTPUT_ALIGN(STREAM, LOG) \ + do { \ + if ((LOG) != 0) \ + fprintf (STREAM, "\t.align\t%d\n", 1 << (LOG)); \ + } while (0) + +/* Indicate that jump tables go in the text section. This is + necessary when compiling PIC code. */ +#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic) + + +/* Define the strings to put out for each section in the object file. */ +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define BSS_SECTION_ASM_OP "\t.section\t.bss" + + +/* Define output to appear before the constant pool. */ +#define ASM_OUTPUT_POOL_PROLOGUE(FILE, FUNNAME, FUNDECL, SIZE) \ + do { \ + if ((SIZE) > 0) \ + { \ + resolve_unique_section ((FUNDECL), 0, flag_function_sections); \ + switch_to_section (function_section (FUNDECL)); \ + fprintf (FILE, "\t.literal_position\n"); \ + } \ + } while (0) + + +/* A C statement (with or without semicolon) to output a constant in + the constant pool, if it needs special treatment. */ +#define ASM_OUTPUT_SPECIAL_POOL_ENTRY(FILE, X, MODE, ALIGN, LABELNO, JUMPTO) \ + do { \ + xtensa_output_literal (FILE, X, MODE, LABELNO); \ + goto JUMPTO; \ + } while (0) + +/* How to start an assembler comment. */ +#define ASM_COMMENT_START "#" + +/* Exception handling. Xtensa uses much of the standard DWARF2 unwinding + machinery, but the variable size register window save areas are too + complicated to efficiently describe with CFI entries. The CFA must + still be specified in DWARF so that DW_AT_frame_base is set correctly + for debugging. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 0) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (0) +#define DWARF_FRAME_REGISTERS 16 +#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) + 2 : INVALID_REGNUM) +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (flag_pic \ + ? (((GLOBAL) ? DW_EH_PE_indirect : 0) \ + | DW_EH_PE_pcrel | DW_EH_PE_sdata4) \ + : DW_EH_PE_absptr) + +/* Emit a PC-relative relocation. */ +#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \ + do { \ + fputs (integer_asm_op (SIZE, FALSE), FILE); \ + assemble_name (FILE, LABEL); \ + fputs ("@pcrel", FILE); \ + } while (0) + +/* Xtensa constant pool breaks the devices in crtstuff.c to control + section in where code resides. We have to write it as asm code. 
Use + a MOVI and let the assembler relax it -- for the .init and .fini + sections, the assembler knows to put the literal in the right + place. */ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\ + movi\ta8, " USER_LABEL_PREFIX #FUNC "\n\ + callx8\ta8\n" \ + TEXT_SECTION_ASM_OP); diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md new file mode 100644 index 000000000..d6eb54891 --- /dev/null +++ b/gcc/config/xtensa/xtensa.md @@ -0,0 +1,1914 @@ +;; GCC machine description for Tensilica's Xtensa architecture. +;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 +;; Free Software Foundation, Inc. +;; Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +(define_constants [ + (A0_REG 0) + (A1_REG 1) + (A7_REG 7) + (A8_REG 8) + + (UNSPEC_NOP 2) + (UNSPEC_PLT 3) + (UNSPEC_RET_ADDR 4) + (UNSPEC_TPOFF 5) + (UNSPEC_DTPOFF 6) + (UNSPEC_TLS_FUNC 7) + (UNSPEC_TLS_ARG 8) + (UNSPEC_TLS_CALL 9) + (UNSPEC_TP 10) + (UNSPEC_MEMW 11) + + (UNSPECV_SET_FP 1) + (UNSPECV_ENTRY 2) + (UNSPECV_S32RI 4) + (UNSPECV_S32C1I 5) + (UNSPECV_EH_RETURN 6) + (UNSPECV_SET_TP 7) +]) + +;; This code iterator allows signed and unsigned widening multiplications +;; to use the same template. +(define_code_iterator any_extend [sign_extend zero_extend]) + +;; expands to an empty string when doing a signed operation and +;; "u" when doing an unsigned operation. +(define_code_attr u [(sign_extend "") (zero_extend "u")]) + +;; is like , but the signed form expands to "s" rather than "". +(define_code_attr su [(sign_extend "s") (zero_extend "u")]) + +;; This code iterator allows four integer min/max operations to be +;; generated from one template. +(define_code_iterator any_minmax [smin umin smax umax]) + +;; expands to the opcode name for any_minmax operations. +(define_code_attr minmax [(smin "min") (umin "minu") + (smax "max") (umax "maxu")]) + +;; This code iterator is for floating-point comparisons. +(define_code_iterator any_scc_sf [eq lt le uneq unlt unle unordered]) +(define_code_attr scc_sf [(eq "oeq") (lt "olt") (le "ole") + (uneq "ueq") (unlt "ult") (unle "ule") + (unordered "un")]) + +;; This iterator and attribute allow to combine most atomic operations. +(define_code_iterator ATOMIC [and ior xor plus minus mult]) +(define_code_attr atomic [(and "and") (ior "ior") (xor "xor") + (plus "add") (minus "sub") (mult "nand")]) + +;; This mode iterator allows the HI and QI patterns to be defined from +;; the same template. +(define_mode_iterator HQI [HI QI]) + + +;; Attributes. 
+ +(define_attr "type" + "unknown,jump,call,load,store,move,arith,multi,nop,farith,fmadd,fdiv,fsqrt,fconv,fload,fstore,mul16,mul32,div32,mac16,rsr,wsr,entry" + (const_string "unknown")) + +(define_attr "mode" + "unknown,none,QI,HI,SI,DI,SF,DF,BL" + (const_string "unknown")) + +(define_attr "length" "" (const_int 1)) + +;; Describe a user's asm statement. +(define_asm_attributes + [(set_attr "type" "multi")]) + + +;; Pipeline model. + +;; The Xtensa basically has simple 5-stage RISC pipeline. +;; Most instructions complete in 1 cycle, and it is OK to assume that +;; everything is fully pipelined. The exceptions have special insn +;; reservations in the pipeline description below. The Xtensa can +;; issue one instruction per cycle, so defining CPU units is unnecessary. + +(define_insn_reservation "xtensa_any_insn" 1 + (eq_attr "type" "!load,fload,rsr,mul16,mul32,fmadd,fconv") + "nothing") + +(define_insn_reservation "xtensa_memory" 2 + (eq_attr "type" "load,fload") + "nothing") + +(define_insn_reservation "xtensa_sreg" 2 + (eq_attr "type" "rsr") + "nothing") + +(define_insn_reservation "xtensa_mul16" 2 + (eq_attr "type" "mul16") + "nothing") + +(define_insn_reservation "xtensa_mul32" 2 + (eq_attr "type" "mul32") + "nothing") + +(define_insn_reservation "xtensa_fmadd" 4 + (eq_attr "type" "fmadd") + "nothing") + +(define_insn_reservation "xtensa_fconv" 2 + (eq_attr "type" "fconv") + "nothing") + +;; Include predicates and constraints. + +(include "predicates.md") +(include "constraints.md") + + +;; Addition. + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=D,D,a,a,a") + (plus:SI (match_operand:SI 1 "register_operand" "%d,d,r,r,r") + (match_operand:SI 2 "add_operand" "d,O,r,J,N")))] + "" + "@ + add.n\t%0, %1, %2 + addi.n\t%0, %1, %d2 + add\t%0, %1, %2 + addi\t%0, %1, %d2 + addmi\t%0, %1, %x2" + [(set_attr "type" "arith,arith,arith,arith,arith") + (set_attr "mode" "SI") + (set_attr "length" "2,2,3,3,3")]) + +(define_insn "*addx" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 3 "addsubx_operand" "i")) + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_ADDX" + "addx%3\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (plus:SF (match_operand:SF 1 "register_operand" "%f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "add.s\t%0, %1, %2" + [(set_attr "type" "fmadd") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Subtraction. 
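The *subx pattern below and the *addx pattern above let the compiler fold a power-of-two scale factor into a single SUBX2/SUBX4/SUBX8 or ADDX2/ADDX4/ADDX8 instruction when TARGET_ADDX is set. A minimal C sketch of source code that exposes this shape; the function names are illustrative only, and whether the scaled form is actually chosen depends on the configured core and the optimization level:

/* Illustrative only: scaled arithmetic of the form a + b*4 or b*4 - a
   matches the (plus (mult ...)) / (minus (mult ...)) RTL accepted by
   the *addx and *subx patterns when TARGET_ADDX is enabled.  */
int scaled_add (int a, int b)
{
  return a + b * 4;      /* candidate for addx4 */
}

int scaled_sub (int a, int b)
{
  return b * 4 - a;      /* candidate for subx4 */
}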
+ +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "sub\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "*subx" + [(set (match_operand:SI 0 "register_operand" "=a") + (minus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 3 "addsubx_operand" "i")) + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_ADDX" + "subx%3\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (minus:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "sub.s\t%0, %1, %2" + [(set_attr "type" "fmadd") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Multiplication. + +(define_expand "mulsidi3" + [(set (match_operand:DI 0 "register_operand") + (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand")) + (any_extend:DI (match_operand:SI 2 "register_operand"))))] + "TARGET_MUL32_HIGH" +{ + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); + emit_insn (gen_mulsi3_highpart (gen_highpart (SImode, operands[0]), + operands[1], operands[2])); + emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp)); + DONE; +}) + +(define_insn "mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=a") + (truncate:SI + (lshiftrt:DI + (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "%r")) + (any_extend:DI (match_operand:SI 2 "register_operand" "r"))) + (const_int 32))))] + "TARGET_MUL32_HIGH" + "mulh\t%0, %1, %2" + [(set_attr "type" "mul32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "mulsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (mult:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_MUL32" + "mull\t%0, %1, %2" + [(set_attr "type" "mul32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=C,A") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "%r,r")) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "r,r"))))] + "TARGET_MUL16 || TARGET_MAC16" + "@ + mul16s\t%0, %1, %2 + mul.aa.ll\t%1, %2" + [(set_attr "type" "mul16,mac16") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "umulhisi3" + [(set (match_operand:SI 0 "register_operand" "=C,A") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "%r,r")) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "r,r"))))] + "TARGET_MUL16 || TARGET_MAC16" + "@ + mul16u\t%0, %1, %2 + umul.aa.ll\t%1, %2" + [(set_attr "type" "mul16,mac16") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "muladdhisi" + [(set (match_operand:SI 0 "register_operand" "=A") + (plus:SI (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "%r")) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "r"))) + (match_operand:SI 3 "register_operand" "0")))] + "TARGET_MAC16" + "mula.aa.ll\t%1, %2" + [(set_attr "type" "mac16") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "mulsubhisi" + [(set (match_operand:SI 0 "register_operand" "=A") + (minus:SI (match_operand:SI 1 "register_operand" "0") + (mult:SI (sign_extend:SI + 
(match_operand:HI 2 "register_operand" "%r")) + (sign_extend:SI + (match_operand:HI 3 "register_operand" "r")))))] + "TARGET_MAC16" + "muls.aa.ll\t%2, %3" + [(set_attr "type" "mac16") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (mult:SF (match_operand:SF 1 "register_operand" "%f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "mul.s\t%0, %1, %2" + [(set_attr "type" "fmadd") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "fmasf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "0")))] + "TARGET_HARD_FLOAT" + "madd.s\t%0, %1, %2" + [(set_attr "type" "fmadd") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +;; Note that (C - A*B) = (-A*B + C) +(define_insn "fnmasf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "0")))] + "TARGET_HARD_FLOAT" + "msub.s\t%0, %1, %2" + [(set_attr "type" "fmadd") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Division. + +(define_insn "divsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_DIV32" + "quos\t%0, %1, %2" + [(set_attr "type" "div32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "udivsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_DIV32" + "quou\t%0, %1, %2" + [(set_attr "type" "div32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "divsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (div:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT_DIV" + "div.s\t%0, %1, %2" + [(set_attr "type" "fdiv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "*recipsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (div:SF (match_operand:SF 1 "const_float_1_operand" "") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT_RECIP && flag_unsafe_math_optimizations" + "recip.s\t%0, %2" + [(set_attr "type" "fdiv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Remainders. + +(define_insn "modsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (mod:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_DIV32" + "rems\t%0, %1, %2" + [(set_attr "type" "div32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "umodsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (umod:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_DIV32" + "remu\t%0, %1, %2" + [(set_attr "type" "div32") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + + +;; Square roots. 
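The *recipsf2 pattern above and the *rsqrtsf2 pattern below only match under flag_unsafe_math_optimizations, since RECIP.S and RSQRT.S deliver approximations rather than correctly rounded results. A hedged C sketch of code that can reach these patterns, assuming a core with the corresponding floating-point options and a user who passes -funsafe-math-optimizations (or -ffast-math); otherwise the divisions stay as div.s and sqrt.s:

#include <math.h>

/* Illustrative only: with unsafe math optimizations enabled these
   divisions may be rewritten into the recip.s / rsqrt.s forms.  */
float reciprocal (float x)
{
  return 1.0f / x;              /* candidate for *recipsf2 */
}

float inv_sqrt (float x)
{
  return 1.0f / sqrtf (x);      /* candidate for *rsqrtsf2 */
}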
+ +(define_insn "sqrtsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (sqrt:SF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT_SQRT" + "sqrt.s\t%0, %1" + [(set_attr "type" "fsqrt") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "*rsqrtsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (div:SF (match_operand:SF 1 "const_float_1_operand" "") + (sqrt:SF (match_operand:SF 2 "register_operand" "f"))))] + "TARGET_HARD_FLOAT_RSQRT && flag_unsafe_math_optimizations" + "rsqrt.s\t%0, %2" + [(set_attr "type" "fsqrt") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Absolute value. + +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=a") + (abs:SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_ABS" + "abs\t%0, %1" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (abs:SF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "abs.s\t%0, %1" + [(set_attr "type" "farith") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Min and max. + +(define_insn "si3" + [(set (match_operand:SI 0 "register_operand" "=a") + (any_minmax:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_MINMAX" + "\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + + +;; Count leading/trailing zeros and find first bit. + +(define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=a") + (clz:SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_NSA" + "nsau\t%0, %1" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_expand "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "") + (ctz:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_NSA" +{ + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_negsi2 (temp, operands[1])); + emit_insn (gen_andsi3 (temp, temp, operands[1])); + emit_insn (gen_clzsi2 (temp, temp)); + emit_insn (gen_negsi2 (temp, temp)); + emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (31))); + DONE; +}) + +(define_expand "ffssi2" + [(set (match_operand:SI 0 "register_operand" "") + (ffs:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_NSA" +{ + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_negsi2 (temp, operands[1])); + emit_insn (gen_andsi3 (temp, temp, operands[1])); + emit_insn (gen_clzsi2 (temp, temp)); + emit_insn (gen_negsi2 (temp, temp)); + emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (32))); + DONE; +}) + + +;; Negation and one's complement. + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=a") + (neg:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "neg\t%0, %1" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "") + (not:SI (match_operand:SI 1 "register_operand" "")))] + "" +{ + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_movsi (temp, constm1_rtx)); + emit_insn (gen_xorsi3 (operands[0], temp, operands[1])); + DONE; +}) + +(define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "neg.s\t%0, %1" + [(set_attr "type" "farith") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Logical instructions. 
+ +(define_insn "andsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (and:SI (match_operand:SI 1 "register_operand" "%r,r") + (match_operand:SI 2 "mask_operand" "P,r")))] + "" + "@ + extui\t%0, %1, 0, %K2 + and\t%0, %1, %2" + [(set_attr "type" "arith,arith") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (ior:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "or\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=a") + (xor:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "xor\t%0, %1, %2" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + + +;; Zero-extend instructions. + +(define_insn "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (zero_extend:SI (match_operand:HI 1 "nonimmed_operand" "r,U")))] + "" + "@ + extui\t%0, %1, 0, 16 + l16ui\t%0, %1" + [(set_attr "type" "arith,load") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (zero_extend:SI (match_operand:QI 1 "nonimmed_operand" "r,U")))] + "" + "@ + extui\t%0, %1, 0, 8 + l8ui\t%0, %1" + [(set_attr "type" "arith,load") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + + +;; Sign-extend instructions. + +(define_expand "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "register_operand" "")))] + "" +{ + if (sext_operand (operands[1], HImode)) + emit_insn (gen_extendhisi2_internal (operands[0], operands[1])); + else + xtensa_extend_reg (operands[0], operands[1]); + DONE; +}) + +(define_insn "extendhisi2_internal" + [(set (match_operand:SI 0 "register_operand" "=B,a") + (sign_extend:SI (match_operand:HI 1 "sext_operand" "r,U")))] + "" + "@ + sext\t%0, %1, 15 + l16si\t%0, %1" + [(set_attr "type" "arith,load") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_expand "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "register_operand" "")))] + "" +{ + if (TARGET_SEXT) + emit_insn (gen_extendqisi2_internal (operands[0], operands[1])); + else + xtensa_extend_reg (operands[0], operands[1]); + DONE; +}) + +(define_insn "extendqisi2_internal" + [(set (match_operand:SI 0 "register_operand" "=B") + (sign_extend:SI (match_operand:QI 1 "register_operand" "r")))] + "TARGET_SEXT" + "sext\t%0, %1, 7" + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + + +;; Field extract instructions. + +(define_expand "extv" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")))] + "TARGET_SEXT" +{ + if (!sext_fldsz_operand (operands[2], SImode)) + FAIL; + + /* We could expand to a right shift followed by SEXT but that's + no better than the standard left and right shift sequence. 
*/ + if (!lsbitnum_operand (operands[3], SImode)) + FAIL; + + emit_insn (gen_extv_internal (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_insn "extv_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "sext_fldsz_operand" "i") + (match_operand:SI 3 "lsbitnum_operand" "i")))] + "TARGET_SEXT" +{ + int fldsz = INTVAL (operands[2]); + operands[2] = GEN_INT (fldsz - 1); + return "sext\t%0, %1, %2"; +} + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_expand "extzv" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")))] + "" +{ + if (!extui_fldsz_operand (operands[2], SImode)) + FAIL; + emit_insn (gen_extzv_internal (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_insn "extzv_internal" + [(set (match_operand:SI 0 "register_operand" "=a") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "extui_fldsz_operand" "i") + (match_operand:SI 3 "const_int_operand" "i")))] + "" +{ + int shift; + if (BITS_BIG_ENDIAN) + shift = (32 - (INTVAL (operands[2]) + INTVAL (operands[3]))) & 0x1f; + else + shift = INTVAL (operands[3]) & 0x1f; + operands[3] = GEN_INT (shift); + return "extui\t%0, %1, %3, %2"; +} + [(set_attr "type" "arith") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + + +;; Conversions. + +(define_insn "fix_truncsfsi2" + [(set (match_operand:SI 0 "register_operand" "=a") + (fix:SI (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "trunc.s\t%0, %1, 0" + [(set_attr "type" "fconv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "fixuns_truncsfsi2" + [(set (match_operand:SI 0 "register_operand" "=a") + (unsigned_fix:SI (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "utrunc.s\t%0, %1, 0" + [(set_attr "type" "fconv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:SI 1 "register_operand" "a")))] + "TARGET_HARD_FLOAT" + "float.s\t%0, %1, 0" + [(set_attr "type" "fconv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "floatunssisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (unsigned_float:SF (match_operand:SI 1 "register_operand" "a")))] + "TARGET_HARD_FLOAT" + "ufloat.s\t%0, %1, 0" + [(set_attr "type" "fconv") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + + +;; Data movement instructions. 
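For the integer move patterns below, the interesting alternatives are the ones that materialize constants: MOVI.N/MOVI for small immediates, a pair of CONST16 instructions when TARGET_CONST16 is configured, and an L32R literal-pool load otherwise (reached through force_const_mem in the expanders and the T/W constraints). A hypothetical C example; which form is chosen depends on the configured core and on the constant's value:

/* Illustrative only: possible ways a 32-bit constant is materialized
   by the movsi alternatives below.
     fits a signed 12-bit immediate  : movi (or movi.n when in range)
     TARGET_CONST16 configured       : const16 high half, const16 low half
     otherwise                       : l32r from the literal pool  */
int small_constant (void) { return 1000; }
int large_constant (void) { return 0x12345678; }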
+ +;; 64-bit Integer moves + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmed_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" +{ + if (CONSTANT_P (operands[1]) && !TARGET_CONST16) + operands[1] = force_const_mem (DImode, operands[1]); + + if (!register_operand (operands[0], DImode) + && !register_operand (operands[1], DImode)) + operands[1] = force_reg (DImode, operands[1]); + + operands[1] = xtensa_copy_incoming_a7 (operands[1]); +}) + +(define_insn_and_split "movdi_internal" + [(set (match_operand:DI 0 "nonimmed_operand" "=a,W,a,a,U") + (match_operand:DI 1 "move_operand" "r,i,T,U,r"))] + "register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] +{ + xtensa_split_operand_pair (operands, SImode); + if (reg_overlap_mentioned_p (operands[0], operands[3])) + { + rtx tmp; + tmp = operands[0], operands[0] = operands[1], operands[1] = tmp; + tmp = operands[2], operands[2] = operands[3], operands[3] = tmp; + } +}) + +;; 32-bit Integer moves + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmed_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" +{ + if (xtensa_emit_move_sequence (operands, SImode)) + DONE; +}) + +(define_insn "movsi_internal" + [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,D,R,R,a,q,a,W,a,a,U,*a,*A") + (match_operand:SI 1 "move_operand" "M,D,d,R,D,d,r,r,I,i,T,U,r,*A,*r"))] + "xtensa_valid_move (SImode, operands)" + "@ + movi.n\t%0, %x1 + mov.n\t%0, %1 + mov.n\t%0, %1 + %v1l32i.n\t%0, %1 + %v0s32i.n\t%1, %0 + %v0s32i.n\t%1, %0 + mov\t%0, %1 + movsp\t%0, %1 + movi\t%0, %x1 + const16\t%0, %t1\;const16\t%0, %b1 + %v1l32r\t%0, %1 + %v1l32i\t%0, %1 + %v0s32i\t%1, %0 + rsr\t%0, ACCLO + wsr\t%1, ACCLO" + [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,load,load,store,rsr,wsr") + (set_attr "mode" "SI") + (set_attr "length" "2,2,2,2,2,2,3,3,3,6,3,3,3,3,3")]) + +;; 16-bit Integer moves + +(define_expand "movhi" + [(set (match_operand:HI 0 "nonimmed_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" +{ + if (xtensa_emit_move_sequence (operands, HImode)) + DONE; +}) + +(define_insn "movhi_internal" + [(set (match_operand:HI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A") + (match_operand:HI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))] + "xtensa_valid_move (HImode, operands)" + "@ + movi.n\t%0, %x1 + mov.n\t%0, %1 + mov\t%0, %1 + movi\t%0, %x1 + %v1l16ui\t%0, %1 + %v0s16i\t%1, %0 + rsr\t%0, ACCLO + wsr\t%1, ACCLO" + [(set_attr "type" "move,move,move,move,load,store,rsr,wsr") + (set_attr "mode" "HI") + (set_attr "length" "2,2,3,3,3,3,3,3")]) + +;; 8-bit Integer moves + +(define_expand "movqi" + [(set (match_operand:QI 0 "nonimmed_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" +{ + if (xtensa_emit_move_sequence (operands, QImode)) + DONE; +}) + +(define_insn "movqi_internal" + [(set (match_operand:QI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A") + (match_operand:QI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))] + "xtensa_valid_move (QImode, operands)" + "@ + movi.n\t%0, %x1 + mov.n\t%0, %1 + mov\t%0, %1 + movi\t%0, %x1 + %v1l8ui\t%0, %1 + %v0s8i\t%1, %0 + rsr\t%0, ACCLO + wsr\t%1, ACCLO" + [(set_attr "type" "move,move,move,move,load,store,rsr,wsr") + (set_attr "mode" "QI") + (set_attr "length" "2,2,3,3,3,3,3,3")]) + +;; Sub-word reloads from the constant pool. 
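The reload_literal expander below cannot load an HImode or QImode value from the literal pool directly, because the pool is only reachable with the word-sized L32R load; it therefore loads the containing SImode word into a scratch register and takes a sub-word SUBREG of the scratch. The word/byte split it performs on a SUBREG source is a simple align-down; a C sketch of that arithmetic, assuming 4-byte words (UNITS_PER_WORD == 4):

/* Illustrative only: how a SUBREG_BYTE offset is split into the
   aligned word offset that is actually loaded and the byte offset
   that selects the sub-word within it.  */
struct split { unsigned word_off; unsigned byte_off; };

static struct split split_subreg_byte (unsigned subreg_byte)
{
  struct split s;
  s.word_off = subreg_byte & ~(4u - 1);    /* align down to a word */
  s.byte_off = subreg_byte - s.word_off;   /* remainder within the word */
  return s;
}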
+ +(define_expand "reload_literal" + [(parallel [(match_operand:HQI 0 "register_operand" "=r") + (match_operand:HQI 1 "constantpool_operand" "") + (match_operand:SI 2 "register_operand" "=&r")])] + "" +{ + rtx lit, scratch; + unsigned word_off, byte_off; + + if (MEM_P (operands[1])) + { + lit = operands[1]; + word_off = 0; + byte_off = 0; + } + else + { + gcc_assert (GET_CODE (operands[1]) == SUBREG); + lit = SUBREG_REG (operands[1]); + word_off = SUBREG_BYTE (operands[1]) & ~(UNITS_PER_WORD - 1); + byte_off = SUBREG_BYTE (operands[1]) - word_off; + } + + lit = adjust_address (lit, SImode, word_off); + scratch = operands[2]; + emit_insn (gen_movsi (scratch, lit)); + emit_insn (gen_mov (operands[0], + gen_rtx_SUBREG (mode, scratch, byte_off))); + + DONE; +}) + +;; 32-bit floating point moves + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmed_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" +{ + if (!TARGET_CONST16 && CONSTANT_P (operands[1])) + operands[1] = force_const_mem (SFmode, operands[1]); + + if ((!register_operand (operands[0], SFmode) + && !register_operand (operands[1], SFmode)) + || (FP_REG_P (xt_true_regnum (operands[0])) + && !(reload_in_progress | reload_completed) + && (constantpool_mem_p (operands[1]) + || CONSTANT_P (operands[1])))) + operands[1] = force_reg (SFmode, operands[1]); + + operands[1] = xtensa_copy_incoming_a7 (operands[1]); +}) + +(define_insn "movsf_internal" + [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,U,D,D,R,a,f,a,W,a,a,U") + (match_operand:SF 1 "move_operand" "f,U,f,d,R,d,r,r,f,iF,T,U,r"))] + "((register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode)) + && !(FP_REG_P (xt_true_regnum (operands[0])) + && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))" + "@ + mov.s\t%0, %1 + %v1lsi\t%0, %1 + %v0ssi\t%1, %0 + mov.n\t%0, %1 + %v1l32i.n\t%0, %1 + %v0s32i.n\t%1, %0 + mov\t%0, %1 + wfr\t%0, %1 + rfr\t%0, %1 + const16\t%0, %t1\;const16\t%0, %b1 + %v1l32r\t%0, %1 + %v1l32i\t%0, %1 + %v0s32i\t%1, %0" + [(set_attr "type" "farith,fload,fstore,move,load,store,move,farith,farith,move,load,load,store") + (set_attr "mode" "SF") + (set_attr "length" "3,3,3,2,2,2,3,3,3,6,3,3,3")]) + +(define_insn "*lsiu" + [(set (match_operand:SF 0 "register_operand" "=f") + (mem:SF (plus:SI (match_operand:SI 1 "register_operand" "+a") + (match_operand:SI 2 "fpmem_offset_operand" "i")))) + (set (match_dup 1) + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_HARD_FLOAT" +{ + if (TARGET_SERIALIZE_VOLATILE && volatile_refs_p (PATTERN (insn))) + output_asm_insn ("memw", operands); + return "lsiu\t%0, %1, %2"; +} + [(set_attr "type" "fload") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +(define_insn "*ssiu" + [(set (mem:SF (plus:SI (match_operand:SI 0 "register_operand" "+a") + (match_operand:SI 1 "fpmem_offset_operand" "i"))) + (match_operand:SF 2 "register_operand" "f")) + (set (match_dup 0) + (plus:SI (match_dup 0) (match_dup 1)))] + "TARGET_HARD_FLOAT" +{ + if (TARGET_SERIALIZE_VOLATILE && volatile_refs_p (PATTERN (insn))) + output_asm_insn ("memw", operands); + return "ssiu\t%2, %0, %1"; +} + [(set_attr "type" "fstore") + (set_attr "mode" "SF") + (set_attr "length" "3")]) + +;; 64-bit floating point moves + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmed_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" +{ + if (CONSTANT_P (operands[1]) && !TARGET_CONST16) + operands[1] = force_const_mem (DFmode, operands[1]); + + if (!register_operand (operands[0], DFmode) + && 
!register_operand (operands[1], DFmode)) + operands[1] = force_reg (DFmode, operands[1]); + + operands[1] = xtensa_copy_incoming_a7 (operands[1]); +}) + +(define_insn_and_split "movdf_internal" + [(set (match_operand:DF 0 "nonimmed_operand" "=a,W,a,a,U") + (match_operand:DF 1 "move_operand" "r,iF,T,U,r"))] + "register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] +{ + xtensa_split_operand_pair (operands, SFmode); + if (reg_overlap_mentioned_p (operands[0], operands[3])) + { + rtx tmp; + tmp = operands[0], operands[0] = operands[1], operands[1] = tmp; + tmp = operands[2], operands[2] = operands[3], operands[3] = tmp; + } +}) + +;; Block moves + +(define_expand "movmemsi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "")) + (use (match_operand:SI 2 "arith_operand" "")) + (use (match_operand:SI 3 "const_int_operand" ""))])] + "" +{ + if (!xtensa_expand_block_move (operands)) + FAIL; + DONE; +}) + + +;; Shift instructions. + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arith_operand" "")))] + "" +{ + operands[1] = xtensa_copy_incoming_a7 (operands[1]); +}) + +(define_insn "ashlsi3_internal" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] + "" + "@ + slli\t%0, %1, %R2 + ssl\t%2\;sll\t%0, %1" + [(set_attr "type" "arith,arith") + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +(define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] + "" + "@ + srai\t%0, %1, %R2 + ssr\t%2\;sra\t%0, %1" + [(set_attr "type" "arith,arith") + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] + "" +{ + if (which_alternative == 0) + { + if ((INTVAL (operands[2]) & 0x1f) < 16) + return "srli\t%0, %1, %R2"; + else + return "extui\t%0, %1, %R2, %L2"; + } + return "ssr\t%2\;srl\t%0, %1"; +} + [(set_attr "type" "arith,arith") + (set_attr "mode" "SI") + (set_attr "length" "3,6")]) + +(define_insn "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (rotate:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] + "" + "@ + ssai\t%L2\;src\t%0, %1, %1 + ssl\t%2\;src\t%0, %1, %1" + [(set_attr "type" "multi,multi") + (set_attr "mode" "SI") + (set_attr "length" "6,6")]) + +(define_insn "rotrsi3" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (rotatert:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "J,r")))] + "" + "@ + ssai\t%R2\;src\t%0, %1, %1 + ssr\t%2\;src\t%0, %1, %1" + [(set_attr "type" "multi,multi") + (set_attr "mode" "SI") + (set_attr "length" "6,6")]) + + +;; Comparisons. + +;; Conditional branches. 
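The branch patterns below (*btrue, *ubtrue and friends) accept either a register or a small immediate operand, using the K constraint for signed comparisons and comparisons against zero and the L constraint for unsigned ones; anything that does not fit is forced into a register by xtensa_expand_conditional_branch. A hypothetical C example; the set of encodable immediates is limited, so the constant cases are only candidates for the immediate branch forms:

/* Illustrative only: conditions shaped so that conditional branches,
   rather than conditional moves, are the natural expansion.  */
extern void hit (void);                 /* hypothetical callee */

void branch_on_zero (int x)       { if (x != 0) hit (); }   /* bnez-style            */
void branch_signed (int x)        { if (x < 8)  hit (); }   /* blti-style candidate  */
void branch_unsigned (unsigned x) { if (x < 8)  hit (); }   /* bltui-style candidate */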
+ +(define_expand "cbranchsi4" + [(match_operator 0 "comparison_operator" + [(match_operand:SI 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand")]) + (match_operand 3 "")] + "" +{ + xtensa_expand_conditional_branch (operands, SImode); + DONE; +}) + +(define_expand "cbranchsf4" + [(match_operator 0 "comparison_operator" + [(match_operand:SF 1 "register_operand") + (match_operand:SF 2 "register_operand")]) + (match_operand 3 "")] + "TARGET_HARD_FLOAT" +{ + xtensa_expand_conditional_branch (operands, SFmode); + DONE; +}) + +;; Branch patterns for standard integer comparisons + +(define_insn "*btrue" + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" + [(match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "branch_operand" "K,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +{ + return xtensa_emit_branch (false, which_alternative == 0, operands); +} + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") + (set_attr "length" "3,3")]) + +(define_insn "*bfalse" + [(set (pc) + (if_then_else (match_operator 3 "branch_operator" + [(match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "branch_operand" "K,r")]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" +{ + return xtensa_emit_branch (true, which_alternative == 0, operands); +} + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") + (set_attr "length" "3,3")]) + +(define_insn "*ubtrue" + [(set (pc) + (if_then_else (match_operator 3 "ubranch_operator" + [(match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "ubranch_operand" "L,r")]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +{ + return xtensa_emit_branch (false, which_alternative == 0, operands); +} + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") + (set_attr "length" "3,3")]) + +(define_insn "*ubfalse" + [(set (pc) + (if_then_else (match_operator 3 "ubranch_operator" + [(match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "ubranch_operand" "L,r")]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" +{ + return xtensa_emit_branch (true, which_alternative == 0, operands); +} + [(set_attr "type" "jump,jump") + (set_attr "mode" "none") + (set_attr "length" "3,3")]) + +;; Branch patterns for bit testing + +(define_insn "*bittrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" + [(zero_extract:SI + (match_operand:SI 0 "register_operand" "r,r") + (const_int 1) + (match_operand:SI 1 "arith_operand" "J,r")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +{ + return xtensa_emit_bit_branch (false, which_alternative == 0, operands); +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "*bitfalse" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" + [(zero_extract:SI + (match_operand:SI 0 "register_operand" "r,r") + (const_int 1) + (match_operand:SI 1 "arith_operand" "J,r")) + (const_int 0)]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" +{ + return xtensa_emit_bit_branch (true, which_alternative == 0, operands); +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "*masktrue" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +{ + switch (GET_CODE (operands[3])) + { + case EQ: return "bnone\t%0, %1, %2"; + case NE: return 
"bany\t%0, %1, %2"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "*maskfalse" + [(set (pc) + (if_then_else (match_operator 3 "boolean_operator" + [(and:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r")) + (const_int 0)]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" +{ + switch (GET_CODE (operands[3])) + { + case EQ: return "bany\t%0, %1, %2"; + case NE: return "bnone\t%0, %1, %2"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + + +;; Define the loop insns used by bct optimization to represent the +;; start and end of a zero-overhead loop (in loop.c). This start +;; template generates the loop insn; the end template doesn't generate +;; any instructions since loop end is handled in hardware. + +(define_insn "zero_cost_loop_start" + [(set (pc) + (if_then_else (eq (match_operand:SI 0 "register_operand" "a") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (reg:SI 19) + (plus:SI (match_dup 0) (const_int -1)))] + "" + "loopnez\t%0, %l1" + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "zero_cost_loop_end" + [(set (pc) + (if_then_else (ne (reg:SI 19) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc))) + (set (reg:SI 19) + (plus:SI (reg:SI 19) (const_int -1)))] + "" +{ + xtensa_emit_loop_end (insn, operands); + return ""; +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "0")]) + + +;; Setting a register from a comparison. + +(define_expand "cstoresi4" + [(match_operand:SI 0 "register_operand") + (match_operator 1 "xtensa_cstoresi_operator" + [(match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonmemory_operand")])] + "" +{ + if (!xtensa_expand_scc (operands, SImode)) + FAIL; + DONE; +}) + +(define_expand "cstoresf4" + [(match_operand:SI 0 "register_operand") + (match_operator:SI 1 "comparison_operator" + [(match_operand:SF 2 "register_operand") + (match_operand:SF 3 "register_operand")])] + "TARGET_HARD_FLOAT" +{ + if (!xtensa_expand_scc (operands, SFmode)) + FAIL; + DONE; +}) + + + +;; Conditional moves. 
+ +(define_expand "movsicc" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "")))] + "" +{ + if (!xtensa_expand_conditional_move (operands, 0)) + FAIL; + DONE; +}) + +(define_expand "movsfcc" + [(set (match_operand:SF 0 "register_operand" "") + (if_then_else:SF (match_operand 1 "comparison_operator" "") + (match_operand:SF 2 "register_operand" "") + (match_operand:SF 3 "register_operand" "")))] + "" +{ + if (!xtensa_expand_conditional_move (operands, 1)) + FAIL; + DONE; +}) + +(define_insn "movsicc_internal0" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (if_then_else:SI (match_operator 4 "branch_operator" + [(match_operand:SI 1 "register_operand" "r,r") + (const_int 0)]) + (match_operand:SI 2 "register_operand" "r,0") + (match_operand:SI 3 "register_operand" "0,r")))] + "" +{ + return xtensa_emit_movcc (which_alternative == 1, false, false, operands); +} + [(set_attr "type" "move,move") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "movsicc_internal1" + [(set (match_operand:SI 0 "register_operand" "=a,a") + (if_then_else:SI (match_operator 4 "boolean_operator" + [(match_operand:CC 1 "register_operand" "b,b") + (const_int 0)]) + (match_operand:SI 2 "register_operand" "r,0") + (match_operand:SI 3 "register_operand" "0,r")))] + "TARGET_BOOLEANS" +{ + return xtensa_emit_movcc (which_alternative == 1, false, true, operands); +} + [(set_attr "type" "move,move") + (set_attr "mode" "SI") + (set_attr "length" "3,3")]) + +(define_insn "movsfcc_internal0" + [(set (match_operand:SF 0 "register_operand" "=a,a,f,f") + (if_then_else:SF (match_operator 4 "branch_operator" + [(match_operand:SI 1 "register_operand" "r,r,r,r") + (const_int 0)]) + (match_operand:SF 2 "register_operand" "r,0,f,0") + (match_operand:SF 3 "register_operand" "0,r,0,f")))] + "" +{ + return xtensa_emit_movcc ((which_alternative & 1) == 1, + which_alternative >= 2, false, operands); +} + [(set_attr "type" "move,move,move,move") + (set_attr "mode" "SF") + (set_attr "length" "3,3,3,3")]) + +(define_insn "movsfcc_internal1" + [(set (match_operand:SF 0 "register_operand" "=a,a,f,f") + (if_then_else:SF (match_operator 4 "boolean_operator" + [(match_operand:CC 1 "register_operand" "b,b,b,b") + (const_int 0)]) + (match_operand:SF 2 "register_operand" "r,0,f,0") + (match_operand:SF 3 "register_operand" "0,r,0,f")))] + "TARGET_BOOLEANS" +{ + return xtensa_emit_movcc ((which_alternative & 1) == 1, + which_alternative >= 2, true, operands); +} + [(set_attr "type" "move,move,move,move") + (set_attr "mode" "SF") + (set_attr "length" "3,3,3,3")]) + + +;; Floating-point comparisons. + +(define_insn "s_sf" + [(set (match_operand:CC 0 "register_operand" "=b") + (any_scc_sf:CC (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + ".s\t%0, %1, %2" + [(set_attr "type" "farith") + (set_attr "mode" "BL") + (set_attr "length" "3")]) + + +;; Unconditional branches. 
+ +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "j\t%l0" + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_expand "indirect_jump" + [(set (pc) + (match_operand 0 "register_operand" ""))] + "" +{ + rtx dest = operands[0]; + if (GET_CODE (dest) != REG || GET_MODE (dest) != Pmode) + operands[0] = copy_to_mode_reg (Pmode, dest); + + emit_jump_insn (gen_indirect_jump_internal (dest)); + DONE; +}) + +(define_insn "indirect_jump_internal" + [(set (pc) (match_operand:SI 0 "register_operand" "r"))] + "" + "jx\t%0" + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + + +(define_expand "tablejump" + [(use (match_operand:SI 0 "register_operand" "")) + (use (label_ref (match_operand 1 "" "")))] + "" +{ + rtx target = operands[0]; + if (flag_pic) + { + /* For PIC, the table entry is relative to the start of the table. */ + rtx label = gen_reg_rtx (SImode); + target = gen_reg_rtx (SImode); + emit_move_insn (label, gen_rtx_LABEL_REF (SImode, operands[1])); + emit_insn (gen_addsi3 (target, operands[0], label)); + } + emit_jump_insn (gen_tablejump_internal (target, operands[1])); + DONE; +}) + +(define_insn "tablejump_internal" + [(set (pc) + (match_operand:SI 0 "register_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "" + "jx\t%0" + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + + +;; Function calls. + +(define_expand "sym_PLT" + [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_PLT))] + "" + "") + +(define_expand "call" + [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "" ""))] + "" +{ + rtx addr = XEXP (operands[0], 0); + if (flag_pic && GET_CODE (addr) == SYMBOL_REF + && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) + addr = gen_sym_PLT (addr); + if (!call_insn_operand (addr, VOIDmode)) + XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr); +}) + +(define_insn "call_internal" + [(call (mem (match_operand:SI 0 "call_insn_operand" "nir")) + (match_operand 1 "" "i"))] + "" +{ + return xtensa_emit_call (0, operands); +} + [(set_attr "type" "call") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_expand "call_value" + [(set (match_operand 0 "register_operand" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "" "")))] + "" +{ + rtx addr = XEXP (operands[1], 0); + if (flag_pic && GET_CODE (addr) == SYMBOL_REF + && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr))) + addr = gen_sym_PLT (addr); + if (!call_insn_operand (addr, VOIDmode)) + XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr); +}) + +(define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "=a") + (call (mem (match_operand:SI 1 "call_insn_operand" "nir")) + (match_operand 2 "" "i")))] + "" +{ + return xtensa_emit_call (1, operands); +} + [(set_attr "type" "call") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "entry" + [(set (reg:SI A1_REG) + (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")] + UNSPECV_ENTRY))] + "" + "entry\tsp, %0" + [(set_attr "type" "entry") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "return" + [(return) + (use (reg:SI A0_REG))] + "reload_completed" +{ + return (TARGET_DENSITY ? "retw.n" : "retw"); +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "2")]) + + +;; Miscellaneous instructions. 
+ +(define_expand "prologue" + [(const_int 0)] + "" +{ + xtensa_expand_prologue (); + DONE; +}) + +(define_expand "epilogue" + [(return)] + "" +{ + emit_jump_insn (gen_return ()); + DONE; +}) + +(define_insn "nop" + [(const_int 0)] + "" +{ + return (TARGET_DENSITY ? "nop.n" : "nop"); +} + [(set_attr "type" "nop") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_expand "nonlocal_goto" + [(match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "") + (match_operand:SI 3 "" "")] + "" +{ + xtensa_expand_nonlocal_goto (operands); + DONE; +}) + +;; Stuff an address into the return address register along with the window +;; size in the high bits. Because we don't have the window size of the +;; previous frame, assume the function called out with a CALL8 since that +;; is what compilers always use. Note: __builtin_frob_return_addr has +;; already been applied to the handler, but the generic version doesn't +;; allow us to frob it quite enough, so we just frob here. + +(define_insn_and_split "eh_return" + [(set (reg:SI A0_REG) + (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + UNSPECV_EH_RETURN)) + (clobber (match_scratch:SI 1 "=r"))] + "" + "#" + "reload_completed" + [(set (match_dup 1) (ashift:SI (match_dup 0) (const_int 2))) + (set (match_dup 1) (plus:SI (match_dup 1) (const_int 2))) + (set (reg:SI A0_REG) (rotatert:SI (match_dup 1) (const_int 2)))] + "") + +;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't +;; know if a frame pointer is required until the reload pass, and +;; because there may be an incoming argument value in the hard frame +;; pointer register (a7). If there is an incoming argument in that +;; register, the "set_frame_ptr" insn gets inserted immediately after +;; the insn that copies the incoming argument to a pseudo or to the +;; stack. This serves several purposes here: (1) it keeps the +;; optimizer from copy-propagating or scheduling the use of a7 as an +;; incoming argument away from the beginning of the function; (2) we +;; can use a post-reload splitter to expand away the insn if a frame +;; pointer is not required, so that the post-reload scheduler can do +;; the right thing; and (3) it makes it easy for the prologue expander +;; to search for this insn to determine whether it should add a new insn +;; to set up the frame pointer. + +(define_insn "set_frame_ptr" + [(set (reg:SI A7_REG) (unspec_volatile:SI [(const_int 0)] UNSPECV_SET_FP))] + "" +{ + if (frame_pointer_needed) + return "mov\ta7, sp"; + return ""; +} + [(set_attr "type" "move") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +;; Post-reload splitter to remove fp assignment when it's not needed. +(define_split + [(set (reg:SI A7_REG) (unspec_volatile:SI [(const_int 0)] UNSPECV_SET_FP))] + "reload_completed && !frame_pointer_needed" + [(unspec [(const_int 0)] UNSPEC_NOP)] + "") + +;; The preceding splitter needs something to split the insn into; +;; things start breaking if the result is just a "use" so instead we +;; generate the following insn. 
+(define_insn "*unspec_nop" + [(unspec [(const_int 0)] UNSPEC_NOP)] + "" + "" + [(set_attr "type" "nop") + (set_attr "mode" "none") + (set_attr "length" "0")]) + + +;; TLS support + +(define_expand "sym_TPOFF" + [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_TPOFF))] + "" + "") + +(define_expand "sym_DTPOFF" + [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_DTPOFF))] + "" + "") + +(define_insn "load_tp" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(const_int 0)] UNSPEC_TP))] + "TARGET_THREADPTR" + "rur\t%0, THREADPTR" + [(set_attr "type" "rsr") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "set_tp" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] + UNSPECV_SET_TP)] + "TARGET_THREADPTR" + "wur\t%0, THREADPTR" + [(set_attr "type" "wsr") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "tls_func" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "tls_symbol_operand" "")] + UNSPEC_TLS_FUNC))] + "TARGET_THREADPTR && HAVE_AS_TLS" + "movi\t%0, %1@TLSFUNC" + [(set_attr "type" "load") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "tls_arg" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "tls_symbol_operand" "")] + UNSPEC_TLS_ARG))] + "TARGET_THREADPTR && HAVE_AS_TLS" + "movi\t%0, %1@TLSARG" + [(set_attr "type" "load") + (set_attr "mode" "SI") + (set_attr "length" "3")]) + +(define_insn "tls_call" + [(set (match_operand:SI 0 "register_operand" "=a") + (call (mem:SI (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "tls_symbol_operand" "")] + UNSPEC_TLS_CALL)) + (match_operand 3 "" "i")))] + "TARGET_THREADPTR && HAVE_AS_TLS" + "callx8.tls %1, %2@TLSCALL" + [(set_attr "type" "call") + (set_attr "mode" "none") + (set_attr "length" "3")]) + + +;; Instructions for the Xtensa "boolean" option. + +(define_insn "*booltrue" + [(set (pc) + (if_then_else (match_operator 2 "boolean_operator" + [(match_operand:CC 0 "register_operand" "b") + (const_int 0)]) + (label_ref (match_operand 1 "" "")) + (pc)))] + "TARGET_BOOLEANS" +{ + if (GET_CODE (operands[2]) == EQ) + return "bf\t%0, %1"; + else + return "bt\t%0, %1"; +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +(define_insn "*boolfalse" + [(set (pc) + (if_then_else (match_operator 2 "boolean_operator" + [(match_operand:CC 0 "register_operand" "b") + (const_int 0)]) + (pc) + (label_ref (match_operand 1 "" ""))))] + "TARGET_BOOLEANS" +{ + if (GET_CODE (operands[2]) == EQ) + return "bt\t%0, %1"; + else + return "bf\t%0, %1"; +} + [(set_attr "type" "jump") + (set_attr "mode" "none") + (set_attr "length" "3")]) + + +;; Atomic operations + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMW))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMW))] + "" + "memw" + [(set_attr "type" "unknown") + (set_attr "mode" "none") + (set_attr "length" "3")]) + +;; sync_lock_release is only implemented for SImode. +;; For other modes, just use the default of a store with a memory_barrier. 
+(define_insn "sync_lock_releasesi"
+  [(set (match_operand:SI 0 "mem_operand" "=U")
+        (unspec_volatile:SI
+          [(match_operand:SI 1 "register_operand" "r")]
+          UNSPECV_S32RI))]
+  "TARGET_RELEASE_SYNC"
+  "s32ri\t%1, %0"
+  [(set_attr "type" "store")
+   (set_attr "mode" "SI")
+   (set_attr "length" "3")])
+
+(define_insn "sync_compare_and_swapsi"
+  [(parallel
+    [(set (match_operand:SI 0 "register_operand" "=a")
+          (match_operand:SI 1 "mem_operand" "+U"))
+     (set (match_dup 1)
+          (unspec_volatile:SI
+            [(match_dup 1)
+             (match_operand:SI 2 "register_operand" "r")
+             (match_operand:SI 3 "register_operand" "0")]
+            UNSPECV_S32C1I))])]
+  "TARGET_S32C1I"
+  "wsr\t%2, SCOMPARE1\;s32c1i\t%3, %1"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")
+   (set_attr "length" "6")])
+
+(define_expand "sync_compare_and_swap<mode>"
+  [(parallel
+    [(set (match_operand:HQI 0 "register_operand" "")
+          (match_operand:HQI 1 "mem_operand" ""))
+     (set (match_dup 1)
+          (unspec_volatile:HQI
+            [(match_dup 1)
+             (match_operand:HQI 2 "register_operand" "")
+             (match_operand:HQI 3 "register_operand" "")]
+            UNSPECV_S32C1I))])]
+  "TARGET_S32C1I"
+{
+  xtensa_expand_compare_and_swap (operands[0], operands[1],
+                                  operands[2], operands[3]);
+  DONE;
+})
+
+(define_expand "sync_lock_test_and_set<mode>"
+  [(match_operand:HQI 0 "register_operand")
+   (match_operand:HQI 1 "memory_operand")
+   (match_operand:HQI 2 "register_operand")]
+  "TARGET_S32C1I"
+{
+  xtensa_expand_atomic (SET, operands[0], operands[1], operands[2], false);
+  DONE;
+})
+
+(define_expand "sync_<atomic><mode>"
+  [(set (match_operand:HQI 0 "memory_operand")
+        (ATOMIC:HQI (match_dup 0)
+                    (match_operand:HQI 1 "register_operand")))]
+  "TARGET_S32C1I"
+{
+  xtensa_expand_atomic (<CODE>, NULL_RTX, operands[0], operands[1], false);
+  DONE;
+})
+
+(define_expand "sync_old_<atomic><mode>"
+  [(set (match_operand:HQI 0 "register_operand")
+        (match_operand:HQI 1 "memory_operand"))
+   (set (match_dup 1)
+        (ATOMIC:HQI (match_dup 1)
+                    (match_operand:HQI 2 "register_operand")))]
+  "TARGET_S32C1I"
+{
+  xtensa_expand_atomic (<CODE>, operands[0], operands[1], operands[2], false);
+  DONE;
+})
+
+(define_expand "sync_new_<atomic><mode>"
+  [(set (match_operand:HQI 0 "register_operand")
+        (ATOMIC:HQI (match_operand:HQI 1 "memory_operand")
+                    (match_operand:HQI 2 "register_operand")))
+   (set (match_dup 1) (ATOMIC:HQI (match_dup 1) (match_dup 2)))]
+  "TARGET_S32C1I"
+{
+  xtensa_expand_atomic (<CODE>, operands[0], operands[1], operands[2], true);
+  DONE;
+})
diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt
new file mode 100644
index 000000000..e78104f1c
--- /dev/null
+++ b/gcc/config/xtensa/xtensa.opt
@@ -0,0 +1,43 @@
+; Options for the Tensilica Xtensa port of the compiler.
+
+; Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3.  If not see
+; <http://www.gnu.org/licenses/>.
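(An illustrative note before the option entries that follow, not part of the patch: each Mask(FOO) marker tells GCC's option machinery to allocate a MASK_FOO bit in target_flags and to define a TARGET_FOO macro testing it, which the backend then branches on. The stand-alone C below mimics that generated code so the mechanism is concrete; the real definitions come from the generated options.h, and the bit values here are arbitrary placeholders.)

#include <stdio.h>

/* Model of what the .opt machinery generates; values are illustrative.  */
static unsigned int target_flags;                    /* filled in while parsing -m options */
#define MASK_CONST16            0x1                  /* from "Mask(CONST16)" */
#define MASK_SERIALIZE_VOLATILE 0x2                  /* from "Mask(SERIALIZE_VOLATILE)" */
#define TARGET_CONST16            ((target_flags & MASK_CONST16) != 0)
#define TARGET_SERIALIZE_VOLATILE ((target_flags & MASK_SERIALIZE_VOLATILE) != 0)

int main (void)
{
  target_flags |= MASK_CONST16;                      /* as if -mconst16 were given */
  printf ("TARGET_CONST16=%d TARGET_SERIALIZE_VOLATILE=%d\n",
          TARGET_CONST16, TARGET_SERIALIZE_VOLATILE);
  return 0;
}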
+
+mconst16
+Target Report Mask(CONST16)
+Use CONST16 instruction to load constants
+
+mforce-no-pic
+Target Report Mask(FORCE_NO_PIC)
+Disable position-independent code (PIC) for use in OS kernel code
+
+mlongcalls
+Target
+Use indirect CALLXn instructions for large programs
+
+mtarget-align
+Target
+Automatically align branch targets to reduce branch penalties
+
+mtext-section-literals
+Target
+Intersperse literal pools with code in the text section
+
+mserialize-volatile
+Target Report Mask(SERIALIZE_VOLATILE)
+-mno-serialize-volatile Do not serialize volatile memory references with MEMW instructions
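(A closing illustrative sketch, not part of the patch, returning to the HImode/QImode sync_* expanders in xtensa.md above: they all funnel into xtensa_expand_atomic, which emits a retry loop around the word-wide S32C1I compare-and-swap, operating on the aligned word containing the narrow operand. The C below models that strategy with GCC's generic word-size __sync builtin rather than the RTL the compiler actually emits, and it assumes a little-endian byte order for the shift computation; the function name is invented for illustration.)

#include <stdint.h>

/* Byte-wide atomic add modeled as a CAS loop on the containing word.  */
static uint8_t atomic_add_u8 (volatile uint8_t *p, uint8_t val)
{
  volatile uint32_t *wp = (volatile uint32_t *) ((uintptr_t) p & ~(uintptr_t) 3);
  unsigned int shift = ((uintptr_t) p & 3) * 8;      /* byte's position within the word */
  uint32_t mask = (uint32_t) 0xff << shift;
  uint32_t old, new_word;

  do
    {
      old = *wp;                                     /* current word contents */
      uint32_t byte = (old & mask) >> shift;         /* narrow value being updated */
      new_word = (old & ~mask) | (((byte + val) & 0xff) << shift);
    }
  while (__sync_val_compare_and_swap (wp, old, new_word) != old);

  return (uint8_t) ((old & mask) >> shift);          /* "sync_old_add" style result */
}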